diff options
author | Alejandro Colomar <alx@kernel.org> | 2023-11-03 17:41:22 +0100 |
---|---|---|
committer | Alejandro Colomar <alx@kernel.org> | 2023-11-03 23:05:48 +0100 |
commit | 2be04e240deaac14500e079c956611e2c1d0c6f3 (patch) | |
tree | 7eaaff79a06151fa6d3cb90aebe413a8862ce49a /bin | |
parent | 55dd5d7255c85c3229efb42cdedfef496d983c76 (diff) |
bin/grepc: Optimize
Call pcre2grep(1) only once, which allows removing named pipes. This is
an important optimization, and will also allow accepting several file
names on the command line (in a future commit). The source code is also
significantly simplified.
It has a caveat: this single call to pcre2grep(1) will consume more
resources, and will crash on certain input files. The workaround is to
restrict the search to just one or a few types of code. Document this
in the manual page. This caveat only applies to very specific files,
which so far I've only found in the Linux kernel source tree.
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/grepc | 286 |
1 file changed, 50 insertions, 236 deletions
@@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Defaults: @@ -183,26 +183,60 @@ grepc_parse_cmd() } -grepc_helper() +grepc_e() { echo '(?s)^([\w[]+[\w\s]*)?\benum\b[ \t]*([\w \t[\]]|::)*\n*([ \t]*){[^}]*^[ \t]*'"$1"'\b\s*[=,].*?^\3}.*?;'; } +grepc_fp() { echo '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)+[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))(?:[\w\s\(,\)[\]]|::)*;'; } +grepc_fd() { echo '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))[ \t]*\n([ \t]*){.*?^\2}'; } +grepc_fgd_libm() { grepc_fd "M_DECL_FUNC \(__$1\)"; } +grepc_fgp_libio() { grepc_fp "_IO_$1"; } +grepc_fgd_libio() { grepc_fd "_IO_$1"; } +grepc_fgp() { grepc_fgp_libio "$1"; } +grepc_fsp() { echo '(?s)^asmlinkage\s+[\w\s]+\**sys_'"$1"'\s*\(.*?\)'; } +grepc_fsd() { echo '(?s)^(COMPAT_)?SYSCALL_DEFINE.\('"$1"'\b.*?^}'; } +grepc_mf() { echo '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\(.*?[^\\]$'; } +grepc_mo() { echo '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\b(?!\().*?(?<!\\)$'; } +grepc_t_braced() { echo '(?s)^([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union|enum)\b([\w \t[\]]|::)+\b'"$1"'\b[ \t]*\n*([ \t]*){.*?^\5}.*?;'; } +grepc_t_td_simple() { echo '(?s)^[ \t]*typedef\s+[^{};]+'"$1"';'; } +grepc_t_td_braced() { echo '(?s)^[ \t]*typedef\s+(struct|union|enum)\b(?:(?!\W'"$1"'\W)([\w \t[\]]|::))*\n*([ \t]*){(?:(?!^\3?}).)*?^\3}\s*'"$1"'(\[[\w\(,\)]\])*;'; } +grepc_t_td_func() { echo '(?s)^[ \t]*typedef\s+[^{};]+\(\**'"$1"'\)\s*\([^{};]+;'; } +grepc_ue() { echo '(?s)^([\w[]+[\w\s]*)?\benum\b([\w \t[\]]|::)*\n*([ \t]*){[^}]*^\s*\w+[\w\s[\]=]*'"$1"'.*?^\3}.*?;'; } +grepc_uf_def() { echo '(?s)^[\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\w+\s*\(([\w\s\(,\)[\]*]|::)+?(\.\.\.)?\)[ \t]*\n*([ \t]*){(?:(?!^\4?}).)*'"$1"'.*?^\4}'; } +grepc_linux_use_func_syscall_def() { echo '(?s)^(COMPAT_)?SYSCALL_DEFINE.\(\w+\b(?:(?!^}).)*'"$1"'.?^}'; } +grepc_uf_linux_def() { grepc_linux_use_func_syscall_def "$1"; } +grepc_um() { echo '(?s)^[ 
\t]*#\s*define\s[\s\\]*\w+\b(\([^\)]*\))?(?:(?![^\\]$).)*'"$1"'.*?[^\\]$'; } +grepc_ut_su() { echo '(?s)^(?!^[ \t]*typedef\b)([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union)\b([\w \t[\]]|::)*\w+[ \t]*\n*([ \t]*){(?:(?!^\5?}).)*?'"$1"'.*?^\5}.*?;'; } +grepc_ut_td_simple() { echo '(?s)^[ \t]*typedef\s+[^{};]*'"$1"'[^{};]+;'; } +grepc_ut_td_su() { echo '(?s)^[ \t]*typedef\s+(struct|union)\b([\w \t[\]]|::)*\n*([ \t]*){(?:(?!^\3?}|^\s*typedef).)*'"$1"'(?:(?!^\3?}|^\s*typedef).)*^\3}\s*\w+;'; } + + +grepc_patterns() { - grep -${iflag}zP -- "$1" \ - | grep -${iflag}zP -- "$2" \ - | head -c-1 \ - | pcre2grep -${iflag}${lflag}HMn --label="$file" -- "$3" \ - | if [ "$kflag" = 'no' ]; then - sed -E 's/^[^: ]+:[0-9]+:/\n\n&\n/'; - else - cat; - fi; + test $t_e = yes && grepc_e "$1"; + test $t_fp = yes && grepc_fp "$1"; + test $t_fd = yes && grepc_fd "$1"; + test $t_fsp = yes && grepc_fsp "$1"; + test $t_fsd = yes && grepc_fsd "$1"; + test $t_fgp = yes && grepc_fgp "$1"; + test $t_fgd_libm = yes && grepc_fgd_libm "$1"; + test $t_fgd_libio = yes && grepc_fgd_libio "$1"; + test $t_mf = yes && grepc_mf "$1"; + test $t_mo = yes && grepc_mo "$1"; + test $t_t_braced = yes && grepc_t_braced "$1"; + test $t_t_td_simple = yes && grepc_t_td_simple "$1"; + test $t_t_td_braced = yes && grepc_t_td_braced "$1"; + test $t_t_td_func = yes && grepc_t_td_func "$1"; + test $t_ue = yes && grepc_ue "$1"; + test $t_uf_def = yes && grepc_uf_def "$1"; + test $t_uf_linux_def = yes && grepc_uf_linux_def "$1"; + test $t_um = yes && grepc_um "$1"; + test $t_ut_su = yes && grepc_ut_su "$1"; + test $t_ut_td_simple = yes && grepc_ut_td_simple "$1"; + test $t_ut_td_su = yes && grepc_ut_td_su "$1"; } -grepc_helper_use() +grepc_search() { - grep -${iflag}zP -- "$1" \ - | grep -${iflag}zP -- "$2" \ - | head -c-1 \ - | pcre2grep -${iflag}${lflag}HMn --label="$file" -- "$3" \ + pcre2grep -${iflag}${lflag}HMn --label="$file" -f <(grepc_patterns "$identifier") \ | if [ "$kflag" = 'no' ]; then sed -E 
's/^[^: ]+:[0-9]+:/\n\n&\n/'; else @@ -216,226 +250,6 @@ grepc_helper_use() } -grepc_e() -{ - grepc_helper \ - '\benum\b' \ - "^[ \t]*$1\b\s*[,=]" \ - '(?s)^([\w[]+[\w\s]*)?\benum\b[ \t]*([\w \t[\]]|::)*\n*([ \t]*){[^}]*^[ \t]*'"$1"'\b\s*[=,].*?^\3}.*?;'; -} - - -grepc_fp() -{ - grepc_helper \ - "(\($1\)|\b$1)\s*\(" \ - '.' \ - '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)+[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))(?:[\w\s\(,\)[\]]|::)*;'; -} - - -grepc_fd() -{ - grepc_helper \ - "(\($1\)|\b$1)\s*\(" \ - '.' \ - '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))[ \t]*\n([ \t]*){.*?^\2}'; -} - - -grepc_fgd_libm() { grepc_fd "M_DECL_FUNC \(__$1\)"; } -grepc_fgp_libio() { grepc_fp "_IO_$1"; } -grepc_fgd_libio() { grepc_fd "_IO_$1"; } -grepc_fgp() { grepc_fgp_libio "$1"; } - - -grepc_fsp() -{ - grepc_helper \ - "^asmlinkage\s+[\w\s]+\**sys_$1\s*\(" \ - '.' \ - '(?s)^asmlinkage\s+[\w\s]+\**'"sys_$1"'\s*\(.*?\)'; -} - - -grepc_fsd() -{ - grepc_helper \ - "SYSCALL_DEFINE.\($1\b" \ - '.' \ - '(?s)^(COMPAT_)?SYSCALL_DEFINE.\('"$1"'\b.*?^}'; -} - - -grepc_mf() -{ - grepc_helper \ - "#\s*define\s+$1\(" \ - '.' \ - '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\(.*?[^\\]$'; -} - - -grepc_mo() -{ - grepc_helper \ - "#\s*define\s+$1\b(?!\()" \ - '.' \ - '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\b(?!\().*?(?<!\\)$'; -} - - -grepc_t_braced() -{ - grepc_helper \ - '\b(struct|union|enum)\b([\w \t[\]]|::)+\b'"$1"'\b' \ - '.' 
\ - '(?s)^([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union|enum)\b([\w \t[\]]|::)+\b'"$1"'\b[ \t]*\n*([ \t]*){.*?^\5}.*?;'; -} - - -grepc_t_td_simple() -{ - grepc_helper \ - '^[ \t]*typedef\s' \ - "\b$1;" \ - '(?s)^[ \t]*typedef\s+[^{};]+'"$1"';'; -} - - -grepc_t_td_braced() -{ - grepc_helper \ - '^[ \t]*typedef\s+(struct|union|enum)\b[^;]*$' \ - "^[ \t]*}\s*$1(\[[\w\(,\)]\])*;" \ - '(?s)^[ \t]*typedef\s+(struct|union|enum)\b(?:(?!\W'"$1"'\W)([\w \t[\]]|::))*\n*([ \t]*){(?:(?!^\3?}).)*?^\3}\s*'"$1"'(\[[\w\(,\)]\])*;'; -} - - -grepc_t_td_func() -{ - grepc_helper \ - '^[ \t]*typedef\s' \ - "\(\**$1\)\s*\(" \ - '(?s)^[ \t]*typedef\s+[^{};]+\(\**'"$1"'\)\s*\([^{};]+;'; -} - - -grepc_ue() -{ - grepc_helper_use \ - '\benum\b' \ - "\b$1\b" \ - '(?s)^([\w[]+[\w\s]*)?\benum\b([\w \t[\]]|::)*\n*([ \t]*){[^}]*^\s*\w+[\w\s[\]=]*'"$1"'.*?^\3}.*?;'; -} - - -grepc_uf_def() -{ - grepc_helper_use \ - "\b$1\b" \ - '.' \ - '(?s)^[\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\w+\s*\(([\w\s\(,\)[\]*]|::)+?(\.\.\.)?\)[ \t]*\n*([ \t]*){(?:(?!^\4?}).)*'"$1"'.*?^\4}'; -} - - -grepc_linux_use_func_syscall_def() -{ - grepc_helper_use \ - "SYSCALL_DEFINE.\(" \ - "\b$1\b" \ - '(?s)^(COMPAT_)?SYSCALL_DEFINE.\(\w+\b(?:(?!^}).)*'"$1"'.?^}'; -} - - -grepc_uf_linux_def() { grepc_linux_use_func_syscall_def "$1"; } - - -grepc_um() -{ - grepc_helper_use \ - "\b$1\b" \ - 'define' \ - '(?s)^[ \t]*#\s*define\s[\s\\]*\w+\b(\([^\)]*\))?(?:(?![^\\]$).)*'"$1"'.*?[^\\]$'; -} - - -grepc_ut_su() -{ - grepc_helper_use \ - "\b(struct|union)\b" \ - "\b$1\b" \ - '(?s)^(?!^[ \t]*typedef\b)([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union)\b([\w \t[\]]|::)*\w+[ \t]*\n*([ \t]*){(?:(?!^\5?}).)*?'"$1"'.*?^\5}.*?;'; -} - - -grepc_ut_td_simple() -{ - grepc_helper_use \ - '^[ \t]*typedef\s' \ - "\b$1\b" \ - '(?s)^[ \t]*typedef\s+[^{};]*'"$1"'[^{};]+;'; -} - - -grepc_ut_td_su() -{ - grepc_helper_use \ - '^[ \t]*typedef\s+(struct|union)\b[^;]*$' \ - "\b$1\b" \ - '(?s)^[ \t]*typedef\s+(struct|union)\b([\w 
\t[\]]|::)*\n*([ \t]*){(?:(?!^\3?}|^\s*typedef).)*'"$1"'(?:(?!^\3?}|^\s*typedef).)*^\3}\s*\w+;'; -} - - -grepc_search() -{ - local t=""; - local f="$(mktemp -u -t grepc.XXXXXX)"; - local fi=""; - local fo=""; - - t="$t e"; - t="$t fp fd fsp fsd fgp fgd_libm fgd_libio"; - t="$t mf mo"; - t="$t t_braced t_td_simple t_td_braced t_td_func"; - t="$t ue uf_def uf_linux_def um ut_su ut_td_simple ut_td_su"; - - for ti in $t; do - fi="$fi $f.$ti.i"; - fo="$fo $f.$ti.o"; - done; - - mkfifo -m600 $fi $fo; - cat $fo & - - if test $t_e = yes; then grepc_e "$1"; else cat >/dev/null & printf ''; fi <$f.e.i >$f.e.o & - if test $t_fp = yes; then grepc_fp "$1"; else cat >/dev/null & printf ''; fi <$f.fp.i >$f.fp.o & - if test $t_fd = yes; then grepc_fd "$1"; else cat >/dev/null & printf ''; fi <$f.fd.i >$f.fd.o & - if test $t_fsp = yes; then grepc_fsp "$1"; else cat >/dev/null & printf ''; fi <$f.fsp.i >$f.fsp.o & - if test $t_fsd = yes; then grepc_fsd "$1"; else cat >/dev/null & printf ''; fi <$f.fsd.i >$f.fsd.o & - if test $t_fgp = yes; then grepc_fgp "$1"; else cat >/dev/null & printf ''; fi <$f.fgp.i >$f.fgp.o & - if test $t_fgd_libm = yes; then grepc_fgd_libm "$1"; else cat >/dev/null & printf ''; fi <$f.fgd_libm.i >$f.fgd_libm.o & - if test $t_fgd_libio = yes; then grepc_fgd_libio "$1"; else cat >/dev/null & printf ''; fi <$f.fgd_libio.i >$f.fgd_libio.o & - if test $t_mf = yes; then grepc_mf "$1"; else cat >/dev/null & printf ''; fi <$f.mf.i >$f.mf.o & - if test $t_mo = yes; then grepc_mo "$1"; else cat >/dev/null & printf ''; fi <$f.mo.i >$f.mo.o & - if test $t_t_braced = yes; then grepc_t_braced "$1"; else cat >/dev/null & printf ''; fi <$f.t_braced.i >$f.t_braced.o & - if test $t_t_td_simple = yes; then grepc_t_td_simple "$1"; else cat >/dev/null & printf ''; fi <$f.t_td_simple.i >$f.t_td_simple.o & - if test $t_t_td_braced = yes; then grepc_t_td_braced "$1"; else cat >/dev/null & printf ''; fi <$f.t_td_braced.i >$f.t_td_braced.o & - if test $t_t_td_func = yes; then 
grepc_t_td_func "$1"; else cat >/dev/null & printf ''; fi <$f.t_td_func.i >$f.t_td_func.o & - if test $t_ue = yes; then grepc_ue "$1"; else cat >/dev/null & printf ''; fi <$f.ue.i >$f.ue.o & - if test $t_uf_def = yes; then grepc_uf_def "$1"; else cat >/dev/null & printf ''; fi <$f.uf_def.i >$f.uf_def.o & - if test $t_uf_linux_def = yes; then grepc_uf_linux_def "$1"; else cat >/dev/null & printf ''; fi <$f.uf_linux_def.i >$f.uf_linux_def.o & - if test $t_um = yes; then grepc_um "$1"; else cat >/dev/null & printf ''; fi <$f.um.i >$f.um.o & - if test $t_ut_su = yes; then grepc_ut_su "$1"; else cat >/dev/null & printf ''; fi <$f.ut_su.i >$f.ut_su.o & - if test $t_ut_td_simple = yes; then grepc_ut_td_simple "$1"; else cat >/dev/null & printf ''; fi <$f.ut_td_simple.i >$f.ut_td_simple.o & - if test $t_ut_td_su = yes; then grepc_ut_td_su "$1"; else cat >/dev/null & printf ''; fi <$f.ut_td_su.i >$f.ut_td_su.o & - - tee $fi >/dev/null; - wait; - rm $fi $fo; -} - - main() { grepc_parse_cmd "$@" </dev/null; |