diff options
author | Alejandro Colomar <alx@kernel.org> | 2023-11-03 17:41:22 +0100 |
---|---|---|
committer | Alejandro Colomar <alx@kernel.org> | 2023-11-03 23:05:48 +0100 |
commit | 2be04e240deaac14500e079c956611e2c1d0c6f3 (patch) | |
tree | 7eaaff79a06151fa6d3cb90aebe413a8862ce49a | |
parent | 55dd5d7255c85c3229efb42cdedfef496d983c76 (diff) |
bin/grepc: Optimize
Call pcre2grep(1) only once, which allows removing named pipes. This is
an important optimization, and will also allow accepting several file
names on the command line (in a future commit). The source code is also
significantly simplified.
It has a caveat: this single call to pcre2grep(1) will consume more
resources, and will crash on certain input files. The workaround is to
restrict the search to just one or a few types of code. Document this
in the manual page. This caveat only applies to very specific files,
which so far I've only found in the Linux kernel source tree.
-rwxr-xr-x | bin/grepc | 286 | ||||
-rw-r--r-- | share/man/man1/grepc.1 | 7 |
2 files changed, 57 insertions, 236 deletions
@@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Defaults: @@ -183,26 +183,60 @@ grepc_parse_cmd() } -grepc_helper() +grepc_e() { echo '(?s)^([\w[]+[\w\s]*)?\benum\b[ \t]*([\w \t[\]]|::)*\n*([ \t]*){[^}]*^[ \t]*'"$1"'\b\s*[=,].*?^\3}.*?;'; } +grepc_fp() { echo '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)+[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))(?:[\w\s\(,\)[\]]|::)*;'; } +grepc_fd() { echo '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))[ \t]*\n([ \t]*){.*?^\2}'; } +grepc_fgd_libm() { grepc_fd "M_DECL_FUNC \(__$1\)"; } +grepc_fgp_libio() { grepc_fp "_IO_$1"; } +grepc_fgd_libio() { grepc_fd "_IO_$1"; } +grepc_fgp() { grepc_fgp_libio "$1"; } +grepc_fsp() { echo '(?s)^asmlinkage\s+[\w\s]+\**sys_'"$1"'\s*\(.*?\)'; } +grepc_fsd() { echo '(?s)^(COMPAT_)?SYSCALL_DEFINE.\('"$1"'\b.*?^}'; } +grepc_mf() { echo '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\(.*?[^\\]$'; } +grepc_mo() { echo '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\b(?!\().*?(?<!\\)$'; } +grepc_t_braced() { echo '(?s)^([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union|enum)\b([\w \t[\]]|::)+\b'"$1"'\b[ \t]*\n*([ \t]*){.*?^\5}.*?;'; } +grepc_t_td_simple() { echo '(?s)^[ \t]*typedef\s+[^{};]+'"$1"';'; } +grepc_t_td_braced() { echo '(?s)^[ \t]*typedef\s+(struct|union|enum)\b(?:(?!\W'"$1"'\W)([\w \t[\]]|::))*\n*([ \t]*){(?:(?!^\3?}).)*?^\3}\s*'"$1"'(\[[\w\(,\)]\])*;'; } +grepc_t_td_func() { echo '(?s)^[ \t]*typedef\s+[^{};]+\(\**'"$1"'\)\s*\([^{};]+;'; } +grepc_ue() { echo '(?s)^([\w[]+[\w\s]*)?\benum\b([\w \t[\]]|::)*\n*([ \t]*){[^}]*^\s*\w+[\w\s[\]=]*'"$1"'.*?^\3}.*?;'; } +grepc_uf_def() { echo '(?s)^[\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\w+\s*\(([\w\s\(,\)[\]*]|::)+?(\.\.\.)?\)[ \t]*\n*([ \t]*){(?:(?!^\4?}).)*'"$1"'.*?^\4}'; } +grepc_linux_use_func_syscall_def() { echo '(?s)^(COMPAT_)?SYSCALL_DEFINE.\(\w+\b(?:(?!^}).)*'"$1"'.?^}'; } +grepc_uf_linux_def() { grepc_linux_use_func_syscall_def "$1"; } +grepc_um() { echo '(?s)^[ 
\t]*#\s*define\s[\s\\]*\w+\b(\([^\)]*\))?(?:(?![^\\]$).)*'"$1"'.*?[^\\]$'; } +grepc_ut_su() { echo '(?s)^(?!^[ \t]*typedef\b)([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union)\b([\w \t[\]]|::)*\w+[ \t]*\n*([ \t]*){(?:(?!^\5?}).)*?'"$1"'.*?^\5}.*?;'; } +grepc_ut_td_simple() { echo '(?s)^[ \t]*typedef\s+[^{};]*'"$1"'[^{};]+;'; } +grepc_ut_td_su() { echo '(?s)^[ \t]*typedef\s+(struct|union)\b([\w \t[\]]|::)*\n*([ \t]*){(?:(?!^\3?}|^\s*typedef).)*'"$1"'(?:(?!^\3?}|^\s*typedef).)*^\3}\s*\w+;'; } + + +grepc_patterns() { - grep -${iflag}zP -- "$1" \ - | grep -${iflag}zP -- "$2" \ - | head -c-1 \ - | pcre2grep -${iflag}${lflag}HMn --label="$file" -- "$3" \ - | if [ "$kflag" = 'no' ]; then - sed -E 's/^[^: ]+:[0-9]+:/\n\n&\n/'; - else - cat; - fi; + test $t_e = yes && grepc_e "$1"; + test $t_fp = yes && grepc_fp "$1"; + test $t_fd = yes && grepc_fd "$1"; + test $t_fsp = yes && grepc_fsp "$1"; + test $t_fsd = yes && grepc_fsd "$1"; + test $t_fgp = yes && grepc_fgp "$1"; + test $t_fgd_libm = yes && grepc_fgd_libm "$1"; + test $t_fgd_libio = yes && grepc_fgd_libio "$1"; + test $t_mf = yes && grepc_mf "$1"; + test $t_mo = yes && grepc_mo "$1"; + test $t_t_braced = yes && grepc_t_braced "$1"; + test $t_t_td_simple = yes && grepc_t_td_simple "$1"; + test $t_t_td_braced = yes && grepc_t_td_braced "$1"; + test $t_t_td_func = yes && grepc_t_td_func "$1"; + test $t_ue = yes && grepc_ue "$1"; + test $t_uf_def = yes && grepc_uf_def "$1"; + test $t_uf_linux_def = yes && grepc_uf_linux_def "$1"; + test $t_um = yes && grepc_um "$1"; + test $t_ut_su = yes && grepc_ut_su "$1"; + test $t_ut_td_simple = yes && grepc_ut_td_simple "$1"; + test $t_ut_td_su = yes && grepc_ut_td_su "$1"; } -grepc_helper_use() +grepc_search() { - grep -${iflag}zP -- "$1" \ - | grep -${iflag}zP -- "$2" \ - | head -c-1 \ - | pcre2grep -${iflag}${lflag}HMn --label="$file" -- "$3" \ + pcre2grep -${iflag}${lflag}HMn --label="$file" -f <(grepc_patterns "$identifier") \ | if [ "$kflag" = 'no' ]; then sed -E 
's/^[^: ]+:[0-9]+:/\n\n&\n/'; else @@ -216,226 +250,6 @@ grepc_helper_use() } -grepc_e() -{ - grepc_helper \ - '\benum\b' \ - "^[ \t]*$1\b\s*[,=]" \ - '(?s)^([\w[]+[\w\s]*)?\benum\b[ \t]*([\w \t[\]]|::)*\n*([ \t]*){[^}]*^[ \t]*'"$1"'\b\s*[=,].*?^\3}.*?;'; -} - - -grepc_fp() -{ - grepc_helper \ - "(\($1\)|\b$1)\s*\(" \ - '.' \ - '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)+[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))(?:[\w\s\(,\)[\]]|::)*;'; -} - - -grepc_fd() -{ - grepc_helper \ - "(\($1\)|\b$1)\s*\(" \ - '.' \ - '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))[ \t]*\n([ \t]*){.*?^\2}'; -} - - -grepc_fgd_libm() { grepc_fd "M_DECL_FUNC \(__$1\)"; } -grepc_fgp_libio() { grepc_fp "_IO_$1"; } -grepc_fgd_libio() { grepc_fd "_IO_$1"; } -grepc_fgp() { grepc_fgp_libio "$1"; } - - -grepc_fsp() -{ - grepc_helper \ - "^asmlinkage\s+[\w\s]+\**sys_$1\s*\(" \ - '.' \ - '(?s)^asmlinkage\s+[\w\s]+\**'"sys_$1"'\s*\(.*?\)'; -} - - -grepc_fsd() -{ - grepc_helper \ - "SYSCALL_DEFINE.\($1\b" \ - '.' \ - '(?s)^(COMPAT_)?SYSCALL_DEFINE.\('"$1"'\b.*?^}'; -} - - -grepc_mf() -{ - grepc_helper \ - "#\s*define\s+$1\(" \ - '.' \ - '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\(.*?[^\\]$'; -} - - -grepc_mo() -{ - grepc_helper \ - "#\s*define\s+$1\b(?!\()" \ - '.' \ - '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\b(?!\().*?(?<!\\)$'; -} - - -grepc_t_braced() -{ - grepc_helper \ - '\b(struct|union|enum)\b([\w \t[\]]|::)+\b'"$1"'\b' \ - '.' 
\ - '(?s)^([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union|enum)\b([\w \t[\]]|::)+\b'"$1"'\b[ \t]*\n*([ \t]*){.*?^\5}.*?;'; -} - - -grepc_t_td_simple() -{ - grepc_helper \ - '^[ \t]*typedef\s' \ - "\b$1;" \ - '(?s)^[ \t]*typedef\s+[^{};]+'"$1"';'; -} - - -grepc_t_td_braced() -{ - grepc_helper \ - '^[ \t]*typedef\s+(struct|union|enum)\b[^;]*$' \ - "^[ \t]*}\s*$1(\[[\w\(,\)]\])*;" \ - '(?s)^[ \t]*typedef\s+(struct|union|enum)\b(?:(?!\W'"$1"'\W)([\w \t[\]]|::))*\n*([ \t]*){(?:(?!^\3?}).)*?^\3}\s*'"$1"'(\[[\w\(,\)]\])*;'; -} - - -grepc_t_td_func() -{ - grepc_helper \ - '^[ \t]*typedef\s' \ - "\(\**$1\)\s*\(" \ - '(?s)^[ \t]*typedef\s+[^{};]+\(\**'"$1"'\)\s*\([^{};]+;'; -} - - -grepc_ue() -{ - grepc_helper_use \ - '\benum\b' \ - "\b$1\b" \ - '(?s)^([\w[]+[\w\s]*)?\benum\b([\w \t[\]]|::)*\n*([ \t]*){[^}]*^\s*\w+[\w\s[\]=]*'"$1"'.*?^\3}.*?;'; -} - - -grepc_uf_def() -{ - grepc_helper_use \ - "\b$1\b" \ - '.' \ - '(?s)^[\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\w+\s*\(([\w\s\(,\)[\]*]|::)+?(\.\.\.)?\)[ \t]*\n*([ \t]*){(?:(?!^\4?}).)*'"$1"'.*?^\4}'; -} - - -grepc_linux_use_func_syscall_def() -{ - grepc_helper_use \ - "SYSCALL_DEFINE.\(" \ - "\b$1\b" \ - '(?s)^(COMPAT_)?SYSCALL_DEFINE.\(\w+\b(?:(?!^}).)*'"$1"'.?^}'; -} - - -grepc_uf_linux_def() { grepc_linux_use_func_syscall_def "$1"; } - - -grepc_um() -{ - grepc_helper_use \ - "\b$1\b" \ - 'define' \ - '(?s)^[ \t]*#\s*define\s[\s\\]*\w+\b(\([^\)]*\))?(?:(?![^\\]$).)*'"$1"'.*?[^\\]$'; -} - - -grepc_ut_su() -{ - grepc_helper_use \ - "\b(struct|union)\b" \ - "\b$1\b" \ - '(?s)^(?!^[ \t]*typedef\b)([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union)\b([\w \t[\]]|::)*\w+[ \t]*\n*([ \t]*){(?:(?!^\5?}).)*?'"$1"'.*?^\5}.*?;'; -} - - -grepc_ut_td_simple() -{ - grepc_helper_use \ - '^[ \t]*typedef\s' \ - "\b$1\b" \ - '(?s)^[ \t]*typedef\s+[^{};]*'"$1"'[^{};]+;'; -} - - -grepc_ut_td_su() -{ - grepc_helper_use \ - '^[ \t]*typedef\s+(struct|union)\b[^;]*$' \ - "\b$1\b" \ - '(?s)^[ \t]*typedef\s+(struct|union)\b([\w 
\t[\]]|::)*\n*([ \t]*){(?:(?!^\3?}|^\s*typedef).)*'"$1"'(?:(?!^\3?}|^\s*typedef).)*^\3}\s*\w+;'; -} - - -grepc_search() -{ - local t=""; - local f="$(mktemp -u -t grepc.XXXXXX)"; - local fi=""; - local fo=""; - - t="$t e"; - t="$t fp fd fsp fsd fgp fgd_libm fgd_libio"; - t="$t mf mo"; - t="$t t_braced t_td_simple t_td_braced t_td_func"; - t="$t ue uf_def uf_linux_def um ut_su ut_td_simple ut_td_su"; - - for ti in $t; do - fi="$fi $f.$ti.i"; - fo="$fo $f.$ti.o"; - done; - - mkfifo -m600 $fi $fo; - cat $fo & - - if test $t_e = yes; then grepc_e "$1"; else cat >/dev/null & printf ''; fi <$f.e.i >$f.e.o & - if test $t_fp = yes; then grepc_fp "$1"; else cat >/dev/null & printf ''; fi <$f.fp.i >$f.fp.o & - if test $t_fd = yes; then grepc_fd "$1"; else cat >/dev/null & printf ''; fi <$f.fd.i >$f.fd.o & - if test $t_fsp = yes; then grepc_fsp "$1"; else cat >/dev/null & printf ''; fi <$f.fsp.i >$f.fsp.o & - if test $t_fsd = yes; then grepc_fsd "$1"; else cat >/dev/null & printf ''; fi <$f.fsd.i >$f.fsd.o & - if test $t_fgp = yes; then grepc_fgp "$1"; else cat >/dev/null & printf ''; fi <$f.fgp.i >$f.fgp.o & - if test $t_fgd_libm = yes; then grepc_fgd_libm "$1"; else cat >/dev/null & printf ''; fi <$f.fgd_libm.i >$f.fgd_libm.o & - if test $t_fgd_libio = yes; then grepc_fgd_libio "$1"; else cat >/dev/null & printf ''; fi <$f.fgd_libio.i >$f.fgd_libio.o & - if test $t_mf = yes; then grepc_mf "$1"; else cat >/dev/null & printf ''; fi <$f.mf.i >$f.mf.o & - if test $t_mo = yes; then grepc_mo "$1"; else cat >/dev/null & printf ''; fi <$f.mo.i >$f.mo.o & - if test $t_t_braced = yes; then grepc_t_braced "$1"; else cat >/dev/null & printf ''; fi <$f.t_braced.i >$f.t_braced.o & - if test $t_t_td_simple = yes; then grepc_t_td_simple "$1"; else cat >/dev/null & printf ''; fi <$f.t_td_simple.i >$f.t_td_simple.o & - if test $t_t_td_braced = yes; then grepc_t_td_braced "$1"; else cat >/dev/null & printf ''; fi <$f.t_td_braced.i >$f.t_td_braced.o & - if test $t_t_td_func = yes; then 
grepc_t_td_func "$1"; else cat >/dev/null & printf ''; fi <$f.t_td_func.i >$f.t_td_func.o & - if test $t_ue = yes; then grepc_ue "$1"; else cat >/dev/null & printf ''; fi <$f.ue.i >$f.ue.o & - if test $t_uf_def = yes; then grepc_uf_def "$1"; else cat >/dev/null & printf ''; fi <$f.uf_def.i >$f.uf_def.o & - if test $t_uf_linux_def = yes; then grepc_uf_linux_def "$1"; else cat >/dev/null & printf ''; fi <$f.uf_linux_def.i >$f.uf_linux_def.o & - if test $t_um = yes; then grepc_um "$1"; else cat >/dev/null & printf ''; fi <$f.um.i >$f.um.o & - if test $t_ut_su = yes; then grepc_ut_su "$1"; else cat >/dev/null & printf ''; fi <$f.ut_su.i >$f.ut_su.o & - if test $t_ut_td_simple = yes; then grepc_ut_td_simple "$1"; else cat >/dev/null & printf ''; fi <$f.ut_td_simple.i >$f.ut_td_simple.o & - if test $t_ut_td_su = yes; then grepc_ut_td_su "$1"; else cat >/dev/null & printf ''; fi <$f.ut_td_su.i >$f.ut_td_su.o & - - tee $fi >/dev/null; - wait; - rm $fi $fo; -} - - main() { grepc_parse_cmd "$@" </dev/null; diff --git a/share/man/man1/grepc.1 b/share/man/man1/grepc.1 index 2d9cde5..e80c648 100644 --- a/share/man/man1/grepc.1 +++ b/share/man/man1/grepc.1 @@ -151,6 +151,13 @@ This option can be passed multiple times to search for various types of code. Default: .BR "e f m t" . +.SH CAVEATS +In some cases, +internal calls to +.MR pcre2grep 1 +may fail after consuming too much resources. +To solve that, +restrict the "types of code" of your search. .SH EXAMPLES .EX .RB \(ti/src/nginx/unit$ " grepc nxt_sprintf;" |