diff options
authorAlejandro Colomar <>2023-11-03 17:41:22 +0100
committerAlejandro Colomar <>2023-11-03 23:05:48 +0100
commit2be04e240deaac14500e079c956611e2c1d0c6f3 (patch)
parent55dd5d7255c85c3229efb42cdedfef496d983c76 (diff)
bin/grepc: Optimize
Call pcre2grep(1) only once, which allows removing named pipes. This is an important optimization, and will also allow accepting several file names in the command line (in a future commit). The source code is also significantly simplified. It has a caveat: this single call to pcre2grep(1) will consume more resources, and will crash on certain input files. The workaround is to restrict the search to just one or a few types of code. Document this in the manual page. This caveat only applies to very specific files, which so far I've only found in the Linux kernel source tree.
2 files changed, 57 insertions, 236 deletions
diff --git a/bin/grepc b/bin/grepc
index 05eeee4..1a7b519 100755
--- a/bin/grepc
+++ b/bin/grepc
@@ -1,4 +1,4 @@
# Defaults:
@@ -183,26 +183,60 @@ grepc_parse_cmd()
+grepc_e() { echo '(?s)^([\w[]+[\w\s]*)?\benum\b[ \t]*([\w \t[\]]|::)*\n*([ \t]*){[^}]*^[ \t]*'"$1"'\b\s*[=,].*?^\3}.*?;'; }
+grepc_fp() { echo '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)+[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))(?:[\w\s\(,\)[\]]|::)*;'; }
+grepc_fd() { echo '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))[ \t]*\n([ \t]*){.*?^\2}'; }
+grepc_fgd_libm() { grepc_fd "M_DECL_FUNC \(__$1\)"; }
+grepc_fgp_libio() { grepc_fp "_IO_$1"; }
+grepc_fgd_libio() { grepc_fd "_IO_$1"; }
+grepc_fgp() { grepc_fgp_libio "$1"; }
+grepc_fsp() { echo '(?s)^asmlinkage\s+[\w\s]+\**sys_'"$1"'\s*\(.*?\)'; }
+grepc_fsd() { echo '(?s)^(COMPAT_)?SYSCALL_DEFINE.\('"$1"'\b.*?^}'; }
+grepc_mf() { echo '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\(.*?[^\\]$'; }
+grepc_mo() { echo '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\b(?!\().*?(?<!\\)$'; }
+grepc_t_braced() { echo '(?s)^([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union|enum)\b([\w \t[\]]|::)+\b'"$1"'\b[ \t]*\n*([ \t]*){.*?^\5}.*?;'; }
+grepc_t_td_simple() { echo '(?s)^[ \t]*typedef\s+[^{};]+'"$1"';'; }
+grepc_t_td_braced() { echo '(?s)^[ \t]*typedef\s+(struct|union|enum)\b(?:(?!\W'"$1"'\W)([\w \t[\]]|::))*\n*([ \t]*){(?:(?!^\3?}).)*?^\3}\s*'"$1"'(\[[\w\(,\)]\])*;'; }
+grepc_t_td_func() { echo '(?s)^[ \t]*typedef\s+[^{};]+\(\**'"$1"'\)\s*\([^{};]+;'; }
+grepc_ue() { echo '(?s)^([\w[]+[\w\s]*)?\benum\b([\w \t[\]]|::)*\n*([ \t]*){[^}]*^\s*\w+[\w\s[\]=]*'"$1"'.*?^\3}.*?;'; }
+grepc_uf_def() { echo '(?s)^[\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\w+\s*\(([\w\s\(,\)[\]*]|::)+?(\.\.\.)?\)[ \t]*\n*([ \t]*){(?:(?!^\4?}).)*'"$1"'.*?^\4}'; }
+grepc_linux_use_func_syscall_def() { echo '(?s)^(COMPAT_)?SYSCALL_DEFINE.\(\w+\b(?:(?!^}).)*'"$1"'.?^}'; }
+grepc_uf_linux_def() { grepc_linux_use_func_syscall_def "$1"; }
+grepc_um() { echo '(?s)^[ \t]*#\s*define\s[\s\\]*\w+\b(\([^\)]*\))?(?:(?![^\\]$).)*'"$1"'.*?[^\\]$'; }
+grepc_ut_su() { echo '(?s)^(?!^[ \t]*typedef\b)([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union)\b([\w \t[\]]|::)*\w+[ \t]*\n*([ \t]*){(?:(?!^\5?}).)*?'"$1"'.*?^\5}.*?;'; }
+grepc_ut_td_simple() { echo '(?s)^[ \t]*typedef\s+[^{};]*'"$1"'[^{};]+;'; }
+grepc_ut_td_su() { echo '(?s)^[ \t]*typedef\s+(struct|union)\b([\w \t[\]]|::)*\n*([ \t]*){(?:(?!^\3?}|^\s*typedef).)*'"$1"'(?:(?!^\3?}|^\s*typedef).)*^\3}\s*\w+;'; }
- grep -${iflag}zP -- "$1" \
- | grep -${iflag}zP -- "$2" \
- | head -c-1 \
- | pcre2grep -${iflag}${lflag}HMn --label="$file" -- "$3" \
- | if [ "$kflag" = 'no' ]; then
- sed -E 's/^[^: ]+:[0-9]+:/\n\n&\n/';
- else
- cat;
- fi;
+ test $t_e = yes && grepc_e "$1";
+ test $t_fp = yes && grepc_fp "$1";
+ test $t_fd = yes && grepc_fd "$1";
+ test $t_fsp = yes && grepc_fsp "$1";
+ test $t_fsd = yes && grepc_fsd "$1";
+ test $t_fgp = yes && grepc_fgp "$1";
+ test $t_fgd_libm = yes && grepc_fgd_libm "$1";
+ test $t_fgd_libio = yes && grepc_fgd_libio "$1";
+ test $t_mf = yes && grepc_mf "$1";
+ test $t_mo = yes && grepc_mo "$1";
+ test $t_t_braced = yes && grepc_t_braced "$1";
+ test $t_t_td_simple = yes && grepc_t_td_simple "$1";
+ test $t_t_td_braced = yes && grepc_t_td_braced "$1";
+ test $t_t_td_func = yes && grepc_t_td_func "$1";
+ test $t_ue = yes && grepc_ue "$1";
+ test $t_uf_def = yes && grepc_uf_def "$1";
+ test $t_uf_linux_def = yes && grepc_uf_linux_def "$1";
+ test $t_um = yes && grepc_um "$1";
+ test $t_ut_su = yes && grepc_ut_su "$1";
+ test $t_ut_td_simple = yes && grepc_ut_td_simple "$1";
+ test $t_ut_td_su = yes && grepc_ut_td_su "$1";
- grep -${iflag}zP -- "$1" \
- | grep -${iflag}zP -- "$2" \
- | head -c-1 \
- | pcre2grep -${iflag}${lflag}HMn --label="$file" -- "$3" \
+ pcre2grep -${iflag}${lflag}HMn --label="$file" -f <(grepc_patterns "$identifier") \
| if [ "$kflag" = 'no' ]; then
sed -E 's/^[^: ]+:[0-9]+:/\n\n&\n/';
@@ -216,226 +250,6 @@ grepc_helper_use()
- grepc_helper \
- '\benum\b' \
- "^[ \t]*$1\b\s*[,=]" \
- '(?s)^([\w[]+[\w\s]*)?\benum\b[ \t]*([\w \t[\]]|::)*\n*([ \t]*){[^}]*^[ \t]*'"$1"'\b\s*[=,].*?^\3}.*?;';
- grepc_helper \
- "(\($1\)|\b$1)\s*\(" \
- '.' \
- '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)+[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))(?:[\w\s\(,\)[\]]|::)*;';
- grepc_helper \
- "(\($1\)|\b$1)\s*\(" \
- '.' \
- '(?s)^[\w[](?:[\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\(?'"$1"'\)?\s*(\((?:[\w\s,[\]*]|::|(?1))*(?:\.\.\.)?\))[ \t]*\n([ \t]*){.*?^\2}';
-grepc_fgd_libm() { grepc_fd "M_DECL_FUNC \(__$1\)"; }
-grepc_fgp_libio() { grepc_fp "_IO_$1"; }
-grepc_fgd_libio() { grepc_fd "_IO_$1"; }
-grepc_fgp() { grepc_fgp_libio "$1"; }
- grepc_helper \
- "^asmlinkage\s+[\w\s]+\**sys_$1\s*\(" \
- '.' \
- '(?s)^asmlinkage\s+[\w\s]+\**'"sys_$1"'\s*\(.*?\)';
- grepc_helper \
- "SYSCALL_DEFINE.\($1\b" \
- '.' \
- '(?s)^(COMPAT_)?SYSCALL_DEFINE.\('"$1"'\b.*?^}';
- grepc_helper \
- "#\s*define\s+$1\(" \
- '.' \
- '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\(.*?[^\\]$';
- grepc_helper \
- "#\s*define\s+$1\b(?!\()" \
- '.' \
- '(?s)^[ \t]*#\s*define\s[\s\\]*'"$1"'\b(?!\().*?(?<!\\)$';
- grepc_helper \
- '\b(struct|union|enum)\b([\w \t[\]]|::)+\b'"$1"'\b' \
- '.' \
- '(?s)^([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union|enum)\b([\w \t[\]]|::)+\b'"$1"'\b[ \t]*\n*([ \t]*){.*?^\5}.*?;';
- grepc_helper \
- '^[ \t]*typedef\s' \
- "\b$1;" \
- '(?s)^[ \t]*typedef\s+[^{};]+'"$1"';';
- grepc_helper \
- '^[ \t]*typedef\s+(struct|union|enum)\b[^;]*$' \
- "^[ \t]*}\s*$1(\[[\w\(,\)]\])*;" \
- '(?s)^[ \t]*typedef\s+(struct|union|enum)\b(?:(?!\W'"$1"'\W)([\w \t[\]]|::))*\n*([ \t]*){(?:(?!^\3?}).)*?^\3}\s*'"$1"'(\[[\w\(,\)]\])*;';
- grepc_helper \
- '^[ \t]*typedef\s' \
- "\(\**$1\)\s*\(" \
- '(?s)^[ \t]*typedef\s+[^{};]+\(\**'"$1"'\)\s*\([^{};]+;';
- grepc_helper_use \
- '\benum\b' \
- "\b$1\b" \
- '(?s)^([\w[]+[\w\s]*)?\benum\b([\w \t[\]]|::)*\n*([ \t]*){[^}]*^\s*\w+[\w\s[\]=]*'"$1"'.*?^\3}.*?;';
- grepc_helper_use \
- "\b$1\b" \
- '.' \
- '(?s)^[\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+\**\w+\s*\(([\w\s\(,\)[\]*]|::)+?(\.\.\.)?\)[ \t]*\n*([ \t]*){(?:(?!^\4?}).)*'"$1"'.*?^\4}';
- grepc_helper_use \
- "\b$1\b" \
- '(?s)^(COMPAT_)?SYSCALL_DEFINE.\(\w+\b(?:(?!^}).)*'"$1"'.?^}';
-grepc_uf_linux_def() { grepc_linux_use_func_syscall_def "$1"; }
- grepc_helper_use \
- "\b$1\b" \
- 'define' \
- '(?s)^[ \t]*#\s*define\s[\s\\]*\w+\b(\([^\)]*\))?(?:(?![^\\]$).)*'"$1"'.*?[^\\]$';
- grepc_helper_use \
- "\b(struct|union)\b" \
- "\b$1\b" \
- '(?s)^(?!^[ \t]*typedef\b)([\w[]([\w\s\(,\)[\]*]|::)*[\w\s\)*\]]\s+)?\b(struct|union)\b([\w \t[\]]|::)*\w+[ \t]*\n*([ \t]*){(?:(?!^\5?}).)*?'"$1"'.*?^\5}.*?;';
- grepc_helper_use \
- '^[ \t]*typedef\s' \
- "\b$1\b" \
- '(?s)^[ \t]*typedef\s+[^{};]*'"$1"'[^{};]+;';
- grepc_helper_use \
- '^[ \t]*typedef\s+(struct|union)\b[^;]*$' \
- "\b$1\b" \
- '(?s)^[ \t]*typedef\s+(struct|union)\b([\w \t[\]]|::)*\n*([ \t]*){(?:(?!^\3?}|^\s*typedef).)*'"$1"'(?:(?!^\3?}|^\s*typedef).)*^\3}\s*\w+;';
- local t="";
- local f="$(mktemp -u -t grepc.XXXXXX)";
- local fi="";
- local fo="";
- t="$t e";
- t="$t fp fd fsp fsd fgp fgd_libm fgd_libio";
- t="$t mf mo";
- t="$t t_braced t_td_simple t_td_braced t_td_func";
- t="$t ue uf_def uf_linux_def um ut_su ut_td_simple ut_td_su";
- for ti in $t; do
- fi="$fi $f.$ti.i";
- fo="$fo $f.$ti.o";
- done;
- mkfifo -m600 $fi $fo;
- cat $fo &
- if test $t_e = yes; then grepc_e "$1"; else cat >/dev/null & printf ''; fi <$f.e.i >$f.e.o &
- if test $t_fp = yes; then grepc_fp "$1"; else cat >/dev/null & printf ''; fi <$f.fp.i >$f.fp.o &
- if test $t_fd = yes; then grepc_fd "$1"; else cat >/dev/null & printf ''; fi <$f.fd.i >$f.fd.o &
- if test $t_fsp = yes; then grepc_fsp "$1"; else cat >/dev/null & printf ''; fi <$f.fsp.i >$f.fsp.o &
- if test $t_fsd = yes; then grepc_fsd "$1"; else cat >/dev/null & printf ''; fi <$f.fsd.i >$f.fsd.o &
- if test $t_fgp = yes; then grepc_fgp "$1"; else cat >/dev/null & printf ''; fi <$f.fgp.i >$f.fgp.o &
- if test $t_fgd_libm = yes; then grepc_fgd_libm "$1"; else cat >/dev/null & printf ''; fi <$f.fgd_libm.i >$f.fgd_libm.o &
- if test $t_fgd_libio = yes; then grepc_fgd_libio "$1"; else cat >/dev/null & printf ''; fi <$f.fgd_libio.i >$f.fgd_libio.o &
- if test $t_mf = yes; then grepc_mf "$1"; else cat >/dev/null & printf ''; fi <$f.mf.i >$f.mf.o &
- if test $t_mo = yes; then grepc_mo "$1"; else cat >/dev/null & printf ''; fi <$f.mo.i >$f.mo.o &
- if test $t_t_braced = yes; then grepc_t_braced "$1"; else cat >/dev/null & printf ''; fi <$f.t_braced.i >$f.t_braced.o &
- if test $t_t_td_simple = yes; then grepc_t_td_simple "$1"; else cat >/dev/null & printf ''; fi <$f.t_td_simple.i >$f.t_td_simple.o &
- if test $t_t_td_braced = yes; then grepc_t_td_braced "$1"; else cat >/dev/null & printf ''; fi <$f.t_td_braced.i >$f.t_td_braced.o &
- if test $t_t_td_func = yes; then grepc_t_td_func "$1"; else cat >/dev/null & printf ''; fi <$f.t_td_func.i >$f.t_td_func.o &
- if test $t_ue = yes; then grepc_ue "$1"; else cat >/dev/null & printf ''; fi <$f.ue.i >$f.ue.o &
- if test $t_uf_def = yes; then grepc_uf_def "$1"; else cat >/dev/null & printf ''; fi <$f.uf_def.i >$f.uf_def.o &
- if test $t_uf_linux_def = yes; then grepc_uf_linux_def "$1"; else cat >/dev/null & printf ''; fi <$f.uf_linux_def.i >$f.uf_linux_def.o &
- if test $t_um = yes; then grepc_um "$1"; else cat >/dev/null & printf ''; fi <$f.um.i >$f.um.o &
- if test $t_ut_su = yes; then grepc_ut_su "$1"; else cat >/dev/null & printf ''; fi <$f.ut_su.i >$f.ut_su.o &
- if test $t_ut_td_simple = yes; then grepc_ut_td_simple "$1"; else cat >/dev/null & printf ''; fi <$f.ut_td_simple.i >$f.ut_td_simple.o &
- if test $t_ut_td_su = yes; then grepc_ut_td_su "$1"; else cat >/dev/null & printf ''; fi <$f.ut_td_su.i >$f.ut_td_su.o &
- tee $fi >/dev/null;
- wait;
- rm $fi $fo;
grepc_parse_cmd "$@" </dev/null;
diff --git a/share/man/man1/grepc.1 b/share/man/man1/grepc.1
index 2d9cde5..e80c648 100644
--- a/share/man/man1/grepc.1
+++ b/share/man/man1/grepc.1
@@ -151,6 +151,13 @@ This option can be passed multiple times
to search for various types of code.
.BR "e f m t" .
+In some cases,
+internal calls to
+.MR pcre2grep 1
+may fail after consuming too many resources.
+To solve that,
+restrict the "types of code" of your search.
.RB \(ti/src/nginx/unit$ " grepc nxt_sprintf;"