Greatly simplify and speed up regexec0() using REG_STARTEND.

This is a 15-year-old FreeBSD extension (presumably thus also available on
macOS) that glibc adopted in 2004, uClibc adopted in 2005, and bionic
supports. The only thing that DOESN'T support it is musl, once again
because its maintainer explicitly decided not to
(https://www.openwall.com/lists/musl/2013/01/15/26), so add an #ifndef
fallback to let musl stay uniquely broken. (It'll stop at the first NUL;
everything else can match NULs.)

Finally fixes the "s/x/y/g on a megabyte line of x's takes forever" issue.
This commit is contained in:
Rob Landley 2019-05-06 13:16:24 -05:00
parent eb318d5b03
commit 48162c4ee3
3 changed files with 16 additions and 30 deletions

View File

@ -1317,39 +1317,16 @@ int readlink0(char *path, char *buf, int len)
return readlinkat0(AT_FDCWD, path, buf, len);
}
// Do regex matching handling embedded NUL bytes in string (hence extra len
// argument). Note that neither the pattern nor the match can currently include
// NUL bytes (even with wildcards) and string must be null terminated at
// string[len]. But this can find a match after the first NUL.
// Do regex matching with len argument to handle embedded NUL bytes in string
int regexec0(regex_t *preg, char *string, long len, int nmatch,
regmatch_t pmatch[], int eflags)
regmatch_t *pmatch, int eflags)
{
char *s = string;
regmatch_t backup;
for (;;) {
int rc = regexec(preg, s, nmatch, pmatch, eflags);
// check for match
if (!rc) {
for (rc = 0; rc<nmatch && pmatch[rc].rm_so!=-1; rc++) {
pmatch[rc].rm_so += s-string;
pmatch[rc].rm_eo += s-string;
}
return 0;
}
// advance past NUL bytes and try again
while (len && *s) {
s++;
len--;
}
while (len && !*s) {
s++;
len--;
}
if (!len) return REG_NOMATCH;
}
if (!nmatch) pmatch = &backup;
pmatch->rm_so = 0;
pmatch->rm_eo = len;
return regexec(preg, string, nmatch, pmatch, eflags|REG_STARTEND);
}
// Return user name or string representation of number, returned buffer

View File

@ -6,6 +6,9 @@
// For musl
#define _ALL_SOURCE
#ifndef REG_STARTEND
#define REG_STARTEND 0
#endif
#ifdef __APPLE__
// macOS 10.13 doesn't have the POSIX 2008 direct access to timespec in

View File

@ -176,4 +176,10 @@ testing '\n with empty capture' \
testing '\n too high' \
'sed -E "s/(.*)/\2/p" 2>/dev/null || echo OK' "OK\n" "" "foo"
# Performance test
X=x; Y=20; while [ $Y -gt 0 ]; do X=$X$X; Y=$(($Y-1)); done
testing 'megabyte s/x/y/g (5 sec timeout)' "timeout 5 sed 's/x/y/g' | sha1sum" \
'138c1fa7c3f64186203b0192fb4abdb33cb4e98a -\n' '' "$X\n"
unset X Y
# -i with $ last line test