Greatly simplify and speed up regexec0() using REG_STARTEND.
This is a 15-year-old FreeBSD extension (presumably thus also available on macOS) that glibc adopted in 2004, uClibc adopted in 2005, and bionic supports. The only libc that DOESN'T support it is musl, once again because its maintainer explicitly decided not to (https://www.openwall.com/lists/musl/2013/01/15/26), so add an #ifndef fallback (defining REG_STARTEND as 0) to let musl stay uniquely broken. (On musl it'll stop at the first NUL; everything else can match NULs.) This finally fixes the "s/x/y/g on a megabyte line of x's takes forever" issue.
This commit is contained in:
parent
eb318d5b03
commit
48162c4ee3
37
lib/lib.c
37
lib/lib.c
@@ -1317,39 +1317,16 @@ int readlink0(char *path, char *buf, int len)
|
||||
return readlinkat0(AT_FDCWD, path, buf, len);
|
||||
}
|
||||
|
||||
// Do regex matching handling embedded NUL bytes in string (hence extra len
|
||||
// argument). Note that neither the pattern nor the match can currently include
|
||||
// NUL bytes (even with wildcards) and string must be null terminated at
|
||||
// string[len]. But this can find a match after the first NUL.
|
||||
// Do regex matching with len argument to handle embedded NUL bytes in string
|
||||
int regexec0(regex_t *preg, char *string, long len, int nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
regmatch_t *pmatch, int eflags)
|
||||
{
|
||||
char *s = string;
|
||||
regmatch_t backup;
|
||||
|
||||
for (;;) {
|
||||
int rc = regexec(preg, s, nmatch, pmatch, eflags);
|
||||
|
||||
// check for match
|
||||
if (!rc) {
|
||||
for (rc = 0; rc<nmatch && pmatch[rc].rm_so!=-1; rc++) {
|
||||
pmatch[rc].rm_so += s-string;
|
||||
pmatch[rc].rm_eo += s-string;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// advance past NUL bytes and try again
|
||||
while (len && *s) {
|
||||
s++;
|
||||
len--;
|
||||
}
|
||||
while (len && !*s) {
|
||||
s++;
|
||||
len--;
|
||||
}
|
||||
if (!len) return REG_NOMATCH;
|
||||
}
|
||||
if (!nmatch) pmatch = &backup;
|
||||
pmatch->rm_so = 0;
|
||||
pmatch->rm_eo = len;
|
||||
return regexec(preg, string, nmatch, pmatch, eflags|REG_STARTEND);
|
||||
}
|
||||
|
||||
// Return user name or string representation of number, returned buffer
|
||||
|
@@ -6,6 +6,9 @@
|
||||
|
||||
// For musl
|
||||
#define _ALL_SOURCE
|
||||
#ifndef REG_STARTEND
|
||||
#define REG_STARTEND 0
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
// macOS 10.13 doesn't have the POSIX 2008 direct access to timespec in
|
||||
|
@@ -176,4 +176,10 @@ testing '\n with empty capture' \
|
||||
testing '\n too high' \
|
||||
'sed -E "s/(.*)/\2/p" 2>/dev/null || echo OK' "OK\n" "" "foo"
|
||||
|
||||
# Performance test
|
||||
X=x; Y=20; while [ $Y -gt 0 ]; do X=$X$X; Y=$(($Y-1)); done
|
||||
testing 'megabyte s/x/y/g (5 sec timeout)' "timeout 5 sed 's/x/y/g' | sha1sum" \
|
||||
'138c1fa7c3f64186203b0192fb4abdb33cb4e98a -\n' '' "$X\n"
|
||||
unset X Y
|
||||
|
||||
# -i with $ last line test
|
||||
|
Loading…
Reference in New Issue
Block a user