Teach sed s/// how to handle [:space:] type sequences.

More precisely, handle s@[[:space:]@]@replace@: the @ inside [] must not be treated as a delimiter, and the parser has to know about nested [[]] to make that decision.
2016-09-06 00:14:24 -05:00 · 2016-09-06 00:14:24 -05:00 · 337c072ac0
commit 337c072ac0
parent eed9ed41aa
2 changed files with 18 additions and 5 deletions
--- a/tests/sed.test
+++ b/tests/sed.test
@@ -57,6 +57,8 @@ testing 'multiple regex address match' 'sed -n /on/,/off/p' \
 	'zap\nbone\nturtle\scoff\nfred\ntron\nlurid\noffer\nbecause\n'
 testing 'regex address overlap' 'sed -n /on/,/off/p' "on\nzap\noffon\n" "" \
 	'on\nzap\noffon\nping\noff\n'
+testing 'getdelim with nested [:blah:]' 'sed -n "sa\a[a[:space:]bc]*aXXagp"' \
+	"ABXXCDXXEFXXGHXXIXX" "" "ABaaCDa EFaa aGHa a Ia "

 # gGhHlnNpPqrstwxy:=
 # s///#comment
--- a/toys/posix/sed.c
+++ b/toys/posix/sed.c
@@ -657,6 +657,7 @@ static char *unescape_delimited_string(char **pstr, char *delim)
 {
  char *to, *from, mode = 0, d;

+  // Grab leading delimiter (if necessary), allocate space for new string
  from = *pstr;
  if (!delim || !*delim) {
    if (!(d = *(from++))) return 0;
@@ -670,13 +671,23 @@ static char *unescape_delimited_string(char **pstr, char *delim)
    if (!*from) return 0;

    // delimiter in regex character range doesn't count
-    if (!mode && *from == '[') {
-      mode = '[';
-      if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
-    } else if (mode && *from == ']') mode = 0;
+    if (*from == '[') {
+      if (!mode) {
+        mode = ']';
+        if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
+      } else if (mode == ']' && strchr(".=:", from[1])) {
+        *(to++) = *(from++);
+        mode = *from;
+      }
+    } else if (*from == mode) {
+      if (mode == ']') mode = 0;
+      else {
+        *(to++) = *(from++);
+        mode = ']';
+      }
    // Length 1 range (X-X with same X) is "undefined" and makes regcomp err,
    // but the perl build does it, so we need to filter it out.
-    else if (mode && *from == '-' && from[-1] == from[1]) {
+    } else if (mode && *from == '-' && from[-1] == from[1]) {
      from+=2;
      continue;
    } else if (*from == '\\') {