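Distinguish between missing ')' and unexpected ')'.

A ')' with no matching '(' used to be reported as kRegexpMissingParen
("missing )"), the same code used for an unclosed '('. It now gets its
own code, kRegexpUnexpectedParen ("unexpected )"), exposed publicly as
RE2::ErrorUnexpectedParen. The error-argument test now also checks the
error code for each case.
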
Change-Id: I0701f28b598536009b9dba3f34cdbbbafb9a7407
Reviewed-on: https://code-review.googlesource.com/c/re2/+/57412
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/parse.cc b/re2/parse.cc
index e741603..3bba613 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -685,7 +685,7 @@
if ((r1 = stacktop_) == NULL ||
(r2 = r1->down_) == NULL ||
r2->op() != kLeftParen) {
- status_->set_code(kRegexpMissingParen);
+ status_->set_code(kRegexpUnexpectedParen);
status_->set_error_arg(whole_regexp_);
return false;
}
diff --git a/re2/re2.cc b/re2/re2.cc
index 35fd1cc..7ec193c 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -83,6 +83,8 @@
return RE2::ErrorMissingBracket;
case re2::kRegexpMissingParen:
return RE2::ErrorMissingParen;
+ case re2::kRegexpUnexpectedParen:
+ return RE2::ErrorUnexpectedParen;
case re2::kRegexpTrailingBackslash:
return RE2::ErrorTrailingBackslash;
case re2::kRegexpRepeatArgument:
diff --git a/re2/re2.h b/re2/re2.h
index 55e732b..9d3496c 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -247,6 +247,7 @@
ErrorBadCharRange, // bad character class range
ErrorMissingBracket, // missing closing ]
ErrorMissingParen, // missing closing )
+ ErrorUnexpectedParen, // unexpected closing )
ErrorTrailingBackslash, // trailing \ at end of regexp
ErrorRepeatArgument, // repeat argument missing, e.g. "*"
ErrorRepeatSize, // bad repetition argument
diff --git a/re2/regexp.cc b/re2/regexp.cc
index 4364d0a..574780f 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc
@@ -498,6 +498,7 @@
"invalid character class range",
"missing ]",
"missing )",
+ "unexpected )",
"trailing \\",
"no argument for repetition operator",
"invalid repetition size",
diff --git a/re2/regexp.h b/re2/regexp.h
index 5284ab5..9ea7a07 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -177,6 +177,7 @@
kRegexpBadCharRange, // bad character class range
kRegexpMissingBracket, // missing closing ]
kRegexpMissingParen, // missing closing )
+ kRegexpUnexpectedParen, // unexpected closing )
kRegexpTrailingBackslash, // at end of regexp
kRegexpRepeatArgument, // repeat argument missing, e.g. "*"
kRegexpRepeatSize, // bad repetition argument
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index ec457fb..41fccf6 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -1277,38 +1277,43 @@
EXPECT_EQ(val, "1,0x2F,030,4,5");
}
-
// Check that RE2 returns correct regexp pieces on error.
// In particular, make sure it returns whole runes
// and that it always reports invalid UTF-8.
// Also check that Perl error flag piece is big enough.
static struct ErrorTest {
const char *regexp;
- const char *error;
+ RE2::ErrorCode error_code;
+ const char *error_arg;
} error_tests[] = {
- { "ab\\αcd", "\\α" },
- { "ef\\x☺01", "\\x☺0" },
- { "gh\\x1☺01", "\\x1☺" },
- { "ij\\x1", "\\x1" },
- { "kl\\x", "\\x" },
- { "uv\\x{0000☺}", "\\x{0000☺" },
- { "wx\\p{ABC", "\\p{ABC" },
- { "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X
- { "aa(?sm☺i", "(?sm☺" },
- { "bb[abc", "[abc" },
+ { "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
+ { "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
+ { "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
+ { "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
+ { "kl\\x", RE2::ErrorBadEscape, "\\x" },
+ { "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
+ { "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
+ // used to return (?s but the error is X
+ { "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
+ { "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
+ { "bb[abc", RE2::ErrorMissingBracket, "[abc" },
+ { "abc(def", RE2::ErrorMissingParen, "abc(def" },
+ { "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },
- { "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8
- { "op\377qr", "" },
- { "st\\x{00000\377", "" },
- { "zz\\p{\377}", "" },
- { "zz\\x{00\377}", "" },
- { "zz(?P<name\377>abc)", "" },
+ // no argument string returned for invalid UTF-8
+ { "mn\\x1\377", RE2::ErrorBadUTF8, "" },
+ { "op\377qr", RE2::ErrorBadUTF8, "" },
+ { "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
+ { "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
+ { "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
+ { "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
};
-TEST(RE2, ErrorArgs) {
+TEST(RE2, ErrorCodeAndArg) {
for (size_t i = 0; i < arraysize(error_tests); i++) {
RE2 re(error_tests[i].regexp, RE2::Quiet);
EXPECT_FALSE(re.ok());
- EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
+ EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error();
+ EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error();
}
}