Make RequiredPrefixForAccel() "see through" capturing groups.
Fixes #289.
Change-Id: Id0c888e3cbe898f8c6f7858b8bde05f38c787b73
Reviewed-on: https://code-review.googlesource.com/c/re2/+/58131
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/regexp.cc b/re2/regexp.cc
index 574780f..9655dc3 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc
@@ -722,8 +722,14 @@
*foldcase = false;
// No need for a walker: the regexp must either begin with or be
- // a literal char or string.
+ // a literal char or string. We "see through" capturing groups,
+ // but make no effort to glue multiple prefix fragments together.
Regexp* re = op_ == kRegexpConcat && nsub_ > 0 ? sub()[0] : this;
+ while (re->op_ == kRegexpCapture) {
+ re = re->sub()[0];
+ if (re->op_ == kRegexpConcat && re->nsub_ > 0)
+ re = re->sub()[0];
+ }
if (re->op_ != kRegexpLiteral &&
re->op_ != kRegexpLiteralString)
return false;
diff --git a/re2/testing/required_prefix_test.cc b/re2/testing/required_prefix_test.cc
index c00e812..023d242 100644
--- a/re2/testing/required_prefix_test.cc
+++ b/re2/testing/required_prefix_test.cc
@@ -30,8 +30,8 @@
// If the regexp immediately goes into
// something not a literal match, there's no required prefix.
- { "^(abc)", false },
{ "^a*", false },
+ { "^(abc)", false },
// Otherwise, it should work.
{ "^abc$", true, "abc", false, "(?-m:$)" },
@@ -84,17 +84,25 @@
// If the regexp immediately goes into
// something not a literal match, there's no required prefix.
- { "(abc)", false },
{ "a*", false },
+ // Unlike RequiredPrefix(), RequiredPrefixForAccel() can "see through"
+ // capturing groups, but doesn't try to glue prefix fragments together.
+ { "(a?)def", false },
+ { "(ab?)def", true, "a", false },
+ { "(abc?)def", true, "ab", false },
+ { "(()a)def", false },
+ { "((a)b)def", true, "a", false },
+ { "((ab)c)def", true, "ab", false },
+
// Otherwise, it should work.
- { "abc$", true, "abc", false, },
- { "abc", true, "abc", false, },
- { "(?i)abc", true, "abc", true, },
- { "abcd*", true, "abc", false, },
- { "[Aa][Bb]cd*", true, "ab", true, },
- { "ab[Cc]d*", true, "ab", false, },
- { "☺abc", true, "☺abc", false, },
+ { "abc$", true, "abc", false },
+ { "abc", true, "abc", false },
+ { "(?i)abc", true, "abc", true },
+ { "abcd*", true, "abc", false },
+ { "[Aa][Bb]cd*", true, "ab", true },
+ { "ab[Cc]d*", true, "ab", false },
+ { "☺abc", true, "☺abc", false },
};
TEST(RequiredPrefixForAccel, SimpleTests) {