Fix SimplifyStringSet() for really reals. And the test.

Change-Id: I4c211f8f4eea0a5b2afcbb85319a8467b7f29398
Reviewed-on: https://code-review.googlesource.com/c/re2/+/42518
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/prefilter.cc b/re2/prefilter.cc
index 2ad8c60..f61d54b 100644
--- a/re2/prefilter.cc
+++ b/re2/prefilter.cc
@@ -147,14 +147,11 @@
   // is because, when we are performing a string search to filter
   // regexps, matching "ab" will already allow this regexp to be a
   // candidate for match, so further matching "abc" is redundant.
-  // Note that we must erase "" because find() would find it at the
+  // Note that we must ignore "" because find() would find it at the
   // start of everything and thus we would end up erasing everything.
-  SSIter i = ss->begin();
-  while (i != ss->end()) {
-    if (i->empty()) {
-      i = ss->erase(i);
+  for (SSIter i = ss->begin(); i != ss->end(); ++i) {
+    if (i->empty())
       continue;
-    }
     SSIter j = i;
     ++j;
     while (j != ss->end()) {
@@ -164,7 +161,6 @@
       }
       ++j;
     }
-    ++i;
   }
 }
 
diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc
index 647ddeb..e54caf7 100644
--- a/re2/testing/filtered_re2_test.cc
+++ b/re2/testing/filtered_re2_test.cc
@@ -103,11 +103,9 @@
   }, {
     // Test to make sure that any atoms that have another atom as a
     // substring in an OR are removed; that is, only the shortest
-    // substring is kept. The (|... tests that "" does not cause the
-    // removal of every other atom in the OR, which used to happen
-    // for two separate reasons.
+    // substring is kept.
     "SubstrAtomRemovesSuperStrInOr", {
-      "(|abc123|abc|ghi789|abc1234).*[x-z]+",
+      "(abc123|abc|ghi789|abc1234).*[x-z]+",
       "abcd..yyy..yyyzzz",
       "mnmnpp[a-z]+PPP"
     }, {
@@ -279,4 +277,18 @@
   EXPECT_EQ(2, matching_regexps.size());
 }
 
+TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
+  // Bug due to find() finding "" at the start of everything in a string
+  // set and thus SimplifyStringSet() would end up erasing everything.
+  // In order to test this, we have to keep PrefilterTree from discarding
+  // the OR entirely, so we have to make the minimum atom length zero.
+
+  FilterTestVars v(0);  // override the minimum atom length
+  const char* regexps[] = {"-R.+(|ADD=;AA){12}}"};
+  const char* atoms[] = {"", "-r", "add=;aa", "}"};
+  AddRegexpsAndCompile(regexps, arraysize(regexps), &v);
+  EXPECT_TRUE(CheckExpectedAtoms(atoms, arraysize(atoms),
+                                 "EmptyStringInStringSetBug", &v));
+}
+
 }  //  namespace re2