Make the fuzzer limit reverse program size and fanout.

Change-Id: I4504e7c06a3b973500c80ae7eaeade24d11a0c45
Reviewed-on: https://code-review.googlesource.com/33850
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 3ce4d1b..cde76e4 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -23,14 +23,16 @@
   // Don't waste time fuzzing high-size programs.
   // (They can cause bug reports due to fuzzer timeouts.)
   int size = re.ProgramSize();
-  if (size > 9999)
+  int rsize = re.ReverseProgramSize();
+  if (size > 9999 || rsize > 9999)
     return;
 
   // Don't waste time fuzzing high-fanout programs.
   // (They can also cause bug reports due to fuzzer timeouts.)
   std::map<int, int> histogram;
   int fanout = re.ProgramFanout(&histogram);
-  if (fanout > 9)
+  int rfanout = re.ReverseProgramFanout(&histogram);
+  if (fanout > 9 || rfanout > 9)
     return;
 
   StringPiece sp1, sp2, sp3, sp4;
diff --git a/re2/re2.cc b/re2/re2.cc
index 89c67ec..f3f96ae 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -262,11 +262,18 @@
   return prog_->size();
 }
 
-int RE2::ProgramFanout(std::map<int, int>* histogram) const {
+int RE2::ReverseProgramSize() const {
   if (prog_ == NULL)
     return -1;
-  SparseArray<int> fanout(prog_->size());
-  prog_->Fanout(&fanout);
+  Prog* prog = ReverseProg();
+  if (prog == NULL)
+    return -1;
+  return prog->size();
+}
+
+static int Fanout(Prog* prog, std::map<int, int>* histogram) {
+  SparseArray<int> fanout(prog->size());
+  prog->Fanout(&fanout);
   histogram->clear();
   for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
     // TODO(junyer): Optimise this?
@@ -279,6 +286,21 @@
   return histogram->rbegin()->first;
 }
 
+int RE2::ProgramFanout(std::map<int, int>* histogram) const {
+  if (prog_ == NULL)
+    return -1;
+  return Fanout(prog_, histogram);
+}
+
+int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const {
+  if (prog_ == NULL)
+    return -1;
+  Prog* prog = ReverseProg();
+  if (prog == NULL)
+    return -1;
+  return Fanout(prog, histogram);
+}
+
 // Returns num_captures_, computing it if needed, or -1 if the
 // regexp wasn't valid on construction.
 int RE2::NumberOfCapturingGroups() const {
diff --git a/re2/re2.h b/re2/re2.h
index 2a012e1..4be959f 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -278,11 +278,13 @@
   // Returns the program size, a very approximate measure of a regexp's "cost".
   // Larger numbers are more expensive than smaller numbers.
   int ProgramSize() const;
+  int ReverseProgramSize() const;
 
   // EXPERIMENTAL! SUBJECT TO CHANGE!
   // Outputs the program fanout as a histogram bucketed by powers of 2.
   // Returns the number of the largest non-empty bucket.
   int ProgramFanout(std::map<int, int>* histogram) const;
+  int ReverseProgramFanout(std::map<int, int>* histogram) const;
 
   // Returns the underlying Regexp; not for general use.
   // Returns entire_regexp_ so that callers don't need
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index b847325..cae956c 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -461,6 +461,10 @@
   ASSERT_GT(re_simple.ProgramSize(), 0);
   ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
   ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
+
+  ASSERT_GT(re_simple.ReverseProgramSize(), 0);
+  ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize());
+  ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize());
 }
 
 TEST(ProgramFanout, BigProgram) {
@@ -486,6 +490,23 @@
   // 13 is the largest non-empty bucket and has 1000 elements.
   ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
   ASSERT_EQ(1000, histogram[13]);
+
+  // 2 is the largest non-empty bucket and has 3 elements.
+  // This differs from the others due to how reverse `.' works.
+  ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
+  ASSERT_EQ(3, histogram[2]);
+
+  // 5 is the largest non-empty bucket and has 10 elements.
+  ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
+  ASSERT_EQ(10, histogram[5]);
+
+  // 9 is the largest non-empty bucket and has 100 elements.
+  ASSERT_EQ(9, re100.ReverseProgramFanout(&histogram));
+  ASSERT_EQ(100, histogram[9]);
+
+  // 12 is the largest non-empty bucket and has 1000 elements.
+  ASSERT_EQ(12, re1000.ReverseProgramFanout(&histogram));
+  ASSERT_EQ(1000, histogram[12]);
 }
 
 // Issue 956519: handling empty character sets was