Don't waste time fuzzing programs with large substrings.

While I'm here, dial the fanout limits back up (from 7 to 9).

Change-Id: I3450280bab86a4ed4273f54e0e01f50f44b93841
Reviewed-on: https://code-review.googlesource.com/c/34790
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 978fb76..7f666d2 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -7,6 +7,8 @@
 #include <map>
 #include <string>
 
+#include "re2/prefilter.h"
+#include "re2/prefilter_tree.h"
 #include "re2/re2.h"
 
 using re2::StringPiece;
@@ -21,7 +23,7 @@
     return;
 
   // Don't waste time fuzzing high-size programs.
-  // (They can cause bug reports due to fuzzer timeouts.)
+  // They can cause bug reports due to fuzzer timeouts.
   int size = re.ProgramSize();
   if (size > 9999)
     return;
@@ -30,13 +32,29 @@
     return;
 
   // Don't waste time fuzzing high-fanout programs.
-  // (They can also cause bug reports due to fuzzer timeouts.)
+  // They can cause bug reports due to fuzzer timeouts.
   std::map<int, int> histogram;
   int fanout = re.ProgramFanout(&histogram);
-  if (fanout > 7)
+  if (fanout > 9)
     return;
   int rfanout = re.ReverseProgramFanout(&histogram);
-  if (rfanout > 7)
+  if (rfanout > 9)
+    return;
+
+  // Don't waste time fuzzing programs with large substrings.
+  // They can cause bug reports due to fuzzer timeouts when they
+  // are repetitions (e.g. hundreds of NUL bytes) and matching is
+  // unanchored. And they aren't interesting for fuzzing purposes.
+  // Prefilter and PrefilterTree are used because FilteredRE2 will
+  // compile its own RE2 object, which would be a waste of effort.
+  re2::PrefilterTree prefilter_tree(/*min_atom_len=*/10);  // > 9
+  re2::Prefilter* prefilter = re2::Prefilter::FromRE2(&re);
+  if (prefilter == NULL)
+    return;
+  prefilter_tree.Add(prefilter);  // takes ownership
+  std::vector<string> atoms;
+  prefilter_tree.Compile(&atoms);
+  if (!atoms.empty())
     return;
 
   StringPiece sp1, sp2, sp3, sp4;