Don't waste time fuzzing programs with large substrings.

While I'm here, dial the fanout limits back up.

Change-Id: I3450280bab86a4ed4273f54e0e01f50f44b93841
Reviewed-on: https://code-review.googlesource.com/c/34790
Reviewed-by: Paul Wankadia <junyer@google.com>

commit: 22caec62055a7707cf5801bebb6028d06220f2b6 [log] [tgz]
author: Paul Wankadia <junyer@google.com> Mon Oct 29 22:58:45 2018 -0700
committer: Paul Wankadia <junyer@google.com> Tue Oct 30 06:01:24 2018 +0000
tree: e95141ff93fc7047ddd0ebd1a170b10736b87a36
parent: 89528a380a7e9722dbf7b5a817251049eab355fb [diff]
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 978fb76..7f666d2 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc

@@ -7,6 +7,8 @@
 #include <map>
 #include <string>
 
+#include "re2/prefilter.h"
+#include "re2/prefilter_tree.h"
 #include "re2/re2.h"
 
 using re2::StringPiece;
@@ -21,7 +23,7 @@
     return;
 
   // Don't waste time fuzzing high-size programs.
-  // (They can cause bug reports due to fuzzer timeouts.)
+  // They can cause bug reports due to fuzzer timeouts.
   int size = re.ProgramSize();
   if (size > 9999)
     return;
@@ -30,13 +32,29 @@
     return;
 
   // Don't waste time fuzzing high-fanout programs.
-  // (They can also cause bug reports due to fuzzer timeouts.)
+  // They can cause bug reports due to fuzzer timeouts.
   std::map<int, int> histogram;
   int fanout = re.ProgramFanout(&histogram);
-  if (fanout > 7)
+  if (fanout > 9)
     return;
   int rfanout = re.ReverseProgramFanout(&histogram);
-  if (rfanout > 7)
+  if (rfanout > 9)
+    return;
+
+  // Don't waste time fuzzing programs with large substrings.
+  // They can cause bug reports due to fuzzer timeouts when they
+  // are repetitions (e.g. hundreds of NUL bytes) and matching is
+  // unanchored. And they aren't interesting for fuzzing purposes.
+  // Prefilter and PrefilterTree are used because FilteredRE2 will
+  // compile its own RE2 object, which would be a waste of effort.
+  re2::PrefilterTree prefilter_tree(/*min_atom_len=*/10);  // > 9
+  re2::Prefilter* prefilter = re2::Prefilter::FromRE2(&re);
+  if (prefilter == NULL)
+    return;
+  prefilter_tree.Add(prefilter);  // takes ownership
+  std::vector<string> atoms;
+  prefilter_tree.Compile(&atoms);
+  if (!atoms.empty())
     return;
 
   StringPiece sp1, sp2, sp3, sp4;
commit	22caec62055a7707cf5801bebb6028d06220f2b6	[log] [tgz]
author	Paul Wankadia <junyer@google.com>	Mon Oct 29 22:58:45 2018 -0700
committer	Paul Wankadia <junyer@google.com>	Tue Oct 30 06:01:24 2018 +0000
tree	e95141ff93fc7047ddd0ebd1a170b10736b87a36
parent	89528a380a7e9722dbf7b5a817251049eab355fb [diff]