Make the fuzzer limit reverse program size and fanout.
Change-Id: I4504e7c06a3b973500c80ae7eaeade24d11a0c45
Reviewed-on: https://code-review.googlesource.com/33850
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 3ce4d1b..cde76e4 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -23,14 +23,16 @@
// Don't waste time fuzzing high-size programs.
// (They can cause bug reports due to fuzzer timeouts.)
int size = re.ProgramSize();
- if (size > 9999)
+ int rsize = re.ReverseProgramSize();
+ if (size > 9999 || rsize > 9999)
return;
// Don't waste time fuzzing high-fanout programs.
// (They can also cause bug reports due to fuzzer timeouts.)
std::map<int, int> histogram;
int fanout = re.ProgramFanout(&histogram);
- if (fanout > 9)
+ int rfanout = re.ReverseProgramFanout(&histogram);
+ if (fanout > 9 || rfanout > 9)
return;
StringPiece sp1, sp2, sp3, sp4;
diff --git a/re2/re2.cc b/re2/re2.cc
index 89c67ec..f3f96ae 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -262,11 +262,18 @@
return prog_->size();
}
-int RE2::ProgramFanout(std::map<int, int>* histogram) const {
+int RE2::ReverseProgramSize() const {
if (prog_ == NULL)
return -1;
- SparseArray<int> fanout(prog_->size());
- prog_->Fanout(&fanout);
+ Prog* prog = ReverseProg();
+ if (prog == NULL)
+ return -1;
+ return prog->size();
+}
+
+static int Fanout(Prog* prog, std::map<int, int>* histogram) {
+ SparseArray<int> fanout(prog->size());
+ prog->Fanout(&fanout);
histogram->clear();
for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
// TODO(junyer): Optimise this?
@@ -279,6 +286,21 @@
return histogram->rbegin()->first;
}
+int RE2::ProgramFanout(std::map<int, int>* histogram) const {
+ if (prog_ == NULL)
+ return -1;
+ return Fanout(prog_, histogram);
+}
+
+int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const {
+ if (prog_ == NULL)
+ return -1;
+ Prog* prog = ReverseProg();
+ if (prog == NULL)
+ return -1;
+ return Fanout(prog, histogram);
+}
+
// Returns num_captures_, computing it if needed, or -1 if the
// regexp wasn't valid on construction.
int RE2::NumberOfCapturingGroups() const {
diff --git a/re2/re2.h b/re2/re2.h
index 2a012e1..4be959f 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -278,11 +278,13 @@
// Returns the program size, a very approximate measure of a regexp's "cost".
// Larger numbers are more expensive than smaller numbers.
int ProgramSize() const;
+ int ReverseProgramSize() const;
// EXPERIMENTAL! SUBJECT TO CHANGE!
// Outputs the program fanout as a histogram bucketed by powers of 2.
// Returns the number of the largest non-empty bucket.
int ProgramFanout(std::map<int, int>* histogram) const;
+ int ReverseProgramFanout(std::map<int, int>* histogram) const;
// Returns the underlying Regexp; not for general use.
// Returns entire_regexp_ so that callers don't need
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index b847325..cae956c 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -461,6 +461,10 @@
ASSERT_GT(re_simple.ProgramSize(), 0);
ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
+
+ ASSERT_GT(re_simple.ReverseProgramSize(), 0);
+ ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize());
+ ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize());
}
TEST(ProgramFanout, BigProgram) {
@@ -486,6 +490,23 @@
// 13 is the largest non-empty bucket and has 1000 elements.
ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
ASSERT_EQ(1000, histogram[13]);
+
+ // 2 is the largest non-empty bucket and has 3 elements.
+ // This differs from the others due to how reverse `.' works.
+ ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
+ ASSERT_EQ(3, histogram[2]);
+
+ // 5 is the largest non-empty bucket and has 10 elements.
+ ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
+ ASSERT_EQ(10, histogram[5]);
+
+ // 9 is the largest non-empty bucket and has 100 elements.
+ ASSERT_EQ(9, re100.ReverseProgramFanout(&histogram));
+ ASSERT_EQ(100, histogram[9]);
+
+ // 12 is the largest non-empty bucket and has 1000 elements.
+ ASSERT_EQ(12, re1000.ReverseProgramFanout(&histogram));
+ ASSERT_EQ(1000, histogram[12]);
}
// Issue 956519: handling empty character sets was