blob: 978fb7690557bd5414f77aef2403f16fbcc78666 [file] [log] [blame]
// Copyright 2016 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <map>
#include <string>
#include "re2/re2.h"
using re2::StringPiece;
using std::string;
// Accumulator that Test() adds API results into, so that those calls have a
// visible effect on program state.
// NOT static, NOT signed.
// (The reason is not stated here; presumably external linkage keeps the
// compiler from discarding the writes and an unsigned type keeps wraparound
// well-defined -- confirm before changing either property.)
uint8_t dummy = 0;
// Compiles |pattern| with |options| and, provided the compiled program is
// within the size and fanout limits below, runs the matching and replacement
// entry points of RE2 against |text|. Results are discarded; the point is to
// execute the code under the fuzzer.
void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
  RE2 re(pattern, options);
  if (!re.ok())
    return;

  // Don't waste time fuzzing high-size programs.
  // (They can cause bug reports due to fuzzer timeouts.)
  int size = re.ProgramSize();
  if (size > 9999)
    return;
  int rsize = re.ReverseProgramSize();
  if (rsize > 9999)
    return;

  // Don't waste time fuzzing high-fanout programs.
  // (They can also cause bug reports due to fuzzer timeouts.)
  std::map<int, int> histogram;
  int fanout = re.ProgramFanout(&histogram);
  if (fanout > 7)
    return;
  int rfanout = re.ReverseProgramFanout(&histogram);
  if (rfanout > 7)
    return;

  // The matching functions return false without attempting a match when they
  // are handed more arguments than the pattern has capturing groups. Always
  // passing four arguments would therefore skip matching entirely for most
  // patterns, so pass at most as many as there are groups.
  const int groups = re.NumberOfCapturingGroups();
  const int argc = groups < 4 ? groups : 4;

  // Capture into StringPiece.
  StringPiece sp1, sp2, sp3, sp4;
  const RE2::Arg sp_args[] = {&sp1, &sp2, &sp3, &sp4};
  const RE2::Arg* const sp_argv[] = {&sp_args[0], &sp_args[1],
                                     &sp_args[2], &sp_args[3]};
  RE2::FullMatchN(text, re, sp_argv, argc);

  // Capture into string.
  string s1, s2, s3, s4;
  const RE2::Arg s_args[] = {&s1, &s2, &s3, &s4};
  const RE2::Arg* const s_argv[] = {&s_args[0], &s_args[1],
                                    &s_args[2], &s_args[3]};
  RE2::PartialMatchN(text, re, s_argv, argc);

  // Capture into int (exercises integer parsing of the submatches).
  int i1 = 0, i2 = 0, i3 = 0, i4 = 0;
  const RE2::Arg i_args[] = {&i1, &i2, &i3, &i4};
  const RE2::Arg* const i_argv[] = {&i_args[0], &i_args[1],
                                    &i_args[2], &i_args[3]};
  StringPiece consume_input = text;
  RE2::ConsumeN(&consume_input, re, i_argv, argc);

  // Capture into double (exercises floating-point parsing of the submatches).
  double d1 = 0, d2 = 0, d3 = 0, d4 = 0;
  const RE2::Arg d_args[] = {&d1, &d2, &d3, &d4};
  const RE2::Arg* const d_argv[] = {&d_args[0], &d_args[1],
                                    &d_args[2], &d_args[3]};
  StringPiece find_input = text;
  RE2::FindAndConsumeN(&find_input, re, d_argv, argc);

  // Replacement operates on owned copies of the text.
  string replace_once = string(text);
  string replace_all = string(text);
  RE2::Replace(&replace_once, re, "");
  RE2::GlobalReplace(&replace_all, re, "");

  // Exercise some other API functionality.
  dummy += groups;
  dummy += RE2::QuoteMeta(pattern).size();
}
// Entry point for libFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size == 0 || size > 999)
return 0;
// Crudely limit the use of ., \p and \P.
// Otherwise, we will waste time on inputs that have long runs of Unicode
// character classes. The fuzzer has shown itself to be easily capable of
// generating such patterns that fall within the other limits, but result
// in timeouts nonetheless. The marginal cost is high - even more so when
// counted repetition is involved - whereas the marginal benefit is zero.
int dot = 0;
int backslash_p = 0;
for (size_t i = 0; i < size; i++) {
if (data[i] == '.')
dot++;
if (data[i] != '\\')
continue;
i++;
if (i >= size)
break;
if (data[i] == 'p' || data[i] == 'P')
backslash_p++;
}
if (dot > 99)
return 0;
if (backslash_p > 1)
return 0;
// The one-at-a-time hash by Bob Jenkins.
uint32_t hash = 0;
for (size_t i = 0; i < size; i++) {
hash += data[i];
hash += (hash << 10);
hash ^= (hash >> 6);
}
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
RE2::Options options;
options.set_log_errors(false);
options.set_max_mem(64 << 20);
options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1
: RE2::Options::EncodingUTF8);
options.set_posix_syntax(hash & 2);
options.set_longest_match(hash & 4);
options.set_literal(hash & 8);
options.set_never_nl(hash & 16);
options.set_dot_nl(hash & 32);
options.set_never_capture(hash & 64);
options.set_case_sensitive(hash & 128);
options.set_perl_classes(hash & 256);
options.set_word_boundary(hash & 512);
options.set_one_line(hash & 1024);
const char* ptr = reinterpret_cast<const char*>(data);
int len = static_cast<int>(size);
StringPiece pattern(ptr, len);
StringPiece text(ptr, len);
Test(pattern, options, text);
return 0;
}