blob: 01ab41ee38e3c60967cd530b2cdd934b7fa0ca3b [file] [log] [blame]
// Copyright 2008 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "util/test.h"
#include "util/logging.h"
#include "re2/prog.h"
#include "re2/regexp.h"
namespace re2 {
struct PCRETest {
const char* regexp;
bool should_match;
};
static PCRETest tests[] = {
// Most things should behave exactly.
{ "abc", true },
{ "(a|b)c", true },
{ "(a*|b)c", true },
{ "(a|b*)c", true },
{ "a(b|c)d", true },
{ "a(()|())c", true },
{ "ab*c", true },
{ "ab+c", true },
{ "a(b*|c*)d", true },
{ "\\W", true },
{ "\\W{1,2}", true },
{ "\\d", true },
// Check that repeated empty strings do not.
{ "(a*)*", false },
{ "x(a*)*y", false },
{ "(a*)+", false },
{ "(a+)*", true },
{ "(a+)+", true },
{ "(a+)+", true },
// \v is the only character class that shouldn't.
{ "\\b", true },
{ "\\v", false },
{ "\\d", true },
// The handling of ^ in multi-line mode is different, as is
// the handling of $ in single-line mode. (Both involve
// boundary cases if the string ends with \n.)
{ "\\A", true },
{ "\\z", true },
{ "(?m)^", false },
{ "(?m)$", true },
{ "(?-m)^", true },
{ "(?-m)$", false }, // In PCRE, == \Z
{ "(?m)\\A", true },
{ "(?m)\\z", true },
{ "(?-m)\\A", true },
{ "(?-m)\\z", true },
};
TEST(MimicsPCRE, SimpleTests) {
for (size_t i = 0; i < arraysize(tests); i++) {
const PCRETest& t = tests[i];
for (size_t j = 0; j < 2; j++) {
Regexp::ParseFlags flags = Regexp::LikePerl;
if (j == 0)
flags = flags | Regexp::Latin1;
Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
ASSERT_TRUE(re != NULL) << " " << t.regexp;
ASSERT_EQ(t.should_match, re->MimicsPCRE())
<< " " << t.regexp << " "
<< (j == 0 ? "latin1" : "utf");
re->Decref();
}
}
}
} // namespace re2