Don't use a SparseSet when matches aren't needed.
Change-Id: I2092f2b238b2cafd9259655273f8aee15283ac02
Reviewed-on: https://code-review.googlesource.com/17630
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 390aa9c..71e1442 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -1341,7 +1341,7 @@
lastmatch = p;
if (ExtraDebug)
fprintf(stderr, "match @stx! [%s]\n", DumpState(s).c_str());
- if (params->matches && kind_ == Prog::kManyMatch) {
+ if (params->matches != NULL && kind_ == Prog::kManyMatch) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
if (id == MatchSep)
@@ -1466,7 +1466,7 @@
if (ExtraDebug)
fprintf(stderr, "match @%td! [%s]\n",
lastmatch - bp, DumpState(s).c_str());
- if (params->matches && kind_ == Prog::kManyMatch) {
+ if (params->matches != NULL && kind_ == Prog::kManyMatch) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
if (id == MatchSep)
@@ -1533,7 +1533,7 @@
lastmatch = p;
if (ExtraDebug)
fprintf(stderr, "match @etx! [%s]\n", DumpState(s).c_str());
- if (params->matches && kind_ == Prog::kManyMatch) {
+ if (params->matches != NULL && kind_ == Prog::kManyMatch) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
if (id == MatchSep)
@@ -1883,6 +1883,8 @@
bool anchored = anchor == kAnchored || anchor_start() || kind == kFullMatch;
bool endmatch = false;
if (kind == kManyMatch) {
+ // In theory, the condition could be matches != NULL && kind == kManyMatch,
+ // but the Prog has been marked as anchored, which we cannot alter.
endmatch = true;
} else if (kind == kFullMatch || anchor_end()) {
endmatch = true;
@@ -1891,17 +1893,17 @@
// If the caller doesn't care where the match is (just whether one exists),
// then we can stop at the very first match we find, the so-called
- // "shortest match".
- bool want_shortest_match = false;
+ // "earliest match".
+ bool want_earliest_match = false;
if (match0 == NULL && !endmatch) {
- want_shortest_match = true;
+ want_earliest_match = true;
kind = kLongestMatch;
}
DFA* dfa = GetDFA(kind);
const char* ep;
bool matched = dfa->Search(text, context, anchored,
- want_shortest_match, !reversed_,
+ want_earliest_match, !reversed_,
failed, &ep, matches);
if (*failed)
return false;
diff --git a/re2/set.cc b/re2/set.cc
index d326905..05b7a3b 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -5,6 +5,7 @@
#include "re2/set.h"
#include <stddef.h>
+#include <memory>
#include "util/util.h"
#include "util/logging.h"
@@ -101,12 +102,14 @@
LOG(DFATAL) << "RE2::Set::Match without Compile";
return false;
}
- if (v != NULL)
- v->clear();
bool dfa_failed = false;
- SparseSet matches(size_);
- bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
- Prog::kManyMatch, NULL, &dfa_failed, &matches);
+ std::unique_ptr<SparseSet> matches;
+ if (v != NULL) {
+ matches.reset(new SparseSet(size_));
+ v->clear();
+ }
+ bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
+ NULL, &dfa_failed, matches.get());
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
@@ -116,12 +119,13 @@
}
if (ret == false)
return false;
- if (matches.empty()) {
- LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
- return false;
+ if (v != NULL) {
+ if (matches->empty()) {
+ LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
+ return false;
+ }
+ v->assign(matches->begin(), matches->end());
}
- if (v != NULL)
- v->assign(matches.begin(), matches.end());
return true;
}