Make RE2::Set use a SparseSet internally.
Change-Id: I204a0ff750caaff95f352b364ea18fc9abafd0e7
Reviewed-on: https://code-review.googlesource.com/17451
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/dfa.cc b/re2/dfa.cc
index fcb34c5..22f6be6 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -96,7 +96,7 @@
// memory), it sets *failed and returns false.
bool Search(const StringPiece& text, const StringPiece& context,
bool anchored, bool want_earliest_match, bool run_forward,
- bool* failed, const char** ep, std::vector<int>* matches);
+ bool* failed, const char** ep, SparseSet* matches);
// Builds out all states for the entire DFA.
// If cb is not empty, it receives one callback per state built.
@@ -272,7 +272,7 @@
RWLocker *cache_lock;
bool failed; // "out" parameter: whether search gave up
const char* ep; // "out" parameter: end pointer for match
- std::vector<int>* matches;
+ SparseSet* matches;
private:
SearchParams(const SearchParams&) = delete;
@@ -1501,13 +1501,11 @@
if (ExtraDebug)
fprintf(stderr, "match @etx! [%s]\n", DumpState(s).c_str());
if (params->matches && kind_ == Prog::kManyMatch) {
- std::vector<int>* v = params->matches;
- v->clear();
for (int i = 0; i < s->ninst_; i++) {
Prog::Inst* ip = prog_->inst(s->inst_[i]);
for (;;) {
if (ip->opcode() == kInstMatch)
- v->push_back(ip->match_id());
+ params->matches->insert(ip->match_id());
if (ip->last())
break;
ip++;
@@ -1742,7 +1740,7 @@
bool run_forward,
bool* failed,
const char** epp,
- std::vector<int>* matches) {
+ SparseSet* matches) {
*epp = NULL;
if (!ok()) {
*failed = true;
@@ -1833,7 +1831,7 @@
//
bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
Anchor anchor, MatchKind kind, StringPiece* match0,
- bool* failed, std::vector<int>* matches) {
+ bool* failed, SparseSet* matches) {
*failed = false;
StringPiece context = const_context;
diff --git a/re2/prog.h b/re2/prog.h
index 3176898..65741b2 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -255,7 +255,7 @@
// SearchDFA fills matches with the match IDs of the final matching state.
bool SearchDFA(const StringPiece& text, const StringPiece& context,
Anchor anchor, MatchKind kind, StringPiece* match0,
- bool* failed, std::vector<int>* matches);
+ bool* failed, SparseSet* matches);
// The callback issued after building each DFA state with BuildEntireDFA().
// If next is null, then the memory budget has been exhausted and building
diff --git a/re2/set.cc b/re2/set.cc
index 1ee431c..d326905 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -20,6 +20,7 @@
anchor_ = anchor;
prog_ = NULL;
compiled_ = false;
+ size_ = 0;
}
RE2::Set::~Set() {
@@ -75,6 +76,7 @@
return false;
}
compiled_ = true;
+ size_ = static_cast<int>(re_.size());
Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
options_.ParseFlags());
@@ -102,8 +104,9 @@
if (v != NULL)
v->clear();
bool dfa_failed = false;
+ SparseSet matches(size_);
bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
- Prog::kManyMatch, NULL, &dfa_failed, v);
+ Prog::kManyMatch, NULL, &dfa_failed, &matches);
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
@@ -113,10 +116,12 @@
}
if (ret == false)
return false;
- if (v != NULL && v->empty()) {
+ if (matches.empty()) {
LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
return false;
}
+ if (v != NULL)
+ v->assign(matches.begin(), matches.end());
return true;
}
diff --git a/re2/set.h b/re2/set.h
index c31d500..f3d37d8 100644
--- a/re2/set.h
+++ b/re2/set.h
@@ -50,6 +50,7 @@
std::vector<re2::Regexp*> re_;
re2::Prog* prog_;
bool compiled_;
+ int size_;
Set(const Set&) = delete;
Set& operator=(const Set&) = delete;