Make RE2::Set use a SparseSet internally.

Change-Id: I204a0ff750caaff95f352b364ea18fc9abafd0e7
Reviewed-on: https://code-review.googlesource.com/17451
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/dfa.cc b/re2/dfa.cc
index fcb34c5..22f6be6 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -96,7 +96,7 @@
   //   memory), it sets *failed and returns false.
   bool Search(const StringPiece& text, const StringPiece& context,
               bool anchored, bool want_earliest_match, bool run_forward,
-              bool* failed, const char** ep, std::vector<int>* matches);
+              bool* failed, const char** ep, SparseSet* matches);
 
   // Builds out all states for the entire DFA.
   // If cb is not empty, it receives one callback per state built.
@@ -272,7 +272,7 @@
     RWLocker *cache_lock;
     bool failed;     // "out" parameter: whether search gave up
     const char* ep;  // "out" parameter: end pointer for match
-    std::vector<int>* matches;
+    SparseSet* matches;
 
    private:
     SearchParams(const SearchParams&) = delete;
@@ -1501,13 +1501,11 @@
     if (ExtraDebug)
       fprintf(stderr, "match @etx! [%s]\n", DumpState(s).c_str());
     if (params->matches && kind_ == Prog::kManyMatch) {
-      std::vector<int>* v = params->matches;
-      v->clear();
       for (int i = 0; i < s->ninst_; i++) {
         Prog::Inst* ip = prog_->inst(s->inst_[i]);
         for (;;) {
           if (ip->opcode() == kInstMatch)
-            v->push_back(ip->match_id());
+            params->matches->insert(ip->match_id());
           if (ip->last())
             break;
           ip++;
@@ -1742,7 +1740,7 @@
                  bool run_forward,
                  bool* failed,
                  const char** epp,
-                 std::vector<int>* matches) {
+                 SparseSet* matches) {
   *epp = NULL;
   if (!ok()) {
     *failed = true;
@@ -1833,7 +1831,7 @@
 //
 bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
                      Anchor anchor, MatchKind kind, StringPiece* match0,
-                     bool* failed, std::vector<int>* matches) {
+                     bool* failed, SparseSet* matches) {
   *failed = false;
 
   StringPiece context = const_context;
diff --git a/re2/prog.h b/re2/prog.h
index 3176898..65741b2 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -255,7 +255,7 @@
   // SearchDFA fills matches with the match IDs of the final matching state.
   bool SearchDFA(const StringPiece& text, const StringPiece& context,
                  Anchor anchor, MatchKind kind, StringPiece* match0,
-                 bool* failed, std::vector<int>* matches);
+                 bool* failed, SparseSet* matches);
 
   // The callback issued after building each DFA state with BuildEntireDFA().
   // If next is null, then the memory budget has been exhausted and building
diff --git a/re2/set.cc b/re2/set.cc
index 1ee431c..d326905 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -20,6 +20,7 @@
   anchor_ = anchor;
   prog_ = NULL;
   compiled_ = false;
+  size_ = 0;
 }
 
 RE2::Set::~Set() {
@@ -75,6 +76,7 @@
     return false;
   }
   compiled_ = true;
+  size_ = static_cast<int>(re_.size());
 
   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
     options_.ParseFlags());
@@ -102,8 +104,9 @@
   if (v != NULL)
     v->clear();
   bool dfa_failed = false;
+  SparseSet matches(size_);
   bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
-                              Prog::kManyMatch, NULL, &dfa_failed, v);
+                              Prog::kManyMatch, NULL, &dfa_failed, &matches);
   if (dfa_failed) {
     if (options_.log_errors())
       LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
@@ -113,10 +116,12 @@
   }
   if (ret == false)
     return false;
-  if (v != NULL && v->empty()) {
+  if (matches.empty()) {
     LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
     return false;
   }
+  if (v != NULL)
+    v->assign(matches.begin(), matches.end());
   return true;
 }
 
diff --git a/re2/set.h b/re2/set.h
index c31d500..f3d37d8 100644
--- a/re2/set.h
+++ b/re2/set.h
@@ -50,6 +50,7 @@
   std::vector<re2::Regexp*> re_;
   re2::Prog* prog_;
   bool compiled_;
+  int size_;
 
   Set(const Set&) = delete;
   Set& operator=(const Set&) = delete;