Replace `StdIntMap` with `std::vector<int>`.
Change-Id: I877f3a8b363807cf96c66ba978c6c6cc6bd0688e
Reviewed-on: https://code-review.googlesource.com/c/re2/+/60010
Reviewed-by: Justin Lebar <jlebar@google.com>
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
index b483b2e..1e4eaff 100644
--- a/re2/prefilter_tree.cc
+++ b/re2/prefilter_tree.cc
@@ -8,7 +8,6 @@
#include <algorithm>
#include <map>
#include <memory>
-#include <set>
#include <string>
#include <utility>
#include <vector>
@@ -36,9 +35,6 @@
PrefilterTree::~PrefilterTree() {
for (size_t i = 0; i < prefilter_vec_.size(); i++)
delete prefilter_vec_[i];
-
- for (size_t i = 0; i < entries_.size(); i++)
- delete entries_[i].parents;
}
void PrefilterTree::Add(Prefilter* prefilter) {
@@ -77,25 +73,21 @@
// not miss out on any regexps triggering by getting rid of a
// prefilter node.
for (size_t i = 0; i < entries_.size(); i++) {
- StdIntMap* parents = entries_[i].parents;
- if (parents->size() > 8) {
+ std::vector<int>& parents = entries_[i].parents;
+ if (parents.size() > 8) {
// This one triggers too many things. If all the parents are AND
// nodes and have other things guarding them, then get rid of
// this trigger. TODO(vsri): Adjust the threshold appropriately,
// make it a function of total number of nodes?
bool have_other_guard = true;
- for (StdIntMap::iterator it = parents->begin();
- it != parents->end(); ++it) {
+ for (int parent : parents) {
have_other_guard = have_other_guard &&
- (entries_[it->first].propagate_up_at_count > 1);
+ (entries_[parent].propagate_up_at_count > 1);
}
-
if (have_other_guard) {
- for (StdIntMap::iterator it = parents->begin();
- it != parents->end(); ++it)
- entries_[it->first].propagate_up_at_count -= 1;
-
- parents->clear(); // Forget the parents
+ for (int parent : parents)
+ entries_[parent].propagate_up_at_count -= 1;
+ parents.clear(); // Forget the parents
}
}
}
@@ -216,20 +208,7 @@
node->set_unique_id(canonical->unique_id());
}
}
- entries_.resize(nodes->size());
-
- // Create parent StdIntMap for the entries.
- for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
- Prefilter* prefilter = v[i];
- if (prefilter == NULL)
- continue;
-
- if (CanonicalNode(nodes, prefilter) != prefilter)
- continue;
-
- Entry* entry = &entries_[prefilter->unique_id()];
- entry->parents = new StdIntMap();
- }
+ entries_.resize(unique_id);
// Fill the entries.
for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
@@ -240,41 +219,34 @@
if (CanonicalNode(nodes, prefilter) != prefilter)
continue;
- Entry* entry = &entries_[prefilter->unique_id()];
+ int id = prefilter->unique_id();
switch (prefilter->op()) {
default:
- case Prefilter::ALL:
LOG(DFATAL) << "Unexpected op: " << prefilter->op();
return;
case Prefilter::ATOM:
- entry->propagate_up_at_count = 1;
+ entries_[id].propagate_up_at_count = 1;
break;
case Prefilter::OR:
case Prefilter::AND: {
- std::set<int> uniq_child;
+ // For each child, we append our id to the child's list of
+ // parent ids... unless we happen to have done so already.
+ // The number of appends is the number of unique children,
+ // which allows correct upward propagation from AND nodes.
+ int up_count = 0;
for (size_t j = 0; j < prefilter->subs()->size(); j++) {
- Prefilter* child = (*prefilter->subs())[j];
- Prefilter* canonical = CanonicalNode(nodes, child);
- if (canonical == NULL) {
- LOG(DFATAL) << "Null canonical node";
- return;
- }
- int child_id = canonical->unique_id();
- uniq_child.insert(child_id);
- // To the child, we want to add to parent indices.
- Entry* child_entry = &entries_[child_id];
- if (child_entry->parents->find(prefilter->unique_id()) ==
- child_entry->parents->end()) {
- (*child_entry->parents)[prefilter->unique_id()] = 1;
+ int child_id = (*prefilter->subs())[j]->unique_id();
+ std::vector<int>& parents = entries_[child_id].parents;
+ if (parents.empty() || parents.back() != id) {
+ parents.push_back(id);
+ up_count++;
}
}
- entry->propagate_up_at_count = prefilter->op() == Prefilter::AND
- ? static_cast<int>(uniq_child.size())
- : 1;
-
+ entries_[id].propagate_up_at_count =
+ prefilter->op() == Prefilter::AND ? up_count : 1;
break;
}
}
@@ -335,10 +307,7 @@
regexps->set(entry.regexps[i], 1);
int c;
// Pass trigger up to parents.
- for (StdIntMap::iterator it = entry.parents->begin();
- it != entry.parents->end();
- ++it) {
- int j = it->first;
+ for (int j : entry.parents) {
const Entry& parent = entries_[j];
// Delay until all the children have succeeded.
if (parent.propagate_up_at_count > 1) {
@@ -368,12 +337,12 @@
LOG(ERROR) << "#Unique Nodes: " << entries_.size();
for (size_t i = 0; i < entries_.size(); i++) {
- StdIntMap* parents = entries_[i].parents;
+ const std::vector<int>& parents = entries_[i].parents;
const std::vector<int>& regexps = entries_[i].regexps;
LOG(ERROR) << "EntryId: " << i
- << " N: " << parents->size() << " R: " << regexps.size();
- for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
- LOG(ERROR) << it->first;
+ << " N: " << parents.size() << " R: " << regexps.size();
+ for (int parent : parents)
+ LOG(ERROR) << parent;
}
LOG(ERROR) << "Map:";
for (NodeMap::const_iterator iter = nodes->begin();
diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h
index aa8f913..6de1c38 100644
--- a/re2/prefilter_tree.h
+++ b/re2/prefilter_tree.h
@@ -18,7 +18,6 @@
#include <map>
#include <string>
-#include <unordered_map>
#include <vector>
#include "util/util.h"
@@ -60,7 +59,6 @@
private:
typedef SparseArray<int> IntMap;
- typedef std::unordered_map<int, int> StdIntMap;
// TODO(junyer): Use std::unordered_set<Prefilter*> instead?
// It should be trivial to get rid of the stringification...
typedef std::map<std::string, Prefilter*> NodeMap;
@@ -80,7 +78,7 @@
// are two different nodes, but they share the atom 'def'. So when
// 'def' matches, it triggers two parents, corresponding to the two
// different OR nodes.
- StdIntMap* parents;
+ std::vector<int> parents;
// When this node is ready to trigger the parent, what are the
// regexps that are triggered.