Make RE2::Set canonicalize DFA states.
Change-Id: Ib397b8fb7af447e62fbe8a30f75e3da8322e44a6
Reviewed-on: https://code-review.googlesource.com/c/re2/+/42570
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 81adb30..c740d8a 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -119,7 +119,6 @@
// byte c, the next state should be s->next_[c].
struct State {
inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
- void SaveMatch(std::vector<int>* v);
int* inst_; // Instruction pointers in the state.
int ninst_; // # of inst_ pointers.
@@ -714,6 +713,15 @@
}
}
+ // If we're in many match mode, canonicalize for similar reasons:
+ // we have an unordered set of states (i.e. we don't have Marks)
+ // and sorting will reduce the number of distinct sets stored.
+ if (kind_ == Prog::kManyMatch) {
+ int* ip = inst;
+ int* ep = ip + n;
+ std::sort(ip, ep);
+ }
+
// Append MatchSep and the match IDs in mq if necessary.
if (mq != NULL) {
inst[n++] = MatchSep;