Make RE2::Set canonicalize DFA states.

Change-Id: Ib397b8fb7af447e62fbe8a30f75e3da8322e44a6
Reviewed-on: https://code-review.googlesource.com/c/re2/+/42570
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 81adb30..c740d8a 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -119,7 +119,6 @@
   // byte c, the next state should be s->next_[c].
   struct State {
     inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
-    void SaveMatch(std::vector<int>* v);
 
     int* inst_;         // Instruction pointers in the state.
     int ninst_;         // # of inst_ pointers.
@@ -714,6 +713,15 @@
     }
   }
 
+  // If we're in many match mode, canonicalize for similar reasons:
+  // we have an unordered set of states (i.e. we don't have Marks)
+  // and sorting will reduce the number of distinct sets stored.
+  if (kind_ == Prog::kManyMatch) {
+    int* ip = inst;
+    int* ep = ip + n;
+    std::sort(ip, ep);
+  }
+
   // Append MatchSep and the match IDs in mq if necessary.
   if (mq != NULL) {
     inst[n++] = MatchSep;