Compute first_byte_ eagerly.

Change-Id: Id4ebc31a21914c0cfdde40037da1bec9ab538c76
Reviewed-on: https://code-review.googlesource.com/c/re2/+/55234
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/compile.cc b/re2/compile.cc
index 326ab15..b3e5bdc 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc
@@ -1186,6 +1186,7 @@
   prog_->Optimize();
   prog_->Flatten();
   prog_->ComputeByteMap();
+  prog_->ComputeFirstByte();
 
   // Record remaining memory for DFA.
   if (max_mem_ <= 0) {
diff --git a/re2/nfa.cc b/re2/nfa.cc
index 77fb5fb..75ca306 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -629,10 +629,7 @@
   return false;
 }
 
-// Computes whether all successful matches have a common first byte,
-// and if so, returns that byte.  If not, returns -1.
-int Prog::ComputeFirstByte() {
-  int b = -1;
+void Prog::ComputeFirstByte() {
   SparseSet q(size());
   q.insert(start());
   for (SparseSet::iterator it = q.begin(); it != q.end(); ++it) {
@@ -645,23 +642,27 @@
 
       case kInstMatch:
         // The empty string matches: no first byte.
-        return -1;
+        first_byte_ = -1;
+        return;
 
       case kInstByteRange:
         if (!ip->last())
           q.insert(id+1);
 
-        // Must match only a single byte
-        if (ip->lo() != ip->hi())
-          return -1;
-        if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')
-          return -1;
+        // Must match only a single byte.
+        if (ip->lo() != ip->hi() ||
+            (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')) {
+          first_byte_ = -1;
+          return;
+        }
         // If we haven't seen any bytes yet, record it;
         // otherwise must match the one we saw before.
-        if (b == -1)
-          b = ip->lo();
-        else if (b != ip->lo())
-          return -1;
+        if (first_byte_ == -1) {
+          first_byte_ = ip->lo();
+        } else if (first_byte_ != ip->lo()) {
+          first_byte_ = -1;
+          return;
+        }
         break;
 
       case kInstNop:
@@ -687,7 +688,6 @@
         break;
     }
   }
-  return b;
 }
 
 bool
diff --git a/re2/prog.cc b/re2/prog.cc
index 9e91faf..9244319 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -184,14 +184,31 @@
   return map;
 }
 
-int Prog::first_byte() {
-  std::call_once(first_byte_once_, [](Prog* prog) {
-    prog->first_byte_ = prog->ComputeFirstByte();
-  }, this);
-  return first_byte_;
-}
+// Is ip a guaranteed match at end of text, perhaps after some capturing?
+static bool IsMatch(Prog* prog, Prog::Inst* ip) {
+  for (;;) {
+    switch (ip->opcode()) {
+      default:
+        LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
+        return false;
 
-static bool IsMatch(Prog*, Prog::Inst*);
+      case kInstAlt:
+      case kInstAltMatch:
+      case kInstByteRange:
+      case kInstFail:
+      case kInstEmptyWidth:
+        return false;
+
+      case kInstCapture:
+      case kInstNop:
+        ip = prog->inst(ip->out());
+        break;
+
+      case kInstMatch:
+        return true;
+    }
+  }
+}
 
 // Peep-hole optimizer.
 void Prog::Optimize() {
@@ -257,32 +274,6 @@
   }
 }
 
-// Is ip a guaranteed match at end of text, perhaps after some capturing?
-static bool IsMatch(Prog* prog, Prog::Inst* ip) {
-  for (;;) {
-    switch (ip->opcode()) {
-      default:
-        LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
-        return false;
-
-      case kInstAlt:
-      case kInstAltMatch:
-      case kInstByteRange:
-      case kInstFail:
-      case kInstEmptyWidth:
-        return false;
-
-      case kInstCapture:
-      case kInstNop:
-        ip = prog->inst(ip->out());
-        break;
-
-      case kInstMatch:
-        return true;
-    }
-  }
-}
-
 uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
   int flags = 0;
 
diff --git a/re2/prog.h b/re2/prog.h
index 4ba72c0..4306672 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -215,9 +215,7 @@
   void set_anchor_end(bool b) { anchor_end_ = b; }
   int bytemap_range() { return bytemap_range_; }
   const uint8_t* bytemap() { return bytemap_; }
-
-  // Lazily computed.
-  int first_byte();
+  int first_byte() { return first_byte_; }
 
   // Returns string representation of program for debugging.
   std::string Dump();
@@ -295,9 +293,8 @@
   // Compute bytemap.
   void ComputeByteMap();
 
-  // Computes whether all matches must begin with the same first
-  // byte, and if so, returns that byte.  If not, returns -1.
-  int ComputeFirstByte();
+  // Computes whether all matches must begin with the same first byte.
+  void ComputeFirstByte();
 
   // Run peep-hole optimizer on program.
   void Optimize();
@@ -416,7 +413,6 @@
 
   uint8_t bytemap_[256];    // map from input bytes to byte classes
 
-  std::once_flag first_byte_once_;
   std::once_flag dfa_first_once_;
   std::once_flag dfa_longest_once_;