Remove first_byte from StartInfo and simplify accordingly.

Change-Id: I42786d4934e00ef7582d1a5d71a06ae624b05be5
Reviewed-on: https://code-review.googlesource.com/c/re2/+/55872
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 7ffb497..f0a79f0 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -167,12 +167,6 @@
   typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
 
  private:
-  // Special "first_byte" values for a state.  (Values >= 0 denote actual bytes.)
-  enum {
-    kFbUnknown = -1,   // No analysis has been performed.
-    kFbNone = -2,      // The first-byte trick cannot be used.
-  };
-
   enum {
     // Indices into start_ for unanchored searches.
     // Add kStartAnchored for anchored searches.
@@ -239,25 +233,26 @@
   struct SearchParams {
     SearchParams(const StringPiece& text, const StringPiece& context,
                  RWLocker* cache_lock)
-      : text(text), context(context),
+      : text(text),
+        context(context),
         anchored(false),
+        have_first_byte(false),
         want_earliest_match(false),
         run_forward(false),
         start(NULL),
-        first_byte(kFbUnknown),
         cache_lock(cache_lock),
         failed(false),
         ep(NULL),
-        matches(NULL) { }
+        matches(NULL) {}
 
     StringPiece text;
     StringPiece context;
     bool anchored;
+    bool have_first_byte;
     bool want_earliest_match;
     bool run_forward;
     State* start;
-    int first_byte;
-    RWLocker *cache_lock;
+    RWLocker* cache_lock;
     bool failed;     // "out" parameter: whether search gave up
     const char* ep;  // "out" parameter: end pointer for match
     SparseSet* matches;
@@ -268,15 +263,13 @@
   };
 
   // Before each search, the parameters to Search are analyzed by
-  // AnalyzeSearch to determine the state in which to start and the
-  // "first_byte" for that state, if any.
+  // AnalyzeSearch to determine the state in which to start.
   struct StartInfo {
-    StartInfo() : start(NULL), first_byte(kFbUnknown) {}
-    State* start;
-    std::atomic<int> first_byte;
+    StartInfo() : start(NULL) {}
+    std::atomic<State*> start;
   };
 
-  // Fills in params->start and params->first_byte using
+  // Fills in params->start and params->have_first_byte using
   // the other search parameters.  Returns true on success,
   // false on failure.
   // cache_mutex_.r <= L < mutex_
@@ -1183,10 +1176,8 @@
   });
 
   // Clear the cache, reset the memory budget.
-  for (int i = 0; i < kMaxStart; i++) {
-    start_[i].start = NULL;
-    start_[i].first_byte.store(kFbUnknown, std::memory_order_relaxed);
-  }
+  for (int i = 0; i < kMaxStart; i++)
+    start_[i].start.store(NULL, std::memory_order_relaxed);
   ClearCache();
   mem_budget_ = state_budget_;
 }
@@ -1349,6 +1340,7 @@
     swap(p, ep);
   }
 
+  int first_byte = prog_->first_byte();
   const uint8_t* bytemap = prog_->bytemap();
   const uint8_t* lastmatch = NULL;   // most recent matching position in text
   bool matched = false;
@@ -1385,7 +1377,7 @@
       // so use optimized assembly in memchr to skip ahead.
       // If first_byte isn't found, we can skip to the end
       // of the string.
-      if ((p = BytePtr(memchr(p, params->first_byte, ep - p))) == NULL) {
+      if ((p = BytePtr(memchr(p, first_byte, ep - p))) == NULL) {
         p = ep;
         break;
       }
@@ -1589,7 +1581,7 @@
 // For debugging, calls the general code directly.
 bool DFA::SlowSearchLoop(SearchParams* params) {
   return InlinedSearchLoop(params,
-                           params->first_byte >= 0,
+                           params->have_first_byte,
                            params->want_earliest_match,
                            params->run_forward);
 }
@@ -1610,8 +1602,7 @@
     &DFA::SearchTTT,
   };
 
-  bool have_first_byte = params->first_byte >= 0;
-  int index = 4 * have_first_byte +
+  int index = 4 * params->have_first_byte +
               2 * params->want_earliest_match +
               1 * params->run_forward;
   return (this->*Searches[index])(params);
@@ -1703,13 +1694,21 @@
     }
   }
 
-  if (ExtraDebug)
-    fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s first_byte=%d\n",
-            params->anchored, params->run_forward, flags,
-            DumpState(info->start).c_str(), info->first_byte.load());
+  params->start = info->start.load(std::memory_order_acquire);
 
-  params->start = info->start;
-  params->first_byte = info->first_byte.load(std::memory_order_acquire);
+  // Even if we have a first byte, we cannot use it when anchored and,
+  // less obviously, we cannot use it when we are going to need flags.
+  // This trick works only when there is a single byte that leads to a
+  // different state!
+  if (prog_->first_byte() >= 0 &&
+      !params->anchored &&
+      params->start->flag_ >> kFlagNeedShift == 0)
+    params->have_first_byte = true;
+
+  if (ExtraDebug)
+    fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s have_first_byte=%d\n",
+            params->anchored, params->run_forward, flags,
+            DumpState(params->start).c_str(), params->have_first_byte);
 
   return true;
 }
@@ -1718,47 +1717,25 @@
 bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
                               uint32_t flags) {
   // Quick check.
-  int fb = info->first_byte.load(std::memory_order_acquire);
-  if (fb != kFbUnknown)
+  State* start = info->start.load(std::memory_order_acquire);
+  if (start != NULL)
     return true;
 
   MutexLock l(&mutex_);
-  fb = info->first_byte.load(std::memory_order_relaxed);
-  if (fb != kFbUnknown)
+  start = info->start.load(std::memory_order_relaxed);
+  if (start != NULL)
     return true;
 
   q0_->clear();
   AddToQueue(q0_,
              params->anchored ? prog_->start() : prog_->start_unanchored(),
              flags);
-  info->start = WorkqToCachedState(q0_, NULL, flags);
-  if (info->start == NULL)
+  start = WorkqToCachedState(q0_, NULL, flags);
+  if (start == NULL)
     return false;
 
-  if (info->start == DeadState) {
-    // Synchronize with "quick check" above.
-    info->first_byte.store(kFbNone, std::memory_order_release);
-    return true;
-  }
-
-  if (info->start == FullMatchState) {
-    // Synchronize with "quick check" above.
-    info->first_byte.store(kFbNone, std::memory_order_release);  // will be ignored
-    return true;
-  }
-
-  // Even if we have a first_byte, we cannot use it when anchored and,
-  // less obviously, we cannot use it when we are going to need flags.
-  // This trick works only when there is a single byte that leads to a
-  // different state!
-  int first_byte = prog_->first_byte();
-  if (first_byte == -1 ||
-      params->anchored ||
-      info->start->flag_ >> kFlagNeedShift != 0)
-    first_byte = kFbNone;
-
   // Synchronize with "quick check" above.
-  info->first_byte.store(first_byte, std::memory_order_release);
+  info->start.store(start, std::memory_order_release);
   return true;
 }