Make the NFA execution engine use Prog::EmptyFlags().

Change-Id: Iaab77132da28aaf1b43e45b1c72d2597397462c3
Reviewed-on: https://code-review.googlesource.com/c/35770
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/nfa.cc b/re2/nfa.cc
index 1a3e0af..b2d5c0a 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -95,20 +95,20 @@
 
   // Follows all empty arrows from id0 and enqueues all the states reached.
   // Enqueues only the ByteRange instructions that match byte c.
-  // The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match.
+  // context is used (with p) for evaluating empty-width specials.
   // p is the current input position, and t0 is the current thread.
-  void AddToThreadq(Threadq* q, int id0, int c, int flag,
+  void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
                     const char* p, Thread* t0);
 
   // Run runq on byte c, appending new states to nextq.
   // Updates matched_ and match_ as new, better matches are found.
+  // context is used (with p) for evaluating empty-width specials.
   // p is the position of byte c in the input string for AddToThreadq;
   // p-1 will be used when processing Match instructions.
-  // flag is the bitwise OR of Bol, Eol, etc., specifying whether
-  // ^, $ and \b match the current input position (after c).
   // Frees all the threads on runq.
   // If there is a shortcut to the end, returns that shortcut.
-  inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p);
+  int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+           const char* p);
 
   // Returns text version of capture information, for debugging.
   string FormatCapture(const char** capture);
@@ -204,9 +204,9 @@
 
 // Follows all empty arrows from id0 and enqueues all the states reached.
 // Enqueues only the ByteRange instructions that match byte c.
-// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match.
+// context is used (with p) for evaluating empty-width specials.
 // p is the current input position, and t0 is the current thread.
-void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag,
+void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
                        const char* p, Thread* t0) {
   if (id0 == 0)
     return;
@@ -318,7 +318,7 @@
         stk[nstk++] = AddState(id+1);
 
       // Continue on if we have all the right flag bits.
-      if (ip->empty() & ~flag)
+      if (ip->empty() & ~Prog::EmptyFlags(context, p))
         break;
       a = AddState(ip->out());
       goto Loop;
@@ -328,13 +328,13 @@
 
 // Run runq on byte c, appending new states to nextq.
 // Updates matched_ and match_ as new, better matches are found.
+// context is used (with p) for evaluating empty-width specials.
 // p is the position of byte c in the input string for AddToThreadq;
 // p-1 will be used when processing Match instructions.
-// flag is the bitwise OR of Bol, Eol, etc., specifying whether
-// ^, $ and \b match the current input position (after c).
 // Frees all the threads on runq.
 // If there is a shortcut to the end, returns that shortcut.
-int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) {
+int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+              const char* p) {
   nextq->clear();
 
   for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
@@ -360,7 +360,7 @@
         break;
 
       case kInstByteRange:
-        AddToThreadq(nextq, ip->out(), c, flag, p, t);
+        AddToThreadq(nextq, ip->out(), c, context, p, t);
         break;
 
       case kInstAltMatch:
@@ -500,38 +500,9 @@
   runq->clear();
   nextq->clear();
   memset(&match_[0], 0, ncapture_*sizeof match_[0]);
-  int wasword = 0;
-
-  if (text.begin() > context.begin())
-    wasword = Prog::IsWordChar(text.begin()[-1] & 0xFF);
 
   // Loop over the text, stepping the machine.
   for (const char* p = text.begin();; p++) {
-    // Check for empty-width specials.
-    int flag = 0;
-
-    // ^ and \A
-    if (p == context.begin())
-      flag |= kEmptyBeginText | kEmptyBeginLine;
-    else if (p <= context.end() && p[-1] == '\n')
-      flag |= kEmptyBeginLine;
-
-    // $ and \z
-    if (p == context.end())
-      flag |= kEmptyEndText | kEmptyEndLine;
-    else if (p < context.end() && p[0] == '\n')
-      flag |= kEmptyEndLine;
-
-    // \b and \B
-    int isword = 0;
-    if (p < context.end())
-      isword = Prog::IsWordChar(p[0] & 0xFF);
-
-    if (isword != wasword)
-      flag |= kEmptyWordBoundary;
-    else
-      flag |= kEmptyNonWordBoundary;
-
     if (ExtraDebug) {
       int c = 0;
       if (p == context.begin())
@@ -541,7 +512,7 @@
       else if (p < text.end())
         c = p[0] & 0xFF;
 
-      fprintf(stderr, "%c[%#x/%d/%d]:", c, flag, isword, wasword);
+      fprintf(stderr, "%c:", c);
       for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
         Thread* t = i->second;
         if (t == NULL)
@@ -552,7 +523,7 @@
     }
 
     // This is a no-op the first time around the loop because runq is empty.
-    int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, flag, p);
+    int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p);
     DCHECK_EQ(runq->size(), 0);
     using std::swap;
     swap(nextq, runq);
@@ -604,17 +575,14 @@
         p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
         if (p == NULL) {
           p = text.end();
-          isword = 0;
-        } else {
-          isword = Prog::IsWordChar(p[0] & 0xFF);
         }
-        flag = Prog::EmptyFlags(context, p);
       }
 
       Thread* t = AllocThread();
       CopyCapture(t->capture, match_);
       t->capture[0] = p;
-      AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, flag, p, t);
+      AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, context, p,
+                   t);
       Decref(t);
     }
 
@@ -624,8 +592,6 @@
         fprintf(stderr, "dead\n");
       break;
     }
-
-    wasword = isword;
   }
 
   for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)