Don't assume that iterators are just pointers.

Iterators are plain pointers for re2::StringPiece and
absl::string_view, and happen to be plain pointers for
std::string_view in libstdc++ and libc++, but std::string_view
in MSVC has iterators that aren't just pointers. Use data() and
data() + size() wherever a const char* is actually needed.

Fixes #214.

Change-Id: I99542a1b53f138228a54f3845df140465a71980d
Reviewed-on: https://code-review.googlesource.com/c/re2/+/44230
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 6f045b1..317b26f 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -86,7 +86,7 @@
 // we don't repeat the visit.
 bool BitState::ShouldVisit(int id, const char* p) {
   int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) +
-          static_cast<int>(p-text_.begin());
+          static_cast<int>(p-text_.data());
   if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))
     return false;
   visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1));
@@ -134,7 +134,7 @@
 // Return whether it succeeded.
 bool BitState::TrySearch(int id0, const char* p0) {
   bool matched = false;
-  const char* end = text_.end();
+  const char* end = text_.data() + text_.size();
   njob_ = 0;
   // Push() no longer checks ShouldVisit(),
   // so we must perform the check ourselves.
@@ -251,7 +251,7 @@
         matched = true;
         cap_[1] = p;
         if (submatch_[0].data() == NULL ||
-            (longest_ && p > submatch_[0].end())) {
+            (longest_ && p > submatch_[0].data() + submatch_[0].size())) {
           for (int i = 0; i < nsubmatch_; i++)
             submatch_[i] =
                 StringPiece(cap_[2 * i],
@@ -288,7 +288,7 @@
   // Search parameters.
   text_ = text;
   context_ = context;
-  if (context_.begin() == NULL)
+  if (context_.data() == NULL)
     context_ = text;
   if (prog_->anchor_start() && context_.begin() != text.begin())
     return false;
@@ -319,8 +319,8 @@
 
   // Anchored search must start at text.begin().
   if (anchored_) {
-    cap_[0] = text.begin();
-    return TrySearch(prog_->start(), text.begin());
+    cap_[0] = text.data();
+    return TrySearch(prog_->start(), text.data());
   }
 
   // Unanchored search, starting from each possible text position.
@@ -329,13 +329,14 @@
   // This looks like it's quadratic in the size of the text,
   // but we are not clearing visited_ between calls to TrySearch,
   // so no work is duplicated and it ends up still being linear.
-  for (const char* p = text.begin(); p <= text.end(); p++) {
+  for (const char* p = text.data(); p <= text.data() + text.size(); p++) {
     // Try to use memchr to find the first byte quickly.
     int fb = prog_->first_byte();
-    if (fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) {
-      p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
+    if (fb >= 0 && p < text.data() + text.size() && (p[0] & 0xFF) != fb) {
+      p = reinterpret_cast<const char*>(
+          memchr(p, fb, text.data() + text.size() - p));
       if (p == NULL)
-        p = text.end();
+        p = text.data() + text.size();
     }
 
     cap_[0] = p;
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 40880f9..77a1fb6 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -1328,10 +1328,11 @@
                                    bool want_earliest_match,
                                    bool run_forward) {
   State* start = params->start;
-  const uint8_t* bp = BytePtr(params->text.begin());  // start of text
-  const uint8_t* p = bp;                              // text scanning point
-  const uint8_t* ep = BytePtr(params->text.end());    // end of text
-  const uint8_t* resetp = NULL;                       // p at last cache reset
+  const uint8_t* bp = BytePtr(params->text.data());  // start of text
+  const uint8_t* p = bp;                             // text scanning point
+  const uint8_t* ep = BytePtr(params->text.data() +
+                              params->text.size());  // end of text
+  const uint8_t* resetp = NULL;                      // p at last cache reset
   if (!run_forward) {
     using std::swap;
     swap(p, ep);
@@ -1798,9 +1799,9 @@
     return false;
   if (params.start == FullMatchState) {
     if (run_forward == want_earliest_match)
-      *epp = text.begin();
+      *epp = text.data();
     else
-      *epp = text.end();
+      *epp = text.data() + text.size();
     return true;
   }
   if (ExtraDebug)
@@ -1863,7 +1864,7 @@
   *failed = false;
 
   StringPiece context = const_context;
-  if (context.begin() == NULL)
+  if (context.data() == NULL)
     context = text;
   bool carat = anchor_start();
   bool dollar = anchor_end();
@@ -1910,7 +1911,7 @@
     return false;
   if (!matched)
     return false;
-  if (endmatch && ep != (reversed_ ? text.begin() : text.end()))
+  if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
     return false;
 
   // If caller cares, record the boundary of the match.
@@ -1918,10 +1919,11 @@
   // as the beginning.
   if (match0) {
     if (reversed_)
-      *match0 = StringPiece(ep, static_cast<size_t>(text.end() - ep));
+      *match0 =
+          StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
     else
       *match0 =
-          StringPiece(text.begin(), static_cast<size_t>(ep - text.begin()));
+          StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
   }
   return true;
 }
diff --git a/re2/nfa.cc b/re2/nfa.cc
index 7bb4faf..e16831e 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -448,7 +448,7 @@
     return false;
 
   StringPiece context = const_context;
-  if (context.begin() == NULL)
+  if (context.data() == NULL)
     context = text;
 
   // Sanity check: make sure that text lies within context.
@@ -465,7 +465,6 @@
   if (prog_->anchor_end()) {
     longest = true;
     endmatch_ = true;
-    etext_ = text.end();
   }
 
   if (nsubmatch < 0) {
@@ -488,7 +487,9 @@
   matched_ = false;
 
   // For debugging prints.
-  btext_ = context.begin();
+  btext_ = context.data();
+  // For convenience.
+  etext_ = text.data() + text.size();
 
   if (ExtraDebug)
     fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
@@ -503,14 +504,14 @@
   memset(&match_[0], 0, ncapture_*sizeof match_[0]);
 
   // Loop over the text, stepping the machine.
-  for (const char* p = text.begin();; p++) {
+  for (const char* p = text.data();; p++) {
     if (ExtraDebug) {
       int c = 0;
-      if (p == context.begin())
+      if (p == btext_)
         c = '^';
-      else if (p > text.end())
+      else if (p > etext_)
         c = '$';
-      else if (p < text.end())
+      else if (p < etext_)
         c = p[0] & 0xFF;
 
       fprintf(stderr, "%c:", c);
@@ -524,14 +525,14 @@
     }
 
     // This is a no-op the first time around the loop because runq is empty.
-    int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p);
+    int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
     DCHECK_EQ(runq->size(), 0);
     using std::swap;
     swap(nextq, runq);
     nextq->clear();
     if (id != 0) {
       // We're done: full match ahead.
-      p = text.end();
+      p = etext_;
       for (;;) {
         Prog::Inst* ip = prog_->inst(id);
         switch (ip->opcode()) {
@@ -559,30 +560,30 @@
       break;
     }
 
-    if (p > text.end())
+    if (p > etext_)
       break;
 
     // Start a new thread if there have not been any matches.
     // (No point in starting a new thread if there have been
     // matches, since it would be to the right of the match
     // we already found.)
-    if (!matched_ && (!anchored || p == text.begin())) {
+    if (!matched_ && (!anchored || p == text.data())) {
       // If there's a required first byte for an unanchored search
       // and we're not in the middle of any possible matches,
       // use memchr to search for the byte quickly.
       int fb = prog_->first_byte();
       if (!anchored && runq->size() == 0 &&
-          fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) {
-        p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
+          fb >= 0 && p < etext_ && (p[0] & 0xFF) != fb) {
+        p = reinterpret_cast<const char*>(memchr(p, fb, etext_ - p));
         if (p == NULL) {
-          p = text.end();
+          p = etext_;
         }
       }
 
       Thread* t = AllocThread();
       CopyCapture(t->capture, match_);
       t->capture[0] = p;
-      AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, context, p,
+      AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
                    t);
       Decref(t);
     }
diff --git a/re2/onepass.cc b/re2/onepass.cc
index d615893..7a774ce 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc
@@ -235,7 +235,7 @@
     matchcap[i] = NULL;
 
   StringPiece context = const_context;
-  if (context.begin() == NULL)
+  if (context.data() == NULL)
     context = text;
   if (anchor_start() && context.begin() != text.begin())
     return false;
@@ -249,8 +249,8 @@
   // start() is always mapped to the zeroth OneState.
   OneState* state = IndexToNode(nodes, statesize, 0);
   uint8_t* bytemap = bytemap_;
-  const char* bp = text.begin();
-  const char* ep = text.end();
+  const char* bp = text.data();
+  const char* ep = text.data() + text.size();
   const char* p;
   bool matched = false;
   matchcap[0] = bp;
diff --git a/re2/parse.cc b/re2/parse.cc
index 93b922a..03b53c7 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -1447,7 +1447,7 @@
 // Sets *rp to the named character.
 static bool ParseEscape(StringPiece* s, Rune* rp,
                         RegexpStatus* status, int rune_max) {
-  const char* begin = s->begin();
+  const char* begin = s->data();
   if (s->size() < 1 || (*s)[0] != '\\') {
     // Should not happen - caller always checks.
     status->set_code(kRegexpInternalError);
@@ -1590,7 +1590,7 @@
   // Unrecognized escape sequence.
   status->set_code(kRegexpBadEscape);
   status->set_error_arg(
-      StringPiece(begin, static_cast<size_t>(s->begin() - begin)));
+      StringPiece(begin, static_cast<size_t>(s->data() - begin)));
   return false;
 }
 
@@ -1710,7 +1710,7 @@
     return NULL;
   // Could use StringPieceToRune, but there aren't
   // any non-ASCII Perl group names.
-  StringPiece name(s->begin(), 2);
+  StringPiece name(s->data(), 2);
   const UGroup *g = LookupPerlGroup(name);
   if (g == NULL)
     return NULL;
@@ -1750,8 +1750,8 @@
     return kParseError;
   if (c != '{') {
     // Name is the bit of string we just skipped over for c.
-    const char* p = seq.begin() + 2;
-    name = StringPiece(p, static_cast<size_t>(s->begin() - p));
+    const char* p = seq.data() + 2;
+    name = StringPiece(p, static_cast<size_t>(s->data() - p));
   } else {
     // Name is in braces. Look for closing }
     size_t end = s->find('}', 0);
@@ -1762,14 +1762,14 @@
       status->set_error_arg(seq);
       return kParseError;
     }
-    name = StringPiece(s->begin(), end);  // without '}'
+    name = StringPiece(s->data(), end);  // without '}'
     s->remove_prefix(end + 1);  // with '}'
     if (!IsValidUTF8(name, status))
       return kParseError;
   }
 
   // Chop seq where s now begins.
-  seq = StringPiece(seq.begin(), static_cast<size_t>(s->begin() - seq.begin()));
+  seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));
 
   if (name.size() > 0 && name[0] == '^') {
     sign = -sign;
@@ -2074,8 +2074,8 @@
     }
 
     // t is "P<name>...", t[end] == '>'
-    StringPiece capture(t.begin()-2, end+3);  // "(?P<name>"
-    StringPiece name(t.begin()+2, end-2);     // "name"
+    StringPiece capture(t.data()-2, end+3);  // "(?P<name>"
+    StringPiece name(t.data()+2, end-2);     // "name"
     if (!IsValidUTF8(name, status_))
       return false;
     if (!IsValidCaptureName(name)) {
@@ -2089,7 +2089,8 @@
       return false;
     }
 
-    s->remove_prefix(static_cast<size_t>(capture.end() - s->begin()));
+    s->remove_prefix(
+        static_cast<size_t>(capture.data() + capture.size() - s->data()));
     return true;
   }
 
@@ -2173,7 +2174,7 @@
 BadPerlOp:
   status_->set_code(kRegexpBadPerlOp);
   status_->set_error_arg(
-      StringPiece(s->begin(), static_cast<size_t>(t.begin() - s->begin())));
+      StringPiece(s->data(), static_cast<size_t>(t.data() - s->data())));
   return false;
 }
 
@@ -2321,8 +2322,8 @@
             // (and a++ means something else entirely, which we don't support!)
             status->set_code(kRegexpRepeatOp);
             status->set_error_arg(StringPiece(
-                lastunary.begin(),
-                static_cast<size_t>(t.begin() - lastunary.begin())));
+                lastunary.data(),
+                static_cast<size_t>(t.data() - lastunary.data())));
             return NULL;
           }
         }
@@ -2354,8 +2355,8 @@
             // Not allowed to stack repetition operators.
             status->set_code(kRegexpRepeatOp);
             status->set_error_arg(StringPiece(
-                lastunary.begin(),
-                static_cast<size_t>(t.begin() - lastunary.begin())));
+                lastunary.data(),
+                static_cast<size_t>(t.data() - lastunary.data())));
             return NULL;
           }
         }
diff --git a/re2/prog.cc b/re2/prog.cc
index 5155943..cc35917 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -288,24 +288,24 @@
   int flags = 0;
 
   // ^ and \A
-  if (p == text.begin())
+  if (p == text.data())
     flags |= kEmptyBeginText | kEmptyBeginLine;
   else if (p[-1] == '\n')
     flags |= kEmptyBeginLine;
 
   // $ and \z
-  if (p == text.end())
+  if (p == text.data() + text.size())
     flags |= kEmptyEndText | kEmptyEndLine;
-  else if (p < text.end() && p[0] == '\n')
+  else if (p < text.data() + text.size() && p[0] == '\n')
     flags |= kEmptyEndLine;
 
   // \b and \B
-  if (p == text.begin() && p == text.end()) {
+  if (p == text.data() && p == text.data() + text.size()) {
     // no word boundary here
-  } else if (p == text.begin()) {
+  } else if (p == text.data()) {
     if (IsWordChar(p[0]))
       flags |= kEmptyWordBoundary;
-  } else if (p == text.end()) {
+  } else if (p == text.data() + text.size()) {
     if (IsWordChar(p[-1]))
       flags |= kEmptyWordBoundary;
   } else {
diff --git a/re2/re2.cc b/re2/re2.cc
index a4b4992..ef02d82 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -377,8 +377,8 @@
   if (!re.Rewrite(&s, rewrite, vec, nvec))
     return false;
 
-  assert(vec[0].begin() >= str->data());
-  assert(vec[0].end() <= str->data()+str->size());
+  assert(vec[0].data() >= str->data());
+  assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
   str->replace(vec[0].data() - str->data(), vec[0].size(), s);
   return true;
 }
@@ -406,9 +406,9 @@
     if (!re.Match(*str, static_cast<size_t>(p - str->data()),
                   str->size(), UNANCHORED, vec, nvec))
       break;
-    if (p < vec[0].begin())
-      out.append(p, vec[0].begin() - p);
-    if (vec[0].begin() == lastend && vec[0].size() == 0) {
+    if (p < vec[0].data())
+      out.append(p, vec[0].data() - p);
+    if (vec[0].data() == lastend && vec[0].size() == 0) {
       // Disallow empty match at end of last match: skip ahead.
       //
       // fullrune() takes int, not ptrdiff_t. However, it just looks
@@ -439,7 +439,7 @@
       continue;
     }
     re.Rewrite(&out, rewrite, vec, nvec);
-    p = vec[0].end();
+    p = vec[0].data() + vec[0].size();
     lastend = p;
     count++;
   }
diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc
index ae9fd82..6cde42d 100644
--- a/re2/testing/backtrack.cc
+++ b/re2/testing/backtrack.cc
@@ -105,7 +105,7 @@
                          StringPiece* submatch, int nsubmatch) {
   text_ = text;
   context_ = context;
-  if (context_.begin() == NULL)
+  if (context_.data() == NULL)
     context_ = text;
   if (prog_->anchor_start() && text.begin() > context_.begin())
     return false;
@@ -137,14 +137,14 @@
 
   // Anchored search must start at text.begin().
   if (anchored_) {
-    cap_[0] = text.begin();
-    return Visit(prog_->start(), text.begin());
+    cap_[0] = text.data();
+    return Visit(prog_->start(), text.data());
   }
 
   // Unanchored search, starting from each possible text position.
   // Notice that we have to try the empty string at the end of
   // the text, so the loop condition is p <= text.end(), not p < text.end().
-  for (const char* p = text.begin(); p <= text.end(); p++) {
+  for (const char* p = text.data(); p <= text.data() + text.size(); p++) {
     cap_[0] = p;
     if (Visit(prog_->start(), p))  // Match must be leftmost; done.
       return true;
@@ -158,8 +158,8 @@
   // Check bitmap.  If we've already explored from here,
   // either it didn't match or it did but we're hoping for a better match.
   // Either way, don't go down that road again.
-  CHECK(p <= text_.end());
-  size_t n = id*(text_.size()+1) + (p - text_.begin());
+  CHECK(p <= text_.data() + text_.size());
+  size_t n = id*(text_.size()+1) + (p - text_.data());
   CHECK_LT(n/32, nvisited_);
   if (visited_[n/32] & (1 << (n&31)))
     return false;
@@ -182,7 +182,7 @@
   // Pick out byte at current position.  If at end of string,
   // have to explore in hope of finishing a match.  Use impossible byte -1.
   int c = -1;
-  if (p < text_.end())
+  if (p < text_.data() + text_.size())
     c = *p & 0xFF;
 
   Prog::Inst* ip = prog_->inst(id);
@@ -224,11 +224,12 @@
     case kInstMatch:
       // We found a match.  If it's the best so far, record the
       // parameters in the caller's submatch_ array.
-      if (endmatch_ && p != context_.end())
+      if (endmatch_ && p != context_.data() + context_.size())
         return false;
       cap_[1] = p;
-      if (submatch_[0].data() == NULL ||           // First match so far ...
-          (longest_ && p > submatch_[0].end())) {  // ... or better match
+      if (submatch_[0].data() == NULL ||
+          (longest_ && p > submatch_[0].data() + submatch_[0].size())) {
+        // First match so far - or better match.
         for (int i = 0; i < nsubmatch_; i++)
           submatch_[i] = StringPiece(
               cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
diff --git a/re2/testing/exhaustive_tester.cc b/re2/testing/exhaustive_tester.cc
index 47950ba..cadd2b4 100644
--- a/re2/testing/exhaustive_tester.cc
+++ b/re2/testing/exhaustive_tester.cc
@@ -62,7 +62,7 @@
   for (int i = 0; i < n; i++) {
     if (i > 0)
       printf(" ");
-    if (m[i].begin() == NULL)
+    if (m[i].data() == NULL)
       printf("-");
     else
       printf("%td-%td",
diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc
index 1e4d3da..d156c88 100644
--- a/re2/testing/regexp_generator.cc
+++ b/re2/testing/regexp_generator.cc
@@ -241,7 +241,7 @@
 std::vector<std::string> Explode(const StringPiece& s) {
   std::vector<std::string> v;
 
-  for (const char *q = s.begin(); q < s.end(); ) {
+  for (const char *q = s.data(); q < s.data() + s.size(); ) {
     const char* p = q;
     Rune r;
     q += chartorune(&r, q);
@@ -259,8 +259,8 @@
   if (sep.size() == 0)
     return Explode(s);
 
-  const char *p = s.begin();
-  for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) {
+  const char *p = s.data();
+  for (const char *q = s.data(); q + sep.size() <= s.data() + s.size(); q++) {
     if (StringPiece(q, sep.size()) == sep) {
       v.push_back(std::string(p, q - p));
       p = q + sep.size();
@@ -268,8 +268,8 @@
       continue;
     }
   }
-  if (p < s.end())
-    v.push_back(std::string(p, s.end() - p));
+  if (p < s.data() + s.size())
+    v.push_back(std::string(p, s.data() + s.size() - p));
   return v;
 }
 
diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
index d676d9a..f08d717 100644
--- a/re2/testing/tester.cc
+++ b/re2/testing/tester.cc
@@ -99,7 +99,7 @@
 // where a and b are the starting and ending offsets of s in text.
 static std::string FormatCapture(const StringPiece& text,
                                  const StringPiece& s) {
-  if (s.begin() == NULL)
+  if (s.data() == NULL)
     return "(?,?)";
   return StringPrintf("(%td,%td)",
                       s.begin() - text.begin(), s.end() - text.begin());
@@ -489,7 +489,7 @@
     return false;
   if (r.have_submatch || r.have_submatch0) {
     for (int i = 0; i < kMaxSubmatch; i++) {
-      if (correct.submatch[i].begin() != r.submatch[i].begin() ||
+      if (correct.submatch[i].data() != r.submatch[i].data() ||
           correct.submatch[i].size() != r.submatch[i].size())
         return false;
       if (!r.have_submatch)
@@ -555,8 +555,8 @@
       }
     }
     for (int i = 0; i < 1+num_captures_; i++) {
-      if (r.submatch[i].begin() != correct.submatch[i].begin() ||
-          r.submatch[i].end() != correct.submatch[i].end()) {
+      if (r.submatch[i].data() != correct.submatch[i].data() ||
+          r.submatch[i].size() != correct.submatch[i].size()) {
         LOG(INFO) <<
           StringPrintf("   $%d: should be %s is %s",
                        i,