Optimise the hot loop some more for Clang.
Change-Id: I751c9f5a6bebc068c86ff6f2f78f5488f5ee3d23
Reviewed-on: https://code-review.googlesource.com/c/re2/+/58931
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/prog.cc b/re2/prog.cc
index 5139523..3979bd2 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -917,6 +917,10 @@
}
}
+// The final state will always be this, which frees up a register for the hot
+// loop and thus avoids the spilling that can occur when building with Clang.
+static const size_t kShiftDFAFinal = 9;
+
// This function takes the prefix as std::string (i.e. not const std::string&
// as normal) because it's going to clobber it, so a temporary is convenient.
static uint64_t* BuildShiftDFA(std::string prefix) {
@@ -965,6 +969,8 @@
uint16_t ncurr = states[dcurr];
uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
size_t dnext = dcurr+1;
+ if (dnext == size)
+ dnext = kShiftDFAFinal;
states[dnext] = nnext;
}
@@ -994,7 +1000,7 @@
// in the hot loop, we check for a match only at the end of each iteration,
// so we must keep signalling the match until we get around to checking it.
for (int b = 0; b < 256; ++b)
- dfa[b] |= static_cast<uint64_t>(size * 6) << (size * 6);
+ dfa[b] |= static_cast<uint64_t>(kShiftDFAFinal * 6) << (kShiftDFAFinal * 6);
return dfa;
}
@@ -1006,7 +1012,7 @@
if (prefix_foldcase_) {
// Use PrefixAccel_ShiftDFA().
// ... and no more than nine bytes of the prefix. (See above for details.)
- prefix_size_ = std::min(prefix_size_, size_t{9});
+ prefix_size_ = std::min(prefix_size_, kShiftDFAFinal);
prefix_dfa_ = BuildShiftDFA(prefix.substr(0, prefix_size_));
} else if (prefix_size_ != 1) {
// Use PrefixAccel_FrontAndBack().
@@ -1057,7 +1063,7 @@
uint64_t curr6 = next6 >> (curr5 & 63);
uint64_t curr7 = next7 >> (curr6 & 63);
- if ((curr7 & 63) == prefix_size_ * 6) {
+ if ((curr7 & 63) == kShiftDFAFinal * 6) {
// At the time of writing, using the same masking subexpressions from
// the preceding lines caused Clang to clutter the hot loop computing
// them - even though they aren't actually needed for shifting! Hence
@@ -1085,7 +1091,7 @@
uint8_t b = *p++;
uint64_t next = prefix_dfa_[b];
curr = next >> (curr & 63);
- if ((curr & 63) == prefix_size_ * 6)
+ if ((curr & 63) == kShiftDFAFinal * 6)
return p-prefix_size_;
}
return NULL;