Implement "front and back" prefix accel.
Change-Id: Ib585680e7b8ff1463db4cea55539e6fb52eed1d4
Reviewed-on: https://code-review.googlesource.com/c/re2/+/56657
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/prog.cc b/re2/prog.cc
index 06d58c8..fdc8022 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -7,6 +7,9 @@
#include "re2/prog.h"
+#if defined(__GNUC__) && defined(__AVX2__)
+#include <immintrin.h>
+#endif
#include <stdint.h>
#include <string.h>
#include <algorithm>
@@ -910,4 +913,49 @@
}
}
+const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
+ DCHECK_GE(prefix_size_, 2);
+ if (size < prefix_size_)
+ return NULL;
+ // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
+ // This also means that probing for prefix_back_ doesn't go out of bounds.
+ size -= prefix_size_-1;
+
+#if defined(__GNUC__) && defined(__AVX2__)
+ // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
+ if (size >= sizeof(__m256i)) {
+ const __m256i* fp = reinterpret_cast<const __m256i*>(
+ reinterpret_cast<const char*>(data));
+ const __m256i* bp = reinterpret_cast<const __m256i*>(
+ reinterpret_cast<const char*>(data) + prefix_size_-1);
+ const __m256i* endfp = fp + size/sizeof(__m256i);
+ const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
+ const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
+ while (fp != endfp) {
+ const __m256i f_loadu = _mm256_loadu_si256(fp++);
+ const __m256i b_loadu = _mm256_loadu_si256(bp++);
+ const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
+ const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
+ const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
+ if (fb_testz == 0) { // ZF: 1 means zero, 0 means non-zero.
+ const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
+ const int fb_movemask = _mm256_movemask_epi8(fb_and);
+ const int fb_ctz = __builtin_ctz(fb_movemask);
+ return reinterpret_cast<const char*>(fp-1) + fb_ctz;
+ }
+ }
+ data = fp;
+ size = size%sizeof(__m256i);
+ }
+#endif
+
+ const char* p0 = reinterpret_cast<const char*>(data);
+ for (const char* p = p0;; p++) {
+ DCHECK_GE(size, static_cast<size_t>(p-p0));
+ p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
+ if (p == NULL || p[prefix_size_-1] == prefix_back_)
+ return p;
+ }
+}
+
} // namespace re2
diff --git a/re2/prog.h b/re2/prog.h
index e54208c..e9ce682 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -220,10 +220,15 @@
// Accelerates to the first likely occurrence of the prefix.
// Returns a pointer to the first byte or NULL if not found.
const void* PrefixAccel(const void* data, size_t size) {
- DCHECK_NE(prefix_size_, 0);
- return memchr(data, prefix_front_, size);
+ DCHECK_GE(prefix_size_, 1);
+ return prefix_size_ == 1 ? memchr(data, prefix_front_, size)
+ : PrefixAccel_FrontAndBack(data, size);
}
+ // An implementation of prefix accel that looks for prefix_front_ and
+ // prefix_back_ to return fewer false positives than memchr(3) alone.
+ const void* PrefixAccel_FrontAndBack(const void* data, size_t size);
+
// Returns string representation of program for debugging.
std::string Dump();
std::string DumpUnanchored();