Revert the commits for the shard_cache_mutex option.
Change-Id: I663349ffcc7215dd4b8478da8d34a6b67effff47
Reviewed-on: https://code-review.googlesource.com/c/36650
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/Makefile b/Makefile
index 110ca73..f001f06 100644
--- a/Makefile
+++ b/Makefile
@@ -7,11 +7,6 @@
# CCICU=$(shell pkg-config icu-uc --cflags) -DRE2_USE_ICU
# LDICU=$(shell pkg-config icu-uc --libs)
-# To use NUMA topology to shard the DFA state cache mutex,
-# uncomment the next two lines:
-# CCNUMA=-DRE2_USE_NUMA
-# LDNUMA=-lnuma
-
# To build against PCRE for testing or benchmarking,
# uncomment the next two lines:
# CCPCRE=-I/usr/local/include -DUSEPCRE
@@ -22,8 +17,8 @@
CXXFLAGS?=-O3 -g
LDFLAGS?=
# required
-RE2_CXXFLAGS?=-std=c++11 -pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCNUMA) $(CCPCRE)
-RE2_LDFLAGS?=-pthread $(LDICU) $(LDNUMA) $(LDPCRE)
+RE2_CXXFLAGS?=-std=c++11 -pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE)
+RE2_LDFLAGS?=-pthread $(LDICU) $(LDPCRE)
AR?=ar
ARFLAGS?=rsc
NM?=nm
@@ -292,7 +287,7 @@
@echo
static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
-static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDNUMA) $(LDFLAGS)
+static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS)
static-testinstall:
@mkdir -p obj
@cp testinstall.cc obj
@@ -306,7 +301,7 @@
endif
shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
-shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDNUMA) $(LDFLAGS)
+shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS)
shared-testinstall:
@mkdir -p obj
@cp testinstall.cc obj
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 5574071..9bc8499 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -36,7 +36,6 @@
#include <utility>
#include <vector>
-#include "util/util.h"
#include "util/logging.h"
#include "util/mix.h"
#include "util/mutex.h"
@@ -45,11 +44,6 @@
#include "re2/prog.h"
#include "re2/stringpiece.h"
-#if defined(RE2_USE_NUMA)
-#include <numa.h>
-#include <sched.h>
-#endif
-
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
#ifdef _MSC_VER
#pragma warning(disable: 4200)
@@ -273,7 +267,7 @@
bool run_forward;
State* start;
int first_byte;
- RWLocker* cache_lock;
+ RWLocker *cache_lock;
bool failed; // "out" parameter: whether search gave up
const char* ep; // "out" parameter: end pointer for match
SparseSet* matches;
@@ -343,12 +337,6 @@
return prog_->bytemap()[c];
}
- // Determines how many ways to shard cache_mutex_.
- int CacheMutexCount() const;
-
- // Determines which shard of cache_mutex_ to use.
- int WhichCacheMutex() const;
-
// Constant after initialization.
Prog* prog_; // The regular expression program to run.
Prog::MatchKind kind_; // The kind of DFA.
@@ -363,32 +351,18 @@
int nastack_;
// State* cache. Many threads use and add to the cache simultaneously,
- // holding one cache_mutex_ for reading and mutex_ (above) when adding.
+ // holding cache_mutex_ for reading and mutex_ (above) when adding.
// If the cache fills and needs to be discarded, the discarding is done
- // while holding every cache_mutex_ for writing to avoid disrupting any
- // readers. Any State* is valid only while one (or every) cache_mutex_
- // is held by the thread using the State*.
- class alignas(CACHELINE_SIZE) AlignedMutex : public Mutex {};
- AlignedMutex* cache_mutex_;
+ // while holding cache_mutex_ for writing, to avoid interrupting other
+ // readers. Any State* pointers are only valid while cache_mutex_
+ // is held.
+ Mutex cache_mutex_;
int64_t mem_budget_; // Total memory budget for all States.
int64_t state_budget_; // Amount of memory remaining for new States.
StateSet state_cache_; // All States computed so far.
StartInfo start_[kMaxStart];
-
- // Until we can use C++17, we must handle the alignment ourselves. :(
- // Someday, std::unique_ptr<AlignedMutex[]> will be quite sufficient.
- char* cache_mutex_storage_;
- int cache_mutex_count_;
};
-template <typename T>
-static inline T* Align(T* base, size_t align) {
- intptr_t tmp = reinterpret_cast<intptr_t>(base);
- tmp += align - 1;
- tmp &= ~(align - 1);
- return reinterpret_cast<T*>(tmp);
-}
-
// Shorthand for casting to uint8_t*.
static inline const uint8_t* BytePtr(const void* v) {
return reinterpret_cast<const uint8_t*>(v);
@@ -468,9 +442,7 @@
q0_(NULL),
q1_(NULL),
astack_(NULL),
- mem_budget_(max_mem),
- cache_mutex_storage_(NULL),
- cache_mutex_count_(CacheMutexCount()) {
+ mem_budget_(max_mem) {
if (ExtraDebug)
fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
int nmark = 0;
@@ -482,13 +454,11 @@
prog_->inst_count(kInstNop) +
nmark + 1; // + 1 for start inst
- // Account for memory needed for DFA, q0, q1, astack, cache mutexes.
+ // Account for space needed for DFA, q0, q1, astack.
mem_budget_ -= sizeof(DFA);
mem_budget_ -= (prog_->size() + nmark) *
(sizeof(int)+sizeof(int)) * 2; // q0, q1
mem_budget_ -= nastack_ * sizeof(int); // astack
- mem_budget_ -= alignof(AlignedMutex) +
- sizeof(AlignedMutex) * cache_mutex_count_; // cache mutexes
if (mem_budget_ < 0) {
init_failed_ = true;
return;
@@ -513,23 +483,12 @@
q0_ = new Workq(prog_->size(), nmark);
q1_ = new Workq(prog_->size(), nmark);
astack_ = new int[nastack_];
-
- char* storage = new char[alignof(AlignedMutex) +
- sizeof(AlignedMutex) * cache_mutex_count_];
- cache_mutex_storage_ = storage;
- cache_mutex_ = new (Align(storage, alignof(AlignedMutex)))
- AlignedMutex[cache_mutex_count_]();
}
DFA::~DFA() {
- if (cache_mutex_storage_ != NULL) {
- for (int i = 0; i < cache_mutex_count_; i++)
- cache_mutex_[i].~AlignedMutex();
- }
- delete[] cache_mutex_storage_;
- delete[] astack_;
- delete q1_;
delete q0_;
+ delete q1_;
+ delete[] astack_;
ClearCache();
}
@@ -811,8 +770,8 @@
mem_budget_ -= mem + kStateCacheOverhead;
// Allocate new state along with room for next_ and inst_.
- char* storage = std::allocator<char>().allocate(mem);
- State* s = new (storage) State;
+ char* space = std::allocator<char>().allocate(mem);
+ State* s = new (space) State;
(void) new (s->next_) std::atomic<State*>[nnext];
// Work around a unfortunate bug in older versions of libstdc++.
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658)
@@ -1152,7 +1111,7 @@
class DFA::RWLocker {
public:
- explicit RWLocker(DFA* dfa);
+ explicit RWLocker(Mutex* mu);
~RWLocker();
// If the lock is only held for reading right now,
@@ -1162,39 +1121,35 @@
void LockForWriting();
private:
- DFA* dfa_;
- int which_;
+ Mutex* mu_;
bool writing_;
RWLocker(const RWLocker&) = delete;
RWLocker& operator=(const RWLocker&) = delete;
};
-DFA::RWLocker::RWLocker(DFA* dfa)
- : dfa_(dfa), which_(dfa_->WhichCacheMutex()), writing_(false) {
- dfa_->cache_mutex_[which_].ReaderLock();
+DFA::RWLocker::RWLocker(Mutex* mu) : mu_(mu), writing_(false) {
+ mu_->ReaderLock();
}
// This function is marked as NO_THREAD_SAFETY_ANALYSIS because the annotations
// does not support lock upgrade.
void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
if (!writing_) {
- dfa_->cache_mutex_[which_].ReaderUnlock();
- for (int i = 0; i < dfa_->cache_mutex_count_; i++)
- dfa_->cache_mutex_[i].WriterLock();
+ mu_->ReaderUnlock();
+ mu_->WriterLock();
writing_ = true;
}
}
DFA::RWLocker::~RWLocker() {
- if (!writing_) {
- dfa_->cache_mutex_[which_].ReaderUnlock();
- } else {
- for (int i = 0; i < dfa_->cache_mutex_count_; i++)
- dfa_->cache_mutex_[i].WriterUnlock();
- }
+ if (!writing_)
+ mu_->ReaderUnlock();
+ else
+ mu_->WriterUnlock();
}
+
// When the DFA's State cache fills, we discard all the states in the
// cache and start over. Many threads can be using and adding to the
// cache at the same time, so we synchronize using the cache_mutex_
@@ -1820,7 +1775,7 @@
run_forward, kind_);
}
- RWLocker l(this);
+ RWLocker l(&cache_mutex_);
SearchParams params(text, context, &l);
params.anchored = anchored;
params.want_earliest_match = want_earliest_match;
@@ -1970,7 +1925,7 @@
// Pick out start state for unanchored search
// at beginning of text.
- RWLocker l(this);
+ RWLocker l(&cache_mutex_);
SearchParams params(StringPiece(), StringPiece(), &l);
params.anchored = false;
if (!AnalyzeSearch(¶ms) ||
@@ -2062,7 +2017,7 @@
std::unordered_map<State*, int> previously_visited_states;
// Pick out start state for anchored search at beginning of text.
- RWLocker l(this);
+ RWLocker l(&cache_mutex_);
SearchParams params(StringPiece(), StringPiece(), &l);
params.anchored = true;
if (!AnalyzeSearch(¶ms))
@@ -2185,55 +2140,4 @@
return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
}
-#if defined(RE2_USE_NUMA)
-static int numa_max_count;
-static std::vector<int>* numa_cpu_map;
-
-void InitNUMA() {
- numa_max_count = 0;
- numa_cpu_map = new std::vector<int>;
- if (numa_available() == -1)
- return;
- int nodes = numa_num_configured_nodes();
- if (nodes < 1)
- return;
- int cpus = numa_num_configured_cpus();
- if (cpus < 1)
- return;
- std::vector<int> count;
- count.resize(nodes);
- for (int cpu = 0; cpu < cpus; cpu++) {
- int node = numa_node_of_cpu(cpu);
- numa_cpu_map->emplace_back(count[node]++);
- }
- numa_max_count = *std::max_element(count.begin(), count.end());
-}
-#endif
-
-int DFA::CacheMutexCount() const {
-#if !defined(RE2_USE_NUMA)
- return 1;
-#else
- if (!prog_->shard_cache_mutex())
- return 1;
- static std::once_flag numa_once;
- std::call_once(numa_once, &InitNUMA);
- if (numa_max_count == 0)
- return 1;
- return numa_max_count;
-#endif
-}
-
-int DFA::WhichCacheMutex() const {
-#if !defined(RE2_USE_NUMA)
- return 0;
-#else
- if (!prog_->shard_cache_mutex())
- return 0;
- if (numa_cpu_map->empty())
- return 0;
- return (*numa_cpu_map)[sched_getcpu()];
-#endif
-}
-
} // namespace re2
diff --git a/re2/prog.cc b/re2/prog.cc
index 1103111..b0ae375 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -115,7 +115,6 @@
inst_(NULL),
onepass_nodes_(NULL),
dfa_mem_(0),
- shard_cache_mutex_(false),
dfa_first_(NULL),
dfa_longest_(NULL) {
}
diff --git a/re2/prog.h b/re2/prog.h
index 3bfc9e9..6d48135 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -200,8 +200,6 @@
int inst_count(InstOp op) { return inst_count_[op]; }
void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
int64_t dfa_mem() { return dfa_mem_; }
- void set_shard_cache_mutex(bool b) { shard_cache_mutex_ = b; }
- bool shard_cache_mutex() { return shard_cache_mutex_; }
int flags() { return flags_; }
void set_flags(int flags) { flags_ = flags; }
bool anchor_start() { return anchor_start_; }
@@ -401,7 +399,6 @@
uint8_t* onepass_nodes_; // data for OnePass nodes
int64_t dfa_mem_; // Maximum memory for DFAs.
- bool shard_cache_mutex_; // shard the DFA state cache mutex
DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch
DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch
diff --git a/re2/re2.cc b/re2/re2.cc
index 82911bc..f3f96ae 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -49,7 +49,6 @@
dot_nl_(false),
never_capture_(false),
case_sensitive_(true),
- shard_cache_mutex_(false),
perl_classes_(false),
word_boundary_(false),
one_line_(false) {
@@ -211,9 +210,7 @@
// one third to the reverse prog, because the forward
// Prog has two DFAs but the reverse prog has one.
prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
- if (prog_ != NULL) {
- prog_->set_shard_cache_mutex(options_.shard_cache_mutex());
- } else {
+ if (prog_ == NULL) {
if (options_.log_errors())
LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
error_ = new string("pattern too large - compile failed");
@@ -234,9 +231,7 @@
std::call_once(rprog_once_, [](const RE2* re) {
re->rprog_ =
re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
- if (re->rprog_ != NULL) {
- re->rprog_->set_shard_cache_mutex(re->options_.shard_cache_mutex());
- } else {
+ if (re->rprog_ == NULL) {
if (re->options_.log_errors())
LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
re->error_ = new string("pattern too large - reverse compile failed");
diff --git a/re2/re2.h b/re2/re2.h
index 5ea8efd..c93de56 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -560,7 +560,6 @@
// never_capture (false) parse all parens as non-capturing
// case_sensitive (true) match is case-sensitive (regexp can override
// with (?i) unless in posix_syntax mode)
- // shard_cache_mutex (false) shard the DFA state cache mutex (see below)
//
// The following options are only consulted when posix_syntax == true.
// When posix_syntax == false, these features are always enabled and
@@ -597,11 +596,6 @@
//
// Once a DFA fills its budget, it flushes its cache and starts over.
// If this happens too often, RE2 falls back on the NFA implementation.
- //
- // The shard_cache_mutex option causes RE2 to shard the DFA state cache
- // mutex N ways; N will be determined automatically. This increases the
- // memory footprint somewhat, so it should not be used unless profiling
- // shows that reader lock contention is incurring substantial overhead.
// For now, make the default budget something close to Code Search.
static const int kDefaultMaxMem = 8<<20;
@@ -622,7 +616,6 @@
dot_nl_(false),
never_capture_(false),
case_sensitive_(true),
- shard_cache_mutex_(false),
perl_classes_(false),
word_boundary_(false),
one_line_(false) {
@@ -671,9 +664,6 @@
bool case_sensitive() const { return case_sensitive_; }
void set_case_sensitive(bool b) { case_sensitive_ = b; }
- bool shard_cache_mutex() const { return shard_cache_mutex_; }
- void set_shard_cache_mutex(bool b) { shard_cache_mutex_ = b; }
-
bool perl_classes() const { return perl_classes_; }
void set_perl_classes(bool b) { perl_classes_ = b; }
@@ -700,7 +690,6 @@
bool dot_nl_;
bool never_capture_;
bool case_sensitive_;
- bool shard_cache_mutex_;
bool perl_classes_;
bool word_boundary_;
bool one_line_;
diff --git a/re2/set.cc b/re2/set.cc
index a624d6a..9890de9 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -99,8 +99,6 @@
delete[] sub;
prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
- if (prog_ != NULL)
- prog_->set_shard_cache_mutex(options_.shard_cache_mutex());
re->Decref();
return prog_ != NULL;
}
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index 95044e9..8b82e0b 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -1574,36 +1574,4 @@
BENCHMARK(PossibleMatchRange_Prefix);
BENCHMARK(PossibleMatchRange_NoProg);
-static const RE2& GetShardCacheMutexRE(bool shard_cache_mutex) {
- auto GetRE = [=]() {
- RE2::Options options;
- options.set_shard_cache_mutex(shard_cache_mutex);
- return new RE2("foo*bar+qux?", options);
- };
- if (!shard_cache_mutex) {
- static const auto* const re = GetRE();
- return *re;
- } else {
- static const auto* const re = GetRE();
- return *re;
- }
-}
-
-void ShardCacheMutex_False(int n) {
- const RE2& re = GetShardCacheMutexRE(false);
- for (int i = 0; i < n; i++) {
- RE2::PartialMatch("", re);
- }
-}
-
-void ShardCacheMutex_True(int n) {
- const RE2& re = GetShardCacheMutexRE(true);
- for (int i = 0; i < n; i++) {
- RE2::PartialMatch("", re);
- }
-}
-
-BENCHMARK(ShardCacheMutex_False)->ThreadRange(1, NumCPUs());
-BENCHMARK(ShardCacheMutex_True)->ThreadRange(1, NumCPUs());
-
} // namespace re2
diff --git a/util/util.h b/util/util.h
index 4233956..33d100a 100644
--- a/util/util.h
+++ b/util/util.h
@@ -21,10 +21,6 @@
#endif
#endif
-#ifndef CACHELINE_SIZE
-#define CACHELINE_SIZE 64
-#endif
-
#ifndef FALLTHROUGH_INTENDED
#if defined(__clang__)
#define FALLTHROUGH_INTENDED [[clang::fallthrough]]