Use NUMA topology to shard the DFA state cache mutex.
Change-Id: I3df2b59a5c98a66de1198794f52463eda8b65608
Reviewed-on: https://code-review.googlesource.com/c/36330
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/Makefile b/Makefile
index f001f06..110ca73 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,11 @@
# CCICU=$(shell pkg-config icu-uc --cflags) -DRE2_USE_ICU
# LDICU=$(shell pkg-config icu-uc --libs)
+# To use NUMA topology to shard the DFA state cache mutex,
+# uncomment the next two lines:
+# CCNUMA=-DRE2_USE_NUMA
+# LDNUMA=-lnuma
+
# To build against PCRE for testing or benchmarking,
# uncomment the next two lines:
# CCPCRE=-I/usr/local/include -DUSEPCRE
@@ -17,8 +22,8 @@
CXXFLAGS?=-O3 -g
LDFLAGS?=
# required
-RE2_CXXFLAGS?=-std=c++11 -pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE)
-RE2_LDFLAGS?=-pthread $(LDICU) $(LDPCRE)
+RE2_CXXFLAGS?=-std=c++11 -pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCNUMA) $(CCPCRE)
+RE2_LDFLAGS?=-pthread $(LDICU) $(LDNUMA) $(LDPCRE)
AR?=ar
ARFLAGS?=rsc
NM?=nm
@@ -287,7 +292,7 @@
@echo
static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
-static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS)
+static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDNUMA) $(LDFLAGS)
static-testinstall:
@mkdir -p obj
@cp testinstall.cc obj
@@ -301,7 +306,7 @@
endif
shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
-shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS)
+shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDNUMA) $(LDFLAGS)
shared-testinstall:
@mkdir -p obj
@cp testinstall.cc obj
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 2df10b2..5c50b4a 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -45,6 +45,11 @@
#include "re2/prog.h"
#include "re2/stringpiece.h"
+#if defined(RE2_USE_NUMA)
+#include <numa.h>
+#include <sched.h>
+#endif
+
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
#ifdef _MSC_VER
#pragma warning(disable: 4200)
@@ -338,6 +343,12 @@
return prog_->bytemap()[c];
}
+ // Determines how many ways to shard cache_mutex_.
+ int CacheMutexCount() const;
+
+ // Determines which shard of cache_mutex_ to use.
+ int WhichCacheMutex() const;
+
// Constant after initialization.
Prog* prog_; // The regular expression program to run.
Prog::MatchKind kind_; // The kind of DFA.
@@ -459,7 +470,7 @@
astack_(NULL),
mem_budget_(max_mem),
cache_mutex_storage_(NULL),
- cache_mutex_count_(1) {
+ cache_mutex_count_(CacheMutexCount()) {
if (ExtraDebug)
fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
int nmark = 0;
@@ -1157,7 +1168,8 @@
RWLocker& operator=(const RWLocker&) = delete;
};
-DFA::RWLocker::RWLocker(DFA* dfa) : dfa_(dfa), which_(0), writing_(false) {
+DFA::RWLocker::RWLocker(DFA* dfa)
+ : dfa_(dfa), which_(dfa_->WhichCacheMutex()), writing_(false) {
dfa_->cache_mutex_[which_].ReaderLock();
}
@@ -2171,4 +2183,55 @@
return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
}
+#if defined(RE2_USE_NUMA)
+static int numa_max_count;
+static std::vector<int>* numa_cpu_map;
+
+void InitNUMA() {
+ numa_max_count = 0;
+ numa_cpu_map = new std::vector<int>;
+ if (numa_available() == -1)
+ return;
+ int nodes = numa_num_configured_nodes();
+ if (nodes < 1)
+ return;
+ int cpus = numa_num_configured_cpus();
+ if (cpus < 1)
+ return;
+ std::vector<int> count;
+ count.resize(nodes);
+ for (int cpu = 0; cpu < cpus; cpu++) {
+ int node = numa_node_of_cpu(cpu);
+ numa_cpu_map->emplace_back(count[node]++);
+ }
+ numa_max_count = *std::max_element(count.begin(), count.end());
+}
+#endif
+
+int DFA::CacheMutexCount() const {
+#if !defined(RE2_USE_NUMA)
+ return 1;
+#else
+ if (!prog_->shard_cache_mutex())
+ return 1;
+ static std::once_flag numa_once;
+ std::call_once(numa_once, &InitNUMA);
+ if (numa_max_count == 0)
+ return 1;
+ return numa_max_count;
+#endif
+}
+
+int DFA::WhichCacheMutex() const {
+#if !defined(RE2_USE_NUMA)
+ return 0;
+#else
+ if (!prog_->shard_cache_mutex())
+ return 0;
+ if (numa_cpu_map->empty())
+ return 0;
+ return (*numa_cpu_map)[sched_getcpu()];
+#endif
+}
+
} // namespace re2