Remove first_byte from StartInfo and simplify accordingly.
Change-Id: I42786d4934e00ef7582d1a5d71a06ae624b05be5
Reviewed-on: https://code-review.googlesource.com/c/re2/+/55872
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 7ffb497..f0a79f0 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -167,12 +167,6 @@
typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
private:
- // Special "first_byte" values for a state. (Values >= 0 denote actual bytes.)
- enum {
- kFbUnknown = -1, // No analysis has been performed.
- kFbNone = -2, // The first-byte trick cannot be used.
- };
-
enum {
// Indices into start_ for unanchored searches.
// Add kStartAnchored for anchored searches.
@@ -239,25 +233,26 @@
struct SearchParams {
SearchParams(const StringPiece& text, const StringPiece& context,
RWLocker* cache_lock)
- : text(text), context(context),
+ : text(text),
+ context(context),
anchored(false),
+ have_first_byte(false),
want_earliest_match(false),
run_forward(false),
start(NULL),
- first_byte(kFbUnknown),
cache_lock(cache_lock),
failed(false),
ep(NULL),
- matches(NULL) { }
+ matches(NULL) {}
StringPiece text;
StringPiece context;
bool anchored;
+ bool have_first_byte;
bool want_earliest_match;
bool run_forward;
State* start;
- int first_byte;
- RWLocker *cache_lock;
+ RWLocker* cache_lock;
bool failed; // "out" parameter: whether search gave up
const char* ep; // "out" parameter: end pointer for match
SparseSet* matches;
@@ -268,15 +263,13 @@
};
// Before each search, the parameters to Search are analyzed by
- // AnalyzeSearch to determine the state in which to start and the
- // "first_byte" for that state, if any.
+ // AnalyzeSearch to determine the state in which to start.
struct StartInfo {
- StartInfo() : start(NULL), first_byte(kFbUnknown) {}
- State* start;
- std::atomic<int> first_byte;
+ StartInfo() : start(NULL) {}
+ std::atomic<State*> start;
};
- // Fills in params->start and params->first_byte using
+ // Fills in params->start and params->have_first_byte using
// the other search parameters. Returns true on success,
// false on failure.
// cache_mutex_.r <= L < mutex_
@@ -1183,10 +1176,8 @@
});
// Clear the cache, reset the memory budget.
- for (int i = 0; i < kMaxStart; i++) {
- start_[i].start = NULL;
- start_[i].first_byte.store(kFbUnknown, std::memory_order_relaxed);
- }
+ for (int i = 0; i < kMaxStart; i++)
+ start_[i].start.store(NULL, std::memory_order_relaxed);
ClearCache();
mem_budget_ = state_budget_;
}
@@ -1349,6 +1340,7 @@
swap(p, ep);
}
+ int first_byte = prog_->first_byte();
const uint8_t* bytemap = prog_->bytemap();
const uint8_t* lastmatch = NULL; // most recent matching position in text
bool matched = false;
@@ -1385,7 +1377,7 @@
// so use optimized assembly in memchr to skip ahead.
// If first_byte isn't found, we can skip to the end
// of the string.
- if ((p = BytePtr(memchr(p, params->first_byte, ep - p))) == NULL) {
+ if ((p = BytePtr(memchr(p, first_byte, ep - p))) == NULL) {
p = ep;
break;
}
@@ -1589,7 +1581,7 @@
// For debugging, calls the general code directly.
bool DFA::SlowSearchLoop(SearchParams* params) {
return InlinedSearchLoop(params,
- params->first_byte >= 0,
+ params->have_first_byte,
params->want_earliest_match,
params->run_forward);
}
@@ -1610,8 +1602,7 @@
&DFA::SearchTTT,
};
- bool have_first_byte = params->first_byte >= 0;
- int index = 4 * have_first_byte +
+ int index = 4 * params->have_first_byte +
2 * params->want_earliest_match +
1 * params->run_forward;
return (this->*Searches[index])(params);
@@ -1703,13 +1694,21 @@
}
}
- if (ExtraDebug)
- fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s first_byte=%d\n",
- params->anchored, params->run_forward, flags,
- DumpState(info->start).c_str(), info->first_byte.load());
+ params->start = info->start.load(std::memory_order_acquire);
- params->start = info->start;
- params->first_byte = info->first_byte.load(std::memory_order_acquire);
+ // Even if we have a first byte, we cannot use it when anchored and,
+ // less obviously, we cannot use it when we are going to need flags.
+ // This trick works only when there is a single byte that leads to a
+ // different state!
+ if (prog_->first_byte() >= 0 &&
+ !params->anchored &&
+ params->start->flag_ >> kFlagNeedShift == 0)
+ params->have_first_byte = true;
+
+ if (ExtraDebug)
+ fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s have_first_byte=%d\n",
+ params->anchored, params->run_forward, flags,
+ DumpState(params->start).c_str(), params->have_first_byte);
return true;
}
@@ -1718,47 +1717,25 @@
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
uint32_t flags) {
// Quick check.
- int fb = info->first_byte.load(std::memory_order_acquire);
- if (fb != kFbUnknown)
+ State* start = info->start.load(std::memory_order_acquire);
+ if (start != NULL)
return true;
MutexLock l(&mutex_);
- fb = info->first_byte.load(std::memory_order_relaxed);
- if (fb != kFbUnknown)
+ start = info->start.load(std::memory_order_relaxed);
+ if (start != NULL)
return true;
q0_->clear();
AddToQueue(q0_,
params->anchored ? prog_->start() : prog_->start_unanchored(),
flags);
- info->start = WorkqToCachedState(q0_, NULL, flags);
- if (info->start == NULL)
+ start = WorkqToCachedState(q0_, NULL, flags);
+ if (start == NULL)
return false;
- if (info->start == DeadState) {
- // Synchronize with "quick check" above.
- info->first_byte.store(kFbNone, std::memory_order_release);
- return true;
- }
-
- if (info->start == FullMatchState) {
- // Synchronize with "quick check" above.
- info->first_byte.store(kFbNone, std::memory_order_release); // will be ignored
- return true;
- }
-
- // Even if we have a first_byte, we cannot use it when anchored and,
- // less obviously, we cannot use it when we are going to need flags.
- // This trick works only when there is a single byte that leads to a
- // different state!
- int first_byte = prog_->first_byte();
- if (first_byte == -1 ||
- params->anchored ||
- info->start->flag_ >> kFlagNeedShift != 0)
- first_byte = kFbNone;
-
// Synchronize with "quick check" above.
- info->first_byte.store(first_byte, std::memory_order_release);
+ info->start.store(start, std::memory_order_release);
return true;
}