Implement linear-time append for patch lists.
Change-Id: I201f02f37ea021ea3fdfa27925c66c1b19e4123b
Reviewed-on: https://code-review.googlesource.com/c/re2/+/57370
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/compile.cc b/re2/compile.cc
index 30e55f5..4ba2852 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc
@@ -30,91 +30,57 @@
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
//
// Because the out and out1 fields in Inst are no longer pointers,
-// we can't use pointers directly here either. Instead, p refers
-// to inst_[p>>1].out (p&1 == 0) or inst_[p>>1].out1 (p&1 == 1).
-// p == 0 represents the NULL list. This is okay because instruction #0
+// we can't use pointers directly here either. Instead, head refers
+// to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1).
+// head == 0 represents the NULL list. This is okay because instruction #0
// is always the fail instruction, which never appears on a list.
-
struct PatchList {
- uint32_t p;
-
// Returns patch list containing just p.
- static PatchList Mk(uint32_t p);
+ static PatchList Mk(uint32_t p) {
+ return {p, p};
+ }
- // Patches all the entries on l to have value v.
+ // Patches all the entries on l to have value p.
// Caller must not ever use patch list again.
- static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v);
-
- // Deref returns the next pointer pointed at by p.
- static PatchList Deref(Prog::Inst *inst0, PatchList l);
-
- // Appends two patch lists and returns result.
- static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2);
-};
-
-static PatchList nullPatchList = { 0 };
-
-// Returns patch list containing just p.
-PatchList PatchList::Mk(uint32_t p) {
- PatchList l;
- l.p = p;
- return l;
-}
-
-// Returns the next pointer pointed at by l.
-PatchList PatchList::Deref(Prog::Inst* inst0, PatchList l) {
- Prog::Inst* ip = &inst0[l.p>>1];
- if (l.p&1)
- l.p = ip->out1();
- else
- l.p = ip->out();
- return l;
-}
-
-// Patches all the entries on l to have value v.
-void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32_t val) {
- while (l.p != 0) {
- Prog::Inst* ip = &inst0[l.p>>1];
- if (l.p&1) {
- l.p = ip->out1();
- ip->out1_ = val;
- } else {
- l.p = ip->out();
- ip->set_out(val);
+ static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) {
+ while (l.head != 0) {
+ Prog::Inst* ip = &inst0[l.head>>1];
+ if (l.head&1) {
+ l.head = ip->out1();
+ ip->out1_ = p;
+ } else {
+ l.head = ip->out();
+ ip->set_out(p);
+ }
}
}
-}
-// Appends two patch lists and returns result.
-PatchList PatchList::Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
- if (l1.p == 0)
- return l2;
- if (l2.p == 0)
- return l1;
-
- PatchList l = l1;
- for (;;) {
- PatchList next = PatchList::Deref(inst0, l);
- if (next.p == 0)
- break;
- l = next;
+ // Appends two patch lists and returns result.
+ static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
+ if (l1.head == 0)
+ return l2;
+ if (l2.head == 0)
+ return l1;
+ Prog::Inst* ip = &inst0[l1.tail>>1];
+ if (l1.tail&1)
+ ip->out1_ = l2.head;
+ else
+ ip->set_out(l2.head);
+ return {l1.head, l2.tail};
}
- Prog::Inst* ip = &inst0[l.p>>1];
- if (l.p&1)
- ip->out1_ = l2.p;
- else
- ip->set_out(l2.p);
+ uint32_t head;
+ uint32_t tail; // for linear-time append
+};
- return l1;
-}
+static const PatchList kNullPatchList = {0, 0};
// Compiled program fragment.
struct Frag {
uint32_t begin;
PatchList end;
- Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector
+ Frag() : begin(0) { end.head = 0; } // needed so Frag can go in vector
Frag(uint32_t begin, PatchList end) : begin(begin), end(end) {}
};
@@ -212,8 +178,8 @@
int AddSuffixRecursive(int root, int id);
// Finds the trie node for the given suffix. Returns a Frag in order to
- // distinguish between pointing at the root node directly (end.p == 0)
- // and pointing at an Alt's out1 or out (end.p&1 == 1 or 0, respectively).
+ // distinguish between pointing at the root node directly (end.head == 0)
+ // and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively).
Frag FindByteRange(int root, int id);
// Compares two ByteRanges and returns true iff they are equal.
@@ -298,7 +264,7 @@
// Returns an unmatchable fragment.
Frag Compiler::NoMatch() {
- return Frag(0, nullPatchList);
+ return Frag(0, kNullPatchList);
}
// Is a an unmatchable fragment?
@@ -314,7 +280,7 @@
// Elide no-op.
Prog::Inst* begin = &inst_[a.begin];
if (begin->opcode() == kInstNop &&
- a.end.p == (a.begin << 1) &&
+ a.end.head == (a.begin << 1) &&
begin->out() == 0) {
// in case refs to a somewhere
PatchList::Patch(inst_.data(), a.end, b.begin);
@@ -419,7 +385,7 @@
if (id < 0)
return NoMatch();
inst_[id].InitMatch(match_id);
- return Frag(id, nullPatchList);
+ return Frag(id, kNullPatchList);
}
// Returns a fragment matching a particular empty-width op (like ^ or $)
@@ -467,7 +433,7 @@
void Compiler::BeginRange() {
rune_cache_.clear();
rune_range_.begin = 0;
- rune_range_.end = nullPatchList;
+ rune_range_.end = kNullPatchList;
}
int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
@@ -548,9 +514,9 @@
}
int br;
- if (f.end.p == 0)
+ if (f.end.head == 0)
br = root;
- else if (f.end.p&1)
+ else if (f.end.head&1)
br = inst_[f.begin].out1();
else
br = inst_[f.begin].out();
@@ -566,9 +532,9 @@
// Ensure that the parent points to the clone, not to the original.
// Note that this could leave the head unreachable except via the cache.
br = byterange;
- if (f.end.p == 0)
+ if (f.end.head == 0)
root = br;
- else if (f.end.p&1)
+ else if (f.end.head&1)
inst_[f.begin].out1_ = br;
else
inst_[f.begin].set_out(br);
@@ -601,7 +567,7 @@
Frag Compiler::FindByteRange(int root, int id) {
if (inst_[root].opcode() == kInstByteRange) {
if (ByteRangeEqual(root, id))
- return Frag(root, nullPatchList);
+ return Frag(root, kNullPatchList);
else
return NoMatch();
}