Make Compiler and Prog use PODArray<> for Insts. Change-Id: I2fcd6e301d91dd46cd4ad082632eb4fc0b8897c8 Reviewed-on: https://code-review.googlesource.com/c/36810 Reviewed-by: Paul Wankadia <junyer@google.com>

commit: 04452af34b2f0696d68f129264c865715be33096 [log] [tgz]
author: Chris Kennelly <ckennelly@google.com> Tue Dec 18 16:04:50 2018 -0500
committer: Paul Wankadia <junyer@google.com> Fri Dec 21 05:11:42 2018 +0000
tree: d5f98c836bbaaa4713645dbd814ac5fd8446b112
parent: 92a2604fca5303d21732972cb4cd0880735c6612 [diff]
diff --git a/re2/compile.cc b/re2/compile.cc
index 454c872..b8ce491 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc

@@ -14,6 +14,7 @@
 #include <utility>
 
 #include "util/logging.h"
+#include "util/pod_array.h"
 #include "util/utf.h"
 #include "re2/prog.h"
 #include "re2/re2.h"
@@ -236,11 +237,9 @@
   Encoding encoding_;  // Input encoding
   bool reversed_;      // Should program run backward over text?
 
-  int max_inst_;       // Maximum number of instructions.
-
-  Prog::Inst* inst_;   // Pointer to first instruction.
-  int inst_len_;       // Number of instructions used.
-  int inst_cap_;       // Number of instructions allocated.
+  PODArray<Prog::Inst> inst_;
+  int ninst_;          // Number of instructions used.
+  int max_ninst_;      // Maximum number of instructions.
 
   int64_t max_mem_;    // Total memory budget.
 
@@ -258,41 +257,38 @@
   failed_ = false;
   encoding_ = kEncodingUTF8;
   reversed_ = false;
-  inst_ = NULL;
-  inst_len_ = 0;
-  inst_cap_ = 0;
-  max_inst_ = 1;  // make AllocInst for fail instruction okay
+  ninst_ = 0;
+  max_ninst_ = 1;  // make AllocInst for fail instruction okay
   max_mem_ = 0;
   int fail = AllocInst(1);
   inst_[fail].InitFail();
-  max_inst_ = 0;  // Caller must change
+  max_ninst_ = 0;  // Caller must change
 }
 
 Compiler::~Compiler() {
   delete prog_;
-  delete[] inst_;
 }
 
 int Compiler::AllocInst(int n) {
-  if (failed_ || inst_len_ + n > max_inst_) {
+  if (failed_ || ninst_ + n > max_ninst_) {
     failed_ = true;
     return -1;
   }
 
-  if (inst_len_ + n > inst_cap_) {
-    if (inst_cap_ == 0)
-      inst_cap_ = 8;
-    while (inst_len_ + n > inst_cap_)
-      inst_cap_ *= 2;
-    Prog::Inst* ip = new Prog::Inst[inst_cap_];
-    if (inst_ != NULL)
-      memmove(ip, inst_, inst_len_ * sizeof ip[0]);
-    memset(ip + inst_len_, 0, (inst_cap_ - inst_len_) * sizeof ip[0]);
-    delete[] inst_;
-    inst_ = ip;
+  if (ninst_ + n > inst_.size()) {
+    int cap = inst_.size();
+    if (cap == 0)
+      cap = 8;
+    while (ninst_ + n > cap)
+      cap *= 2;
+    PODArray<Prog::Inst> inst(cap);
+    if (inst_.data() != NULL)
+      memmove(inst.data(), inst_.data(), ninst_*sizeof inst_[0]);
+    memset(inst.data() + ninst_, 0, (cap - ninst_)*sizeof inst_[0]);
+    inst_ = std::move(inst);
   }
-  int id = inst_len_;
-  inst_len_ += n;
+  int id = ninst_;
+  ninst_ += n;
   return id;
 }
 
@@ -320,17 +316,18 @@
   if (begin->opcode() == kInstNop &&
       a.end.p == (a.begin << 1) &&
       begin->out() == 0) {
-    PatchList::Patch(inst_, a.end, b.begin);  // in case refs to a somewhere
+    // in case refs to a somewhere
+    PatchList::Patch(inst_.data(), a.end, b.begin);
     return b;
   }
 
   // To run backward over string, reverse all concatenations.
   if (reversed_) {
-    PatchList::Patch(inst_, b.end, a.begin);
+    PatchList::Patch(inst_.data(), b.end, a.begin);
     return Frag(b.begin, a.end);
   }
 
-  PatchList::Patch(inst_, a.end, b.begin);
+  PatchList::Patch(inst_.data(), a.end, b.begin);
   return Frag(a.begin, b.end);
 }
 
@@ -347,7 +344,7 @@
     return NoMatch();
 
   inst_[id].InitAlt(a.begin, b.begin);
-  return Frag(id, PatchList::Append(inst_, a.end, b.end));
+  return Frag(id, PatchList::Append(inst_.data(), a.end, b.end));
 }
 
 // When capturing submatches in like-Perl mode, a kOpAlt Inst
@@ -363,7 +360,7 @@
   if (id < 0)
     return NoMatch();
   inst_[id].InitAlt(0, 0);
-  PatchList::Patch(inst_, a.end, id);
+  PatchList::Patch(inst_.data(), a.end, id);
   if (nongreedy) {
     inst_[id].out1_ = a.begin;
     return Frag(id, PatchList::Mk(id << 1));
@@ -395,7 +392,7 @@
     inst_[id].InitAlt(a.begin, 0);
     pl = PatchList::Mk((id << 1) | 1);
   }
-  return Frag(id, PatchList::Append(inst_, pl, a.end));
+  return Frag(id, PatchList::Append(inst_.data(), pl, a.end));
 }
 
 // Returns a fragment for the byte range lo-hi.
@@ -443,7 +440,7 @@
     return NoMatch();
   inst_[id].InitCapture(2*n, a.begin);
   inst_[id+1].InitCapture(2*n+1, 0);
-  PatchList::Patch(inst_, a.end, id+1);
+  PatchList::Patch(inst_.data(), a.end, id+1);
 
   return Frag(id, PatchList::Mk((id+1) << 1));
 }
@@ -477,9 +474,9 @@
                                      int next) {
   Frag f = ByteRange(lo, hi, foldcase);
   if (next != 0) {
-    PatchList::Patch(inst_, f.end, next);
+    PatchList::Patch(inst_.data(), f.end, next);
   } else {
-    rune_range_.end = PatchList::Append(inst_, rune_range_.end, f.end);
+    rune_range_.end = PatchList::Append(inst_.data(), rune_range_.end, f.end);
   }
   return f.begin;
 }
@@ -581,10 +578,10 @@
   if (!IsCachedRuneByteSuffix(id)) {
     // The head should be the instruction most recently allocated, so free it
     // instead of leaving it unreachable.
-    DCHECK_EQ(id, inst_len_-1);
+    DCHECK_EQ(id, ninst_-1);
     inst_[id].out_opcode_ = 0;
     inst_[id].out1_ = 0;
-    inst_len_--;
+    ninst_--;
   }
 
   out = AddSuffixRecursive(inst_[br].out(), out);
@@ -1106,15 +1103,15 @@
     encoding_ = kEncodingLatin1;
   max_mem_ = max_mem;
   if (max_mem <= 0) {
-    max_inst_ = 100000;  // more than enough
+    max_ninst_ = 100000;  // more than enough
   } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) {
     // No room for anything.
-    max_inst_ = 0;
+    max_ninst_ = 0;
   } else {
     int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst);
     // Limit instruction count so that inst->id() fits nicely in an int.
     // SparseArray also assumes that the indices (inst->id()) are ints.
-    // The call to WalkExponential uses 2*max_inst_ below,
+    // The call to WalkExponential uses 2*max_ninst_ below,
     // and other places in the code use 2 or 3 * prog->size().
     // Limiting to 2^24 should avoid overflow in those places.
     // (The point of allowing more than 32 bits of memory is to
@@ -1127,7 +1124,7 @@
     if (m > Prog::Inst::kMaxInst)
       m = Prog::Inst::kMaxInst;
 
-    max_inst_ = static_cast<int>(m);
+    max_ninst_ = static_cast<int>(m);
   }
 
   anchor_ = anchor;
@@ -1155,7 +1152,7 @@
   bool is_anchor_end = IsAnchorEnd(&sre, 0);
 
   // Generate fragment for entire regexp.
-  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_inst_);
+  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
   sre->Decref();
   if (c.failed_)
     return NULL;
@@ -1192,13 +1189,12 @@
 
   if (prog_->start() == 0 && prog_->start_unanchored() == 0) {
     // No possible matches; keep Fail instruction only.
-    inst_len_ = 1;
+    ninst_ = 1;
   }
 
   // Hand off the array to Prog.
-  prog_->inst_ = inst_;
-  prog_->size_ = inst_len_;
-  inst_ = NULL;
+  prog_->inst_ = std::move(inst_);
+  prog_->size_ = ninst_;
 
   prog_->Optimize();
   prog_->Flatten();
@@ -1241,7 +1237,7 @@
   if (sre == NULL)
     return NULL;
 
-  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_inst_);
+  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
   sre->Decref();
   if (c.failed_)
     return NULL;

diff --git a/re2/prog.cc b/re2/prog.cc
index b0ae375..fa03af9 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc

@@ -112,7 +112,6 @@
     first_byte_(-1),
     flags_(0),
     list_count_(0),
-    inst_(NULL),
     onepass_nodes_(NULL),
     dfa_mem_(0),
     dfa_first_(NULL),
@@ -123,7 +122,6 @@
   DeleteDFA(dfa_longest_);
   DeleteDFA(dfa_first_);
   delete[] onepass_nodes_;
-  delete[] inst_;
 }
 
 typedef SparseSet Workq;
@@ -628,9 +626,8 @@
 
   // Finally, replace the old instructions with the new instructions.
   size_ = static_cast<int>(flat.size());
-  delete[] inst_;
-  inst_ = new Inst[size_];
-  memmove(inst_, flat.data(), size_ * sizeof *inst_);
+  inst_ = PODArray<Inst>(size_);
+  memmove(inst_.data(), flat.data(), size_*sizeof(inst_[0]));
 }
 
 void Prog::MarkSuccessors(SparseArray<int>* rootmap,

diff --git a/re2/prog.h b/re2/prog.h
index 6d48135..268ab9d 100644
--- a/re2/prog.h
+++ b/re2/prog.h

@@ -18,6 +18,7 @@
 
 #include "util/util.h"
 #include "util/logging.h"
+#include "util/pod_array.h"
 #include "util/sparse_array.h"
 #include "util/sparse_set.h"
 #include "re2/re2.h"
@@ -77,7 +78,7 @@
     void InitFail();
 
     // Getters
-    int id(Prog* p) { return static_cast<int>(this - p->inst_); }
+    int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
     InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
     int last()      { return (out_opcode_>>3)&1; }
     int out()       { return out_opcode_>>4; }
@@ -395,7 +396,7 @@
   int list_count_;            // count of lists (see above)
   int inst_count_[kNumInst];  // count of instructions by opcode
 
-  Inst* inst_;              // pointer to instruction array
+  PODArray<Inst> inst_;     // pointer to instruction array
   uint8_t* onepass_nodes_;  // data for OnePass nodes
 
   int64_t dfa_mem_;         // Maximum memory for DFAs.
commit	04452af34b2f0696d68f129264c865715be33096	[log] [tgz]
author	Chris Kennelly <ckennelly@google.com>	Tue Dec 18 16:04:50 2018 -0500
committer	Paul Wankadia <junyer@google.com>	Fri Dec 21 05:11:42 2018 +0000
tree	d5f98c836bbaaa4713645dbd814ac5fd8446b112
parent	92a2604fca5303d21732972cb4cd0880735c6612 [diff]