Move DeBruijnString() alongside StringGenerator.

Change-Id: Id7e2ed4be60611c0a4e56c621d9449a595c94c5e
Reviewed-on: https://code-review.googlesource.com/c/re2/+/52091
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index 9d8b533..9e15a41 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -122,44 +122,6 @@
   re->Decref();
 }
 
-// Generates and returns a string over binary alphabet {0,1} that contains
-// all possible binary sequences of length n as subsequences.  The obvious
-// brute force method would generate a string of length n * 2^n, but this
-// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
-// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
-// Such a string is useful for testing a DFA.  If you have a DFA
-// where distinct last n bytes implies distinct states, then running on a
-// DeBruijn string causes the DFA to need to create a new state at every
-// position in the input, never reusing any states until it gets to the
-// end of the string.  This is the worst possible case for DFA execution.
-static std::string DeBruijnString(int n) {
-  CHECK_LT(n, static_cast<int>(8*sizeof(int)));
-  CHECK_GT(n, 0);
-
-  std::vector<bool> did(size_t{1}<<n);
-  for (int i = 0; i < 1<<n; i++)
-    did[i] = false;
-
-  std::string s;
-  for (int i = 0; i < n-1; i++)
-    s.append("0");
-  int bits = 0;
-  int mask = (1<<n) - 1;
-  for (int i = 0; i < (1<<n); i++) {
-    bits <<= 1;
-    bits &= mask;
-    if (!did[bits|1]) {
-      bits |= 1;
-      s.append("1");
-    } else {
-      s.append("0");
-    }
-    CHECK(!did[bits]);
-    did[bits] = true;
-  }
-  return s;
-}
-
 // Test that the DFA gets the right result even if it runs
 // out of memory during a search.  The regular expression
 // 0[01]{n}$ matches a binary string of 0s and 1s only if
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index 089d822..c53ade0 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -792,40 +792,6 @@
 
 /*
 TODO(rsc): Make this work again.
-
-// Generates and returns a string over binary alphabet {0,1} that contains
-// all possible binary sequences of length n as subsequences.  The obvious
-// brute force method would generate a string of length n * 2^n, but this
-// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
-// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
-static std::string DeBruijnString(int n) {
-  CHECK_LT(n, 8*sizeof(int));
-  CHECK_GT(n, 0);
-
-  std::vector<bool> did(1<<n);
-  for (int i = 0; i < 1<<n; i++)
-    did[i] = false;
-
-  std::string s;
-  for (int i = 0; i < n-1; i++)
-    s.append("0");
-  int bits = 0;
-  int mask = (1<<n) - 1;
-  for (int i = 0; i < (1<<n); i++) {
-    bits <<= 1;
-    bits &= mask;
-    if (!did[bits|1]) {
-      bits |= 1;
-      s.append("1");
-    } else {
-      s.append("0");
-    }
-    CHECK(!did[bits]);
-    did[bits] = true;
-  }
-  return s;
-}
-
 void CacheFill(int iters, int n, SearchImpl *srch) {
   std::string s = DeBruijnString(n+1);
   std::string t;
diff --git a/re2/testing/string_generator.cc b/re2/testing/string_generator.cc
index 030cc45..44837fe 100644
--- a/re2/testing/string_generator.cc
+++ b/re2/testing/string_generator.cc
@@ -111,4 +111,31 @@
   hasnext_ = true;
 }
 
+std::string DeBruijnString(int n) {
+  CHECK_GE(n, 1);
+  CHECK_LE(n, 29);
+  const size_t size = size_t{1} << static_cast<size_t>(n);
+  const size_t mask = size - 1;
+  std::vector<bool> did(size, false);
+  std::string s;
+  s.reserve(static_cast<size_t>(n) + size);
+  for (size_t i = 0; i < static_cast<size_t>(n - 1); i++)
+    s += '0';
+  size_t bits = 0;
+  for (size_t i = 0; i < size; i++) {
+    bits <<= 1;
+    bits &= mask;
+    if (!did[bits | 1]) {
+      bits |= 1;
+      s += '1';
+    } else {
+      s += '0';
+    }
+    CHECK(!did[bits]);
+    did[bits] = true;
+  }
+  CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
+  return s;
+}
+
 }  // namespace re2
diff --git a/re2/testing/string_generator.h b/re2/testing/string_generator.h
index 6184176..73fbb51 100644
--- a/re2/testing/string_generator.h
+++ b/re2/testing/string_generator.h
@@ -58,6 +58,19 @@
   StringGenerator& operator=(const StringGenerator&) = delete;
 };
 
+// Generates and returns a string over binary alphabet {0,1} that contains
+// all possible binary sequences of length n as subsequences.  The obvious
+// brute force method would generate a string of length n * 2^n, but this
+// generates a string of length n-1 + 2^n called a De Bruijn cycle.
+// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
+//
+// Such a string is useful for testing a DFA.  If you have a DFA
+// where distinct last n bytes implies distinct states, then running on a
+// DeBruijn string causes the DFA to need to create a new state at every
+// position in the input, never reusing any states until it gets to the
+// end of the string.  This is the worst possible case for DFA execution.
+std::string DeBruijnString(int n);
+
 }  // namespace re2
 
 #endif  // RE2_TESTING_STRING_GENERATOR_H_