Move DeBruijnString() alongside StringGenerator.
Change-Id: Id7e2ed4be60611c0a4e56c621d9449a595c94c5e
Reviewed-on: https://code-review.googlesource.com/c/re2/+/52091
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index 9d8b533..9e15a41 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -122,44 +122,6 @@
re->Decref();
}
-// Generates and returns a string over binary alphabet {0,1} that contains
-// all possible binary sequences of length n as subsequences. The obvious
-// brute force method would generate a string of length n * 2^n, but this
-// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
-// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
-// Such a string is useful for testing a DFA. If you have a DFA
-// where distinct last n bytes implies distinct states, then running on a
-// DeBruijn string causes the DFA to need to create a new state at every
-// position in the input, never reusing any states until it gets to the
-// end of the string. This is the worst possible case for DFA execution.
-static std::string DeBruijnString(int n) {
- CHECK_LT(n, static_cast<int>(8*sizeof(int)));
- CHECK_GT(n, 0);
-
- std::vector<bool> did(size_t{1}<<n);
- for (int i = 0; i < 1<<n; i++)
- did[i] = false;
-
- std::string s;
- for (int i = 0; i < n-1; i++)
- s.append("0");
- int bits = 0;
- int mask = (1<<n) - 1;
- for (int i = 0; i < (1<<n); i++) {
- bits <<= 1;
- bits &= mask;
- if (!did[bits|1]) {
- bits |= 1;
- s.append("1");
- } else {
- s.append("0");
- }
- CHECK(!did[bits]);
- did[bits] = true;
- }
- return s;
-}
-
// Test that the DFA gets the right result even if it runs
// out of memory during a search. The regular expression
// 0[01]{n}$ matches a binary string of 0s and 1s only if
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index 089d822..c53ade0 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -792,40 +792,6 @@
/*
TODO(rsc): Make this work again.
-
-// Generates and returns a string over binary alphabet {0,1} that contains
-// all possible binary sequences of length n as subsequences. The obvious
-// brute force method would generate a string of length n * 2^n, but this
-// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
-// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
-static std::string DeBruijnString(int n) {
- CHECK_LT(n, 8*sizeof(int));
- CHECK_GT(n, 0);
-
- std::vector<bool> did(1<<n);
- for (int i = 0; i < 1<<n; i++)
- did[i] = false;
-
- std::string s;
- for (int i = 0; i < n-1; i++)
- s.append("0");
- int bits = 0;
- int mask = (1<<n) - 1;
- for (int i = 0; i < (1<<n); i++) {
- bits <<= 1;
- bits &= mask;
- if (!did[bits|1]) {
- bits |= 1;
- s.append("1");
- } else {
- s.append("0");
- }
- CHECK(!did[bits]);
- did[bits] = true;
- }
- return s;
-}
-
void CacheFill(int iters, int n, SearchImpl *srch) {
std::string s = DeBruijnString(n+1);
std::string t;
diff --git a/re2/testing/string_generator.cc b/re2/testing/string_generator.cc
index 030cc45..44837fe 100644
--- a/re2/testing/string_generator.cc
+++ b/re2/testing/string_generator.cc
@@ -111,4 +111,31 @@
hasnext_ = true;
}
+std::string DeBruijnString(int n) {
+ CHECK_GE(n, 1);
+ CHECK_LE(n, 29);
+ const size_t size = size_t{1} << static_cast<size_t>(n);
+ const size_t mask = size - 1;
+ std::vector<bool> did(size, false);
+ std::string s;
+ s.reserve(static_cast<size_t>(n) + size);
+ for (size_t i = 0; i < static_cast<size_t>(n - 1); i++)
+ s += '0';
+ size_t bits = 0;
+ for (size_t i = 0; i < size; i++) {
+ bits <<= 1;
+ bits &= mask;
+ if (!did[bits | 1]) {
+ bits |= 1;
+ s += '1';
+ } else {
+ s += '0';
+ }
+ CHECK(!did[bits]);
+ did[bits] = true;
+ }
+ CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
+ return s;
+}
+
} // namespace re2
diff --git a/re2/testing/string_generator.h b/re2/testing/string_generator.h
index 6184176..73fbb51 100644
--- a/re2/testing/string_generator.h
+++ b/re2/testing/string_generator.h
@@ -58,6 +58,19 @@
StringGenerator& operator=(const StringGenerator&) = delete;
};
+// Generates and returns a string over binary alphabet {0,1} that contains
+// all possible binary sequences of length n as subsequences. The obvious
+// brute force method would generate a string of length n * 2^n, but this
+// generates a string of length n-1 + 2^n called a De Bruijn cycle.
+// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
+//
+// Such a string is useful for testing a DFA. If you have a DFA
+// where distinct last n bytes implies distinct states, then running on a
+// DeBruijn string causes the DFA to need to create a new state at every
+// position in the input, never reusing any states until it gets to the
+// end of the string. This is the worst possible case for DFA execution.
+std::string DeBruijnString(int n);
+
} // namespace re2
#endif // RE2_TESTING_STRING_GENERATOR_H_