Note that case-insensitive prefixes are already lowercase.

Change-Id: Idbccb386ed50cbd3b40c72604aa42f0570e50a89
Reviewed-on: https://code-review.googlesource.com/c/re2/+/59030
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/prog.cc b/re2/prog.cc
index 3b2faaa..396b46c 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -924,12 +924,6 @@
 // This function takes the prefix as std::string (i.e. not const std::string&
 // as normal) because it's going to clobber it, so a temporary is convenient.
 static uint64_t* BuildShiftDFA(std::string prefix) {
-  // Convert any ASCII letters to lowercase; uppercase will be handled later.
-  for (char& b : prefix) {
-    if ('A' <= b && b <= 'Z')
-      b += 'a' - 'A';
-  }
-
   // This constant is for convenience now and also for correctness later when
   // we clobber the prefix, but still need to know how long it was initially.
   const size_t size = prefix.size();
@@ -989,6 +983,10 @@
         ++dnext;
       dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
       // Convert ASCII letters to uppercase and record the extra transitions.
+      // Note that ASCII letters are guaranteed to be lowercase at this point
+      // because that's how the parser normalises them. #FunFact: 'k' and 's'
+      // match U+212A and U+017F, respectively, so they won't occur here when
+      // using UTF-8 encoding because the parser will emit character classes.
       if ('a' <= b && b <= 'z') {
         b -= 'a' - 'A';
         dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
diff --git a/re2/testing/required_prefix_test.cc b/re2/testing/required_prefix_test.cc
index 820cf2b..60a11f8 100644
--- a/re2/testing/required_prefix_test.cc
+++ b/re2/testing/required_prefix_test.cc
@@ -131,6 +131,40 @@
   }
 }
 
+TEST(RequiredPrefixForAccel, CaseFoldingForKAndS) {
+  Regexp* re;
+  std::string p;
+  bool f;
+
+  // Latin-1: `(?i)` prefixes can include 'k' and 's'; p comes back lowercase.
+  re = Regexp::Parse("(?i)KLM", Regexp::LikePerl|Regexp::Latin1, NULL);
+  ASSERT_TRUE(re != NULL);
+  ASSERT_TRUE(re->RequiredPrefixForAccel(&p, &f));
+  ASSERT_EQ(p, "klm");
+  ASSERT_EQ(f, true);
+  re->Decref();
+
+  re = Regexp::Parse("(?i)STU", Regexp::LikePerl|Regexp::Latin1, NULL);
+  ASSERT_TRUE(re != NULL);
+  ASSERT_TRUE(re->RequiredPrefixForAccel(&p, &f));
+  ASSERT_EQ(p, "stu");
+  ASSERT_EQ(f, true);
+  re->Decref();
+
+  // With UTF-8 encoding, `(?i)` prefixes can't include 'k' and 's':
+  // they also match U+212A and U+017F, respectively, so the parser
+  // emits character classes, not literals, and no prefix is reported.
+  re = Regexp::Parse("(?i)KLM", Regexp::LikePerl, NULL);
+  ASSERT_TRUE(re != NULL);
+  ASSERT_FALSE(re->RequiredPrefixForAccel(&p, &f));
+  re->Decref();
+
+  re = Regexp::Parse("(?i)STU", Regexp::LikePerl, NULL);
+  ASSERT_TRUE(re != NULL);
+  ASSERT_FALSE(re->RequiredPrefixForAccel(&p, &f));
+  re->Decref();
+}
+
 static const char* prefix_accel_tests[] = {
     "aababc\\d+",
     "(?i)AABABC\\d+",