Optimise fanout bucketing.

Change-Id: Id00903805b5092f15f306d74bef22b6443d3f4da
Reviewed-on: https://code-review.googlesource.com/c/re2/+/54332
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/bitmap256.h b/re2/bitmap256.h
index f649b4c..1a1868b 100644
--- a/re2/bitmap256.h
+++ b/re2/bitmap256.h
@@ -51,9 +51,8 @@
   // Finds the least significant non-zero bit in n.
   static int FindLSBSet(uint64_t n) {
     DCHECK_NE(n, 0);
-
 #if defined(__GNUC__)
-    return __builtin_ctzll(n);
+    return __builtin_ctzl(n);
 #elif defined(_MSC_VER) && defined(_M_X64)
     unsigned long c;
     _BitScanForward64(&c, n);
diff --git a/re2/re2.cc b/re2/re2.cc
index eb8ab3e..08be7bb 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -12,6 +12,9 @@
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -285,17 +288,39 @@
   return prog->size();
 }
 
+// Finds the most significant non-zero bit in n.
+static int FindMSBSet(uint32_t n) {
+  DCHECK_NE(n, 0);
+#if defined(__GNUC__)
+  return 31 ^ __builtin_clz(n);
+#elif defined(_MSC_VER)
+  unsigned long c;
+  _BitScanReverse(&c, n);
+  return static_cast<int>(c);
+#else
+  int c = 0;
+  for (int shift = 1 << 4; shift != 0; shift >>= 1) {
+    uint32_t word = n >> shift;
+    if (word != 0) {
+      n = word;
+      c += shift;
+    }
+  }
+  return c;
+#endif
+}
+
 static int Fanout(Prog* prog, std::map<int, int>* histogram) {
   SparseArray<int> fanout(prog->size());
   prog->Fanout(&fanout);
   histogram->clear();
   for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
-    // TODO(junyer): Optimise this?
-    int bucket = 0;
-    while (1 << bucket < i->value()) {
-      bucket++;
-    }
-    (*histogram)[bucket]++;
+    if (i->value() == 0)
+      continue;
+    uint32_t value = i->value();
+    int bucket = FindMSBSet(value);
+    bucket += value & (value-1) ? 1 : 0;
+    ++(*histogram)[bucket];
   }
   return histogram->rbegin()->first;
 }