Decode _x????_ escapes in metadata names

Previously we just decoded space (_x0020_). However, languages that
don't use the Roman alphabet have names that are exclusively escaped,
which is a very poor experience. Thus, we now decode all the escapes.
diff --git a/src/com/google/enterprise/adaptor/sharepoint/SharePointAdaptor.java b/src/com/google/enterprise/adaptor/sharepoint/SharePointAdaptor.java
index 23bbfea..c1f5417 100644
--- a/src/com/google/enterprise/adaptor/sharepoint/SharePointAdaptor.java
+++ b/src/com/google/enterprise/adaptor/sharepoint/SharePointAdaptor.java
@@ -91,6 +91,7 @@
 import java.util.concurrent.*;
 import java.util.logging.*;
 import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 
 import javax.xml.namespace.QName;
 import javax.xml.ws.BindingProvider;
@@ -200,6 +201,9 @@
       = "sharepoint:parentwebtitle";
   private static final String METADATA_LIST_GUID = "sharepoint:listguid";
 
+  private static final Pattern METADATA_ESCAPE_PATTERN
+      = Pattern.compile("_x([0-9a-f]{4})_");  // 4 lowercase hex digits
+
   private static final Logger log
       = Logger.getLogger(SharePointAdaptor.class.getName());
 
@@ -638,6 +642,23 @@
     return hostUri.resolve(pathUri);
   }
 
+  /**
+   * Decodes SharePoint _x????_ escapes; the four hex digits are one UTF-16
+   * code unit, so _x0020_ is a space and _xffe5_ is the fullwidth yen sign.
+   */
+  @VisibleForTesting
+  static String decodeMetadataName(String name) {
+    Matcher m = METADATA_ESCAPE_PATTERN.matcher(name);
+    StringBuffer sb = new StringBuffer();
+    while (m.find()) {
+      String decoded = String.valueOf((char) Integer.parseInt(m.group(1), 16));
+      // Quote it: a decoded '$' or backslash is special to appendReplacement.
+      m.appendReplacement(sb, Matcher.quoteReplacement(decoded));
+    }
+    m.appendTail(sb);
+    return sb.toString();
+  }
+
   public static void main(String[] args) {
     AbstractAdaptor.main(new SharePointAdaptor(), args);
   }
@@ -1173,7 +1194,7 @@
       if (name.startsWith("ows_")) {
         name = name.substring("ows_".length());
       }
-      name = name.replace("_x0020_", " ");
+      name = decodeMetadataName(name);
       if (ALTERNATIVE_VALUE_PATTERN.matcher(value).find()) {
         // This is a lookup field. We need to take alternative values only.
         // Ignore the integer part. 314;#pi;#42;#the answer
diff --git a/test/com/google/enterprise/adaptor/sharepoint/SharePointAdaptorTest.java b/test/com/google/enterprise/adaptor/sharepoint/SharePointAdaptorTest.java
index 170ccbc..1d09769 100644
--- a/test/com/google/enterprise/adaptor/sharepoint/SharePointAdaptorTest.java
+++ b/test/com/google/enterprise/adaptor/sharepoint/SharePointAdaptorTest.java
@@ -334,6 +334,19 @@
   }
 
   @Test
+  public void testMetadataDecoding() {
+    String plain = "NothingSpecial";
+    assertEquals(plain, SharePointAdaptor.decodeMetadataName(plain));
+    // Near-misses (wrong digit count, missing x or trailing _) stay as-is.
+    String nearMiss = "_x020__x00020__0020__x0020";
+    assertEquals(nearMiss, SharePointAdaptor.decodeMetadataName(nearMiss));
+    String spaced = SharePointAdaptor.decodeMetadataName("Simple_x0020_Space");
+    assertEquals("Simple Space", spaced);
+    assertEquals("Multiple \u0394Replacements\u2ee8", SharePointAdaptor
+        .decodeMetadataName("Multiple_x0020__x0394_Replacements_x2ee8_"));
+  }
+
+  @Test
   public void testGetDocContentWrongServer() throws Exception {
     SoapFactory siteDataFactory = MockSoapFactory.blank()
         .endpoint(AUTH_ENDPOINT, new MockAuthenticationSoap())