Create standard transforms
diff --git a/src/com/google/enterprise/adaptor/prebuilt/PrebuiltTransforms.java b/src/com/google/enterprise/adaptor/prebuilt/PrebuiltTransforms.java
new file mode 100644
index 0000000..f94f9dc
--- /dev/null
+++ b/src/com/google/enterprise/adaptor/prebuilt/PrebuiltTransforms.java
@@ -0,0 +1,331 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.enterprise.adaptor.prebuilt;
+
+import com.google.enterprise.adaptor.DocumentTransform;
+import com.google.enterprise.adaptor.Metadata;
+
+import java.math.BigInteger;
+import java.util.*;
+import java.util.logging.*;
+import java.util.regex.*;
+
+/**
+ * Common transforms that you would expect to have available.
+ */
+public class PrebuiltTransforms {
+  private static final Pattern INTEGER_PATTERN = Pattern.compile("[0-9]+");
+
+  private static final Logger log
+      = Logger.getLogger(PrebuiltTransforms.class.getName());
+
+  // Static factory holder: the private constructor prevents instantiation.
+  private PrebuiltTransforms() {}
+
+  /**
+   * Creates a transform that copies the values of one metadata key onto
+   * another. Copied values are merged into the destination unless {@code
+   * "overwrite"} is {@code "true"}, in which case they replace its contents.
+   *
+   * <p>Each copy is described by a pair of {@code "X.from"} and {@code
+   * "X.to"} entries, where {@code X} is an integer and each value names a
+   * metadata key. Copies run in increasing order of {@code X}; an integer
+   * that lacks either entry is ignored.
+   *
+   * <p>Example configuration:
+   * <pre><code>overwrite=false
+   *3.from=colour
+   *3.to=color
+   *5.from=author
+   *5.to=contributors</code></pre>
+   *
+   * @param config configuration entries as described above
+   * @return the configured copy transform
+   */
+  public static DocumentTransform copyMetadata(Map<String, String> config) {
+    List<String> pairs = parseCopies(config);
+    if (pairs.isEmpty()) {
+      log.warning("No entries listed to be copied");
+    }
+    boolean replaceDest = Boolean.parseBoolean(config.get("overwrite"));
+    return new CopyTransform(pairs, replaceDest, false);
+  }
+
+  /**
+   * Creates a transform that moves metadata values from one key to another.
+   * Configuration and merging rules match {@link #copyMetadata}; in addition
+   * each source key is emptied once its values have been transferred. A
+   * source key without any values leaves its destination untouched.
+   *
+   * @param config configuration entries, as for {@link #copyMetadata}
+   * @return the configured move transform
+   */
+  public static DocumentTransform moveMetadata(Map<String, String> config) {
+    List<String> pairs = parseCopies(config);
+    if (pairs.isEmpty()) {
+      log.warning("No entries listed to be moved");
+    }
+    boolean replaceDest = Boolean.parseBoolean(config.get("overwrite"));
+    return new CopyTransform(pairs, replaceDest, true);
+  }
+
+  /**
+   * Extracts the copy definitions from {@code config}.
+   *
+   * <p>Only entries whose key has the form {@code "<integer>.<name>"} are
+   * considered; for each integer the {@code from} and {@code to} names are
+   * looked up, and integers missing either one are ignored. The integer
+   * prefix is parsed as a {@link BigInteger} because the digit pattern accepts
+   * any number of digits, which {@code Integer.parseInt} would reject with a
+   * {@code NumberFormatException} once the value no longer fits in an int.
+   *
+   * @param config the transform's configuration entries
+   * @return interleaved source/destination keys, in increasing order of the
+   *     integer prefix
+   */
+  private static List<String> parseCopies(Map<String, String> config) {
+    // Group "12.BLAH"-style entries by their integer; each inner map is keyed
+    // by the text after the dot ("BLAH"). The TreeMap keeps integers sorted.
+    Map<BigInteger, Map<String, String>> byIndex
+        = new TreeMap<BigInteger, Map<String, String>>();
+    for (Map.Entry<String, String> me : config.entrySet()) {
+      String[] parts = me.getKey().split("\\.", 2);
+      if (parts.length != 2) {
+        log.log(Level.FINER,
+            "Not a copy definition. Does not contain a dot: {0}", me.getKey());
+        continue;
+      }
+      if (!INTEGER_PATTERN.matcher(parts[0]).matches()) {
+        log.log(Level.FINER,
+            "Not a copy definition. Does not start with an integer: {0}",
+            me.getKey());
+        continue;
+      }
+      // Safe: the pattern check above guarantees a non-empty digit string.
+      BigInteger index = new BigInteger(parts[0]);
+      Map<String, String> values = byIndex.get(index);
+      if (values == null) {
+        values = new HashMap<String, String>();
+        byIndex.put(index, values);
+      }
+      values.put(parts[1], me.getValue());
+    }
+
+    // Emit the complete definitions in sorted order.
+    List<String> copies = new ArrayList<String>(byIndex.size() * 2);
+    for (Map.Entry<BigInteger, Map<String, String>> me : byIndex.entrySet()) {
+      String from = me.getValue().get("from");
+      String to = me.getValue().get("to");
+      if (from == null || to == null) {
+        log.log(Level.FINE, "Ignoring int {0}. Missing .from or .to",
+            me.getKey());
+        continue;
+      }
+      copies.add(from);
+      copies.add(to);
+      log.log(Level.FINE, "Found config to rename {0} to {1}",
+          new Object[] {from, to});
+    }
+    return copies;
+  }
+
+  /**
+   * Transform that copies, or moves when {@code move} is set, the values of
+   * each source key to its destination key.
+   */
+  private static class CopyTransform implements DocumentTransform {
+    /** Interleaved source/destination keys; always of even length. */
+    private final List<String> copies;
+    /** Whether copied values replace the destination's existing values. */
+    private final boolean overwrite;
+    /** Whether the source key is emptied after its values are copied. */
+    private final boolean move;
+
+    private CopyTransform(List<String> copies, boolean overwrite,
+        boolean move) {
+      if ((copies.size() % 2) != 0) {
+        throw new AssertionError();
+      }
+      this.copies = Collections.unmodifiableList(new ArrayList<String>(copies));
+      this.overwrite = overwrite;
+      this.move = move;
+    }
+
+    /**
+     * Applies every configured copy in order. Pairs whose source has no
+     * values, or whose source and destination are the same key, are skipped.
+     */
+    @Override
+    public void transform(Metadata metadata, Map<String, String> params) {
+      for (int i = 0; i < copies.size(); i += 2) {
+        String from = copies.get(i);
+        String to = copies.get(i + 1);
+        if (from.equals(to)) {
+          // Copying a key onto itself changes nothing, and in move mode the
+          // source deletion below would wipe the values just written.
+          log.log(Level.FINE, "Source and destination are both {0}. Skipping",
+              from);
+          continue;
+        }
+        Set<String> values = metadata.getAllValues(from);
+        if (values.isEmpty()) {
+          log.log(Level.FINE, "No values for {0}. Skipping", from);
+          continue;
+        }
+        log.log(Level.FINE, "Copying values from {0} to {1}: {2}",
+            new Object[] {from, to, values});
+        Set<String> destValues = metadata.getAllValues(to);
+        if (!overwrite && !destValues.isEmpty()) {
+          // Merge into a fresh set rather than mutating what Metadata returned.
+          values = new HashSet<String>(values);
+          log.log(Level.FINER, "Preexisting values for {0}. Combining: {1}",
+              new Object[] {to, destValues});
+          values.addAll(destValues);
+        }
+        metadata.set(to, values);
+        if (move) {
+          log.log(Level.FINER, "Deleting source {0}", from);
+          metadata.set(from, Collections.<String>emptySet());
+        }
+      }
+    }
+
+    @Override
+    public String toString() {
+      return "CopyTransform(copies=" + copies + ",overwrite=" + overwrite
+          + ",move=" + move + ")";
+    }
+  }
+
+  /**
+   * Creates a transform that removes metadata keys. Every configuration
+   * entry named {@code "keyX"}, with {@code X} an integer, contributes its
+   * value as the name of one metadata key to remove.
+   *
+   * <p>Example configuration:
+   * <pre><code>key2=sensitive
+   *key4=unhelpful</code></pre>
+   *
+   * @param config configuration entries as described above
+   * @return the configured delete transform
+   */
+  public static DocumentTransform deleteMetadata(Map<String, String> config) {
+    List<String> listed = parseList(config, "key");
+    if (listed.isEmpty()) {
+      log.warning("No entries listed to delete");
+    }
+    return new DeleteTransform(new HashSet<String>(listed));
+  }
+
+  /**
+   * Collects the values of all configuration entries whose key is {@code
+   * prefix} immediately followed by an integer (for example {@code "key3"}).
+   * Entries that start with {@code prefix} but do not end in an integer are
+   * logged and ignored. The order of the result follows the iteration order
+   * of {@code config}.
+   *
+   * @param config the transform's configuration entries
+   * @param prefix the text that must precede the integer in the entry's key
+   * @return the values of the matching entries
+   */
+  private static List<String> parseList(Map<String, String> config,
+      String prefix) {
+    List<String> values = new ArrayList<String>();
+    for (Map.Entry<String, String> me : config.entrySet()) {
+      String name = me.getKey();
+      if (!name.startsWith(prefix)) {
+        continue;
+      }
+      String number = name.substring(prefix.length());
+      if (!INTEGER_PATTERN.matcher(number).matches()) {
+        log.log(Level.FINE, "Ignoring {0}. Number does not follow {1}",
+            new Object[] {name, prefix});
+        continue;
+      }
+      values.add(me.getValue());
+    }
+    return values;
+  }
+
+  /** Transform that sets each configured metadata key to an empty value set. */
+  private static class DeleteTransform implements DocumentTransform {
+    private final List<String> keys;
+
+    public DeleteTransform(Collection<String> keys) {
+      // Keep a private, read-only snapshot of the caller's collection.
+      List<String> snapshot = new ArrayList<String>(keys);
+      this.keys = Collections.unmodifiableList(snapshot);
+    }
+
+    @Override
+    public void transform(Metadata metadata, Map<String, String> params) {
+      Set<String> none = Collections.<String>emptySet();
+      for (String doomed : keys) {
+        metadata.set(doomed, none);
+      }
+    }
+
+    @Override
+    public String toString() {
+      return String.format("DeleteTransform(keys=%s)", keys);
+    }
+  }
+
+  /**
+   * Creates a transform that performs string replacements within metadata
+   * values. The metadata keys whose values are rewritten are named by {@code
+   * "keyX"} configuration entries (where {@code X} is an integer). Replaced
+   * values take the place of the originals unless the {@code "overwrite"}
+   * entry is set to {@code "false"}, in which case the originals are kept
+   * alongside them.
+   *
+   * <p>The text to find is given by exactly one of {@code "string"} or {@code
+   * "pattern"}. A {@code string} is matched literally and its {@code
+   * "replacement"} is inserted literally; a {@code pattern} is a regular
+   * expression and its {@code "replacement"} is a regular expression
+   * replacement (so it may refer to groups such as {@code $1}).
+   *
+   * <p>Example configuration:
+   * <pre><code>overwrite=false
+   *key1=favorite
+   *key5=least favorite
+   *pattern=(Java|C|Perl)
+   *replacement=$1 (but it should be x86 assembler)</code></pre>
+   *
+   * @param config configuration entries as described above
+   * @return the configured replace transform
+   * @throws IllegalArgumentException if {@code "replacement"} is missing, or
+   *     if both or neither of {@code "string"} and {@code "pattern"} are set
+   */
+  public static DocumentTransform replaceMetadata(Map<String, String> config) {
+    // Overwriting is the default; only an explicit value can turn it off.
+    String overwriteSetting = config.get("overwrite");
+    boolean overwrite
+        = overwriteSetting == null || Boolean.parseBoolean(overwriteSetting);
+
+    String literal = config.get("string");
+    String regex = config.get("pattern");
+    String replacement = config.get("replacement");
+    if (replacement == null) {
+      throw new IllegalArgumentException("Missing replacement");
+    }
+    if (literal != null && regex != null) {
+      throw new IllegalArgumentException(
+          "Using both string and pattern is not permitted");
+    }
+    if (literal == null && regex == null) {
+      throw new IllegalArgumentException(
+          "Neither string or pattern is defined");
+    }
+
+    final Pattern toMatch;
+    final String replacementPattern;
+    if (literal != null) {
+      // Quote both sides so neither is interpreted as regex syntax.
+      toMatch = Pattern.compile(Pattern.quote(literal));
+      replacementPattern = Matcher.quoteReplacement(replacement);
+    } else {
+      toMatch = Pattern.compile(regex);
+      replacementPattern = replacement;
+    }
+
+    List<String> listed = parseList(config, "key");
+    if (listed.isEmpty()) {
+      log.warning("No entries listed to replace");
+    }
+    return new ReplaceTransform(new HashSet<String>(listed), toMatch,
+        replacementPattern, overwrite);
+  }
+
+  /**
+   * Transform that rewrites the values of the configured metadata keys by
+   * replacing every match of {@code toMatch} with {@code replacement}.
+   */
+  private static class ReplaceTransform implements DocumentTransform {
+    /** Metadata keys whose values are rewritten. */
+    private final List<String> keys;
+    /** Pattern located within each value. */
+    private final Pattern toMatch;
+    /** Replacement in {@link Matcher#replaceAll} syntax. */
+    private final String replacement;
+    /** Whether rewritten values replace the originals or are added to them. */
+    private final boolean overwrite;
+
+    public ReplaceTransform(Collection<String> keys, Pattern toMatch,
+        String replacement, boolean overwrite) {
+      this.keys = Collections.unmodifiableList(new ArrayList<String>(keys));
+      this.toMatch = toMatch;
+      this.replacement = replacement;
+      this.overwrite = overwrite;
+    }
+
+    /**
+     * Rewrites the values of each configured key that has values. When
+     * overwriting, the key ends up with exactly the rewritten values;
+     * otherwise it ends up with the originals plus the rewritten values.
+     */
+    @Override
+    public void transform(Metadata metadata, Map<String, String> params) {
+      for (String key : keys) {
+        Set<String> original = metadata.getAllValues(key);
+        if (original.isEmpty()) {
+          log.log(Level.FINE, "No values for {0}. Skipping", key);
+          continue;
+        }
+        log.log(Level.FINE, "Replacing values that match {0} with {1}: {2}",
+            new Object[] {toMatch, replacement, original});
+        // When overwriting, build the result from scratch. Removing originals
+        // from the set that also collects results would discard a result
+        // that happens to equal an original processed later in the loop.
+        Set<String> values = overwrite
+            ? new HashSet<String>() : new HashSet<String>(original);
+        for (String value : original) {
+          values.add(toMatch.matcher(value).replaceAll(replacement));
+        }
+        log.log(Level.FINE, "After replacing: {0}", values);
+        metadata.set(key, values);
+      }
+    }
+
+    @Override
+    public String toString() {
+      return "ReplaceTransform(keys=" + keys + ",toMatch=" + toMatch
+          + ",replacement=" + replacement + ",overwrite=" + overwrite + ")";
+    }
+  }
+}
diff --git a/test/com/google/enterprise/adaptor/prebuilt/PrebuiltTransformsTest.java b/test/com/google/enterprise/adaptor/prebuilt/PrebuiltTransformsTest.java
new file mode 100644
index 0000000..26d9260
--- /dev/null
+++ b/test/com/google/enterprise/adaptor/prebuilt/PrebuiltTransformsTest.java
@@ -0,0 +1,266 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.enterprise.adaptor.prebuilt;
+
+import static org.junit.Assert.*;
+
+import com.google.enterprise.adaptor.DocumentTransform;
+import com.google.enterprise.adaptor.Metadata;
+
+import org.junit.*;
+import org.junit.rules.ExpectedException;
+
+import java.util.*;
+
+/** Unit tests for {@link PrebuiltTransforms}. */
+public class PrebuiltTransformsTest {
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  @Test
+  public void testCopy() {
+    // Definitions 1-4 are complete. "trash" is not an integer prefix, and 5
+    // and 6 each supply only half of a pair, so the expected metadata below
+    // shows no effect from them.
+    Map<String, String> settings = new HashMap<String, String>();
+    settings.put("1.from", "colour");
+    settings.put("1.to", "color");
+    settings.put("2.from", "author");
+    settings.put("2.to", "contributors");
+    settings.put("3.from", "color");
+    settings.put("3.to", "favorite");
+    settings.put("4.from", "missing");
+    settings.put("4.to", "favorite");
+    settings.put("trash.from", "colour");
+    settings.put("trash.to", "not used");
+    settings.put("5.from", "colour");
+    settings.put("6.to", "colour");
+    DocumentTransform copier = PrebuiltTransforms.copyMetadata(
+        Collections.unmodifiableMap(settings));
+
+    // Without overwrite, copied values are merged into the destination.
+    Metadata expected = new Metadata();
+    expected.add("colour", "black");
+    expected.add("color", "black");
+    expected.add("favorite", "black");
+    expected.add("author", "Fred");
+    expected.add("contributors", "Mary");
+    expected.add("contributors", "George");
+    expected.add("contributors", "Fred");
+
+    Metadata actual = new Metadata();
+    actual.add("colour", "black");
+    actual.add("author", "Fred");
+    actual.add("contributors", "Mary");
+    actual.add("contributors", "George");
+    copier.transform(actual, new HashMap<String, String>());
+    assertEquals(expected.unmodifiableView(), actual);
+  }
+
+  @Test
+  public void testCopyToString() {
+    // An empty configuration gives no copies and overwrite=false.
+    Map<String, String> noSettings
+        = Collections.unmodifiableMap(new HashMap<String, String>());
+    String description = PrebuiltTransforms.copyMetadata(noSettings).toString();
+    assertEquals("CopyTransform(copies=[],overwrite=false,move=false)",
+        description);
+  }
+
+  @Test
+  public void testMove() {
+    Map<String, String> settings = new HashMap<String, String>();
+    settings.put("overwrite", "true");
+    settings.put("1.from", "colour");
+    settings.put("1.to", "color");
+    settings.put("2.from", "author");
+    settings.put("2.to", "contributors");
+    settings.put("3.from", "color");
+    settings.put("3.to", "favorite");
+    settings.put("4.from", "missing");
+    settings.put("4.to", "favorite");
+    DocumentTransform mover = PrebuiltTransforms.moveMetadata(
+        Collections.unmodifiableMap(settings));
+
+    // colour -> color -> favorite, and author replaces contributors; every
+    // source key ends up empty.
+    Metadata expected = new Metadata();
+    expected.add("favorite", "black");
+    expected.add("contributors", "Fred");
+
+    Metadata actual = new Metadata();
+    actual.add("colour", "black");
+    actual.add("author", "Fred");
+    actual.add("contributors", "Mary");
+    actual.add("contributors", "George");
+    mover.transform(actual, new HashMap<String, String>());
+    assertEquals(expected.unmodifiableView(), actual);
+  }
+
+  @Test
+  public void testMoveToString() {
+    // An empty configuration gives no copies and overwrite=false.
+    Map<String, String> noSettings
+        = Collections.unmodifiableMap(new HashMap<String, String>());
+    String description = PrebuiltTransforms.moveMetadata(noSettings).toString();
+    assertEquals("CopyTransform(copies=[],overwrite=false,move=true)",
+        description);
+  }
+
+  @Test
+  public void testDelete() {
+    // "keyy" does not end in an integer, so "contributors" must survive.
+    Map<String, String> settings = new HashMap<String, String>();
+    settings.put("key1", "missing");
+    settings.put("key3", "author");
+    settings.put("keyy", "contributors");
+    DocumentTransform deleter = PrebuiltTransforms.deleteMetadata(
+        Collections.unmodifiableMap(settings));
+
+    Metadata expected = new Metadata();
+    expected.add("colour", "black");
+    expected.add("contributors", "Mary");
+    expected.add("contributors", "George");
+
+    Metadata actual = new Metadata();
+    actual.add("colour", "black");
+    actual.add("author", "Fred");
+    actual.add("contributors", "Mary");
+    actual.add("contributors", "George");
+    deleter.transform(actual, new HashMap<String, String>());
+    assertEquals(expected.unmodifiableView(), actual);
+  }
+
+  @Test
+  public void testDeleteToString() {
+    // An empty configuration lists no keys to delete.
+    Map<String, String> noSettings
+        = Collections.unmodifiableMap(new HashMap<String, String>());
+    String description
+        = PrebuiltTransforms.deleteMetadata(noSettings).toString();
+    assertEquals("DeleteTransform(keys=[])", description);
+  }
+
+  @Test
+  public void testReplacePattern() {
+    // Doubles every vowel in the values of the listed keys; overwrite is left
+    // at its default, so the originals are replaced.
+    Map<String, String> settings = new HashMap<String, String>();
+    settings.put("key1", "colour");
+    settings.put("key2", "missing");
+    settings.put("key4", "contributors");
+    settings.put("pattern", "[aeiou]");
+    settings.put("replacement", "$0$0");
+    DocumentTransform replacer = PrebuiltTransforms.replaceMetadata(
+        Collections.unmodifiableMap(settings));
+
+    Metadata expected = new Metadata();
+    expected.add("colour", "blaack");
+    expected.add("author", "Fred");
+    expected.add("contributors", "Maary");
+    expected.add("contributors", "Geeoorgee");
+
+    Metadata actual = new Metadata();
+    actual.add("colour", "black");
+    actual.add("author", "Fred");
+    actual.add("contributors", "Mary");
+    actual.add("contributors", "George");
+    replacer.transform(actual, new HashMap<String, String>());
+    assertEquals(expected.unmodifiableView(), actual);
+  }
+
+  @Test
+  public void testReplaceString() {
+    // Both the needle "[test]" and the replacement "$0" must be treated
+    // literally; with overwrite=false the original values are kept as well.
+    Map<String, String> settings = new HashMap<String, String>();
+    settings.put("overwrite", "false");
+    settings.put("key1", "colour");
+    settings.put("key2", "missing");
+    settings.put("key4", "contributors");
+    settings.put("string", "[test]");
+    settings.put("replacement", "$0");
+    DocumentTransform replacer = PrebuiltTransforms.replaceMetadata(
+        Collections.unmodifiableMap(settings));
+
+    Metadata expected = new Metadata();
+    expected.add("colour", "black [test]");
+    expected.add("colour", "black $0");
+    expected.add("author", "Fred [test]");
+    expected.add("contributors", "Ma[test]ry[test]");
+    expected.add("contributors", "Ma$0ry$0");
+    expected.add("contributors", "George");
+
+    Metadata actual = new Metadata();
+    actual.add("colour", "black [test]");
+    actual.add("author", "Fred [test]");
+    actual.add("contributors", "Ma[test]ry[test]");
+    actual.add("contributors", "George");
+    replacer.transform(actual, new HashMap<String, String>());
+    assertEquals(expected.unmodifiableView(), actual);
+  }
+
+  @Test
+  public void testReplaceToString() {
+    // A literal needle shows up quoted (\Q...\E) and its replacement escaped.
+    Map<String, String> settings = new HashMap<String, String>();
+    settings.put("string", "tofind");
+    settings.put("replacement", "replace$0");
+    String description = PrebuiltTransforms.replaceMetadata(
+        Collections.unmodifiableMap(settings)).toString();
+    assertEquals("ReplaceTransform(keys=[],toMatch=\\Qtofind\\E,"
+        + "replacement=replace\\$0,overwrite=true)", description);
+  }
+
+  @Test
+  public void testReplaceMissingStringAndPattern() {
+    Map<String, String> config = new HashMap<String, String>();
+    config.put("replacement", "replace$0");
+    config = Collections.unmodifiableMap(config);
+
+    // Neither "string" nor "pattern" is configured, so creation must fail.
+    thrown.expect(IllegalArgumentException.class);
+    PrebuiltTransforms.replaceMetadata(config);
+  }
+
+  @Test
+  public void testReplaceBothStringAndPattern() {
+    Map<String, String> config = new HashMap<String, String>();
+    config.put("string", "tofind");
+    config.put("pattern", "tofind");
+    config.put("replacement", "replace$0");
+    config = Collections.unmodifiableMap(config);
+
+    // "string" and "pattern" are mutually exclusive, so creation must fail.
+    thrown.expect(IllegalArgumentException.class);
+    PrebuiltTransforms.replaceMetadata(config);
+  }
+
+  @Test
+  public void testReplaceMissingReplacement() {
+    Map<String, String> config = new HashMap<String, String>();
+    config.put("string", "tofind");
+    config = Collections.unmodifiableMap(config);
+
+    // "replacement" is mandatory, so creation must fail without it.
+    thrown.expect(IllegalArgumentException.class);
+    PrebuiltTransforms.replaceMetadata(config);
+  }
+}