Add batching for group definition feeds

This change is to address concerns over memory usage. Since the entire
feed is buffered in memory, batching the feeds allows us to reduce the
size of the block of memory allocated (which reduces the effects of
fragmentation) and reduces the amount of memory required at any point in
time.
diff --git a/src/com/google/enterprise/adaptor/DocIdPusher.java b/src/com/google/enterprise/adaptor/DocIdPusher.java
index 75b0096..2816983 100644
--- a/src/com/google/enterprise/adaptor/DocIdPusher.java
+++ b/src/com/google/enterprise/adaptor/DocIdPusher.java
@@ -142,7 +142,7 @@
    * a predictable iteration order, like {@link java.util.TreeMap}.
    *
    * @return {@code null} on success, otherwise the first GroupPrincipal to fail
-   * @throws InterruptedException if interrupted
+   * @throws InterruptedException if interrupted and no definitions were sent
    */
   public GroupPrincipal pushGroupDefinitions(
       Map<GroupPrincipal, ? extends Collection<Principal>> defs,
@@ -162,7 +162,7 @@
    * <p>If handler is {@code null}, then a default error handler is used.
    *
    * @return {@code null} on success, otherwise the first GroupPrincipal to fail
-   * @throws InterruptedException if interrupted
+   * @throws InterruptedException if interrupted and no definitions were sent
    */
   public GroupPrincipal pushGroupDefinitions(
       Map<GroupPrincipal, ? extends Collection<Principal>> defs,
diff --git a/src/com/google/enterprise/adaptor/DocIdSender.java b/src/com/google/enterprise/adaptor/DocIdSender.java
index 3c7d732..cd9ddd0 100644
--- a/src/com/google/enterprise/adaptor/DocIdSender.java
+++ b/src/com/google/enterprise/adaptor/DocIdSender.java
@@ -169,12 +169,14 @@
           // If this is not the first batch, then some items have already been
           // sent. Thus, return gracefully instead of throwing an exception so
           // that the caller can discover what was sent.
+          log.log(Level.INFO, "Pushing items interrupted");
           Thread.currentThread().interrupt();
           return batch.get(0);
         }
       }
       if (failedId != null) {
-        log.info("Failed to push all items. Failed on: " + failedId);
+        log.log(Level.INFO, "Failed to push all items. Failed on: {0}",
+            failedId);
         return failedId;
       }
       firstBatch = false;
@@ -188,12 +190,102 @@
       Map<GroupPrincipal, ? extends Collection<Principal>> defs,
       boolean caseSensitive, ExceptionHandler handler) 
       throws InterruptedException {
+    return pushGroupDefinitionsInternal(defs, caseSensitive, handler);
+  }
+
+  /*
+   * Internal version of pushGroupDefinitions() to add the parameterized generic
+   * T. We need the parameter to be able to create a List and add Map.Entries to
+   * that list.
+   *
+   * Unfortunately, due to a limitation in Java (which is still present in
+   * Java 7), the generics in the methods this one calls are forced to be
+   * parameterized even though it should be unnecessary. Fortunately, our API
+   * does not trigger this issue; it is only internal code that suffers.
+   *
+   * As an example test, these generics work fine:
+   *   private void method1(List<?> args) {
+   *     method2(args);
+   *   }
+   *
+   *   private <T> void method2(List<T> args) {
+   *     method1(args);
+   *   }
+   *
+   * Whereas these both fail to find the other method:
+   *   private void method3(List<List<?>> args) {
+   *     method4(args);
+   *   }
+   *
+   *  private <T> void method4(List<List<T>> args) {
+   *    method3(args);
+   *  }
+   *
+   * And so having any container of Map.Entry breaks mixing of parameterized
+   * and wildcard generic types.
+   *
+   * Luckily when mixed with concrete types it is only half broken:
+   *   List<List<Object>> l = null;
+   *   method3(l); // Fails
+   *   method4(l); // Compiles
+   */
+  private <T extends Collection<Principal>> GroupPrincipal
+      pushGroupDefinitionsInternal(
+      Map<GroupPrincipal, T> defs,
+      boolean caseSensitive, ExceptionHandler handler)
+      throws InterruptedException {
     if (defs.isEmpty()) {
       return null;
     }
     if (null == handler) {
       handler = defaultErrorHandler;
     }
+    boolean firstBatch = true;
+    final int max = config.getFeedMaxUrls();
+    Iterator<Map.Entry<GroupPrincipal, T>> defsIterator
+        = defs.entrySet().iterator();
+    List<Map.Entry<GroupPrincipal, T>> batch
+        = new ArrayList<Map.Entry<GroupPrincipal, T>>();
+    while (defsIterator.hasNext()) {
+      batch.clear();
+      for (int j = 0; j < max; j++) {
+        if (!defsIterator.hasNext()) {
+          break;
+        }
+        batch.add(defsIterator.next());
+      }
+      log.log(Level.INFO, "Pushing batch of {0} groups", batch.size());
+      GroupPrincipal failedId;
+      try {
+        failedId = pushSizedBatchOfGroups(batch, caseSensitive, handler);
+      } catch (InterruptedException ex) {
+        if (firstBatch) {
+          throw ex;
+        } else {
+          // If this is not the first batch, then some items have already been
+          // sent. Thus, return gracefully instead of throwing an exception so
+          // that the caller can discover what was sent.
+          log.log(Level.INFO, "Pushing items interrupted");
+          Thread.currentThread().interrupt();
+          return batch.get(0).getKey();
+        }
+      }
+      if (failedId != null) {
+        log.log(Level.INFO, "Failed to push all groups. Failed on: {0}",
+            failedId);
+        return failedId;
+      }
+      firstBatch = false;
+    }
+    log.info("Pushed groups");
+    return null;
+  }
+
+  private <T extends Collection<Principal>> GroupPrincipal
+      pushSizedBatchOfGroups(
+      List<Map.Entry<GroupPrincipal, T>> defs,
+      boolean caseSensitive, ExceptionHandler handler)
+      throws InterruptedException {
     String groupsDefXml
         = fileMaker.makeGroupsDefinitionsXml(defs, caseSensitive);
     boolean keepGoing = true;
@@ -216,12 +308,12 @@
     }
     GroupPrincipal last = null;
     if (success) {
-      log.info("pushing groups succeeded");
+      log.info("pushing groups batch succeeded");
     } else {
-      log.log(Level.WARNING, "gave up pushing groups");
-      last = defs.entrySet().iterator().next().getKey();  // checked on entry
+      last = defs.get(0).getKey();  // checked in pushGroupDefinitions()
+      log.log(Level.WARNING, "gave up pushing groups. First item: {0}", last);
     }
-    log.info("finished pushing groups");
+    log.info("finished pushing batch of groups");
     return last;
   }
 
diff --git a/src/com/google/enterprise/adaptor/GsaFeedFileMaker.java b/src/com/google/enterprise/adaptor/GsaFeedFileMaker.java
index 1cdb0bb..19471d2 100644
--- a/src/com/google/enterprise/adaptor/GsaFeedFileMaker.java
+++ b/src/com/google/enterprise/adaptor/GsaFeedFileMaker.java
@@ -297,19 +297,20 @@
   }
 
   /** Adds all the groups' definitions into body. */
-  private void constructGroupsDefinitionsFileBody(Document doc,
-      Element root, Map<GroupPrincipal, ? extends Collection<Principal>> items,
+  private <T extends Collection<Principal>> void
+      constructGroupsDefinitionsFileBody(Document doc, Element root,
+      Collection<Map.Entry<GroupPrincipal, T>> items,
       boolean caseSensitiveMembers) {
-    for (Map.Entry<GroupPrincipal, ? extends Collection<Principal>> group
-        : items.entrySet()) {
+    for (Map.Entry<GroupPrincipal, T> group : items) {
       constructSingleMembership(doc, root, group.getKey(), group.getValue(),
           caseSensitiveMembers);
     }
   }
 
   /** Puts all groups' definitions into document. */
-  private void constructGroupsDefinitionsFeedFile(Document doc,
-      Map<GroupPrincipal, ? extends Collection<Principal>> items,
+  private <T extends Collection<Principal>> void
+      constructGroupsDefinitionsFeedFile(Document doc,
+      Collection<Map.Entry<GroupPrincipal, T>> items,
       boolean caseSensitiveMembers) {
     Element root = doc.createElement("xmlgroups");
     doc.appendChild(root);
@@ -318,9 +319,12 @@
     constructGroupsDefinitionsFileBody(doc, root, items, caseSensitiveMembers);
   }
 
+  // This and all the methods it calls with things from 'items' requires the
+  // parameter T even though ? would normally suffice. See comment in
+  // DocIdSender to learn about the Java limitation causing the need for T.
   /** Makes feed file with groups and their definitions. */
-  public String makeGroupsDefinitionsXml(
-      Map<GroupPrincipal, ? extends Collection<Principal>> items,
+  public <T extends Collection<Principal>> String makeGroupsDefinitionsXml(
+      Collection<Map.Entry<GroupPrincipal, T>> items,
       boolean caseSensitiveMembers) {
     try {
       DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
diff --git a/test/com/google/enterprise/adaptor/DocIdSenderTest.java b/test/com/google/enterprise/adaptor/DocIdSenderTest.java
index ab5b1cb..8f364f5 100644
--- a/test/com/google/enterprise/adaptor/DocIdSenderTest.java
+++ b/test/com/google/enterprise/adaptor/DocIdSenderTest.java
@@ -22,6 +22,7 @@
 import java.io.IOException;
 import java.nio.charset.Charset;
 import java.util.*;
+import java.util.AbstractMap.SimpleImmutableEntry;
 import java.util.concurrent.atomic.AtomicLong;
 
 /**
@@ -223,6 +224,48 @@
   }
 
   @Test
+  public void testPushGroupsNormal() throws Exception {
+    // Order of iteration matters
+    Map<GroupPrincipal, Collection<Principal>> groups
+        = new TreeMap<GroupPrincipal, Collection<Principal>>();
+    groups.put(new GroupPrincipal("g1"),
+        Arrays.asList(new UserPrincipal("u1"), new GroupPrincipal("g2")));
+    groups.put(new GroupPrincipal("g2"),
+        Arrays.asList(new UserPrincipal("u2"), new GroupPrincipal("g3")));
+    groups.put(new GroupPrincipal("g3"),
+        Arrays.asList(new UserPrincipal("u3"), new GroupPrincipal("g4")));
+    groups = Collections.unmodifiableMap(groups);
+
+    // I'm sorry.
+    List<List<Map.Entry<GroupPrincipal, Collection<Principal>>>> goldenGroups
+        = new ArrayList<List<Map.Entry<GroupPrincipal,
+          Collection<Principal>>>>();
+    {
+      List<Map.Entry<GroupPrincipal, Collection<Principal>>> tmp
+          = new ArrayList<Map.Entry<GroupPrincipal, Collection<Principal>>>();
+      tmp.add(new SimpleImmutableEntry<GroupPrincipal, Collection<Principal>>(
+          new GroupPrincipal("g1"), groups.get(new GroupPrincipal("g1"))));
+      tmp.add(new SimpleImmutableEntry<GroupPrincipal, Collection<Principal>>(
+          new GroupPrincipal("g2"), groups.get(new GroupPrincipal("g2"))));
+      goldenGroups.add(Collections.unmodifiableList(tmp));
+      goldenGroups.add(Collections.
+          <Map.Entry<GroupPrincipal, Collection<Principal>>>singletonList(
+          new SimpleImmutableEntry<GroupPrincipal, Collection<Principal>>(
+            new GroupPrincipal("g3"), groups.get(new GroupPrincipal("g3")))));
+      goldenGroups = Collections.unmodifiableList(goldenGroups);
+    }
+
+    config.setValue("feed.maxUrls", "2");
+    docIdSender.pushGroupDefinitions(groups, false, null);
+
+    assertEquals(2, fileMaker.i);
+    assertEquals(goldenGroups, fileMaker.groupses);
+    assertEquals(Arrays.asList(new String[] {
+      "0", "1",
+    }), fileSender.xmlStrings);
+  }
+
+  @Test
   public void testNamedResources() throws Exception {
     config.setValue("feed.name", "testing");
     assertNull(docIdSender.pushNamedResources(
@@ -268,6 +311,8 @@
     List<String> names = new ArrayList<String>();
     List<List<? extends DocIdSender.Item>> recordses
         = new ArrayList<List<? extends DocIdSender.Item>>();
+    // Don't use generics because of limitations in Java
+    List<Object> groupses = new ArrayList<Object>();
     int i;
 
     public MockGsaFeedFileMaker() {
@@ -281,10 +326,19 @@
       recordses.add(items);
       return "" + i++;
     }
+
+    @Override
+    public <T extends Collection<Principal>> String makeGroupsDefinitionsXml(
+        Collection<Map.Entry<GroupPrincipal, T>> items,
+        boolean caseSensitiveMembers) {
+      groupses.add(new ArrayList<Map.Entry<GroupPrincipal, T>>(items));
+      return "" + i++;
+    }
   }
 
   private static class MockGsaFeedFileSender extends GsaFeedFileSender {
     List<String> datasources = new ArrayList<String>();
+    List<String> groupsources = new ArrayList<String>();
     List<String> xmlStrings = new ArrayList<String>();
 
     public MockGsaFeedFileSender() {
@@ -298,6 +352,13 @@
       datasources.add(datasource);
       xmlStrings.add(xmlString);
     }
+
+    @Override
+    public void sendGroups(String groupsource, String xmlString,
+        boolean useCompression) throws IOException {
+      groupsources.add(groupsource);
+      xmlStrings.add(xmlString);
+    }
   }
 
   private static class RuntimeExceptionExceptionHandler
diff --git a/test/com/google/enterprise/adaptor/GsaFeedFileMakerTest.java b/test/com/google/enterprise/adaptor/GsaFeedFileMakerTest.java
index e9e7020..fc7a256 100644
--- a/test/com/google/enterprise/adaptor/GsaFeedFileMakerTest.java
+++ b/test/com/google/enterprise/adaptor/GsaFeedFileMakerTest.java
@@ -293,7 +293,7 @@
         + "<!--GSA EasyConnector-->\n"
         + "</xmlgroups>\n";
     String xml = meker.makeGroupsDefinitionsXml(
-        new TreeMap<GroupPrincipal, List<Principal>>(), true);
+        new TreeMap<GroupPrincipal, List<Principal>>().entrySet(), true);
     xml = xml.replaceAll("\r\n", "\n");
     assertEquals(golden, xml);
   }
@@ -321,7 +321,7 @@
     List<Principal> members = new ArrayList<Principal>();
     members.add(new UserPrincipal("MacLeod\\Duncan"));
     groupDefs.put(new GroupPrincipal("immortals"), members);
-    String xml = meker.makeGroupsDefinitionsXml(groupDefs, false);
+    String xml = meker.makeGroupsDefinitionsXml(groupDefs.entrySet(), false);
     xml = xml.replaceAll("\r\n", "\n");
     assertEquals(golden, xml);
   }
@@ -374,7 +374,7 @@
     members2.add(new UserPrincipal("splat"));
     members2.add(new UserPrincipal("plump"));
     groupDefs.put(new GroupPrincipal("sounds"), members2);
-    String xml = meker.makeGroupsDefinitionsXml(groupDefs, false);
+    String xml = meker.makeGroupsDefinitionsXml(groupDefs.entrySet(), false);
     xml = xml.replaceAll("\r\n", "\n");
     assertEquals(golden, xml);
   }
@@ -408,7 +408,7 @@
     members.add(new UserPrincipal("MacLeod\\Duncan", "goodguys"));
     members.add(new GroupPrincipal("badguys", "3vil"));
     groupDefs.put(new GroupPrincipal("immortals"), members);
-    String xml = meker.makeGroupsDefinitionsXml(groupDefs, true);
+    String xml = meker.makeGroupsDefinitionsXml(groupDefs.entrySet(), true);
     xml = xml.replaceAll("\r\n", "\n");
     assertEquals(golden, xml);
   }