Merge branch 'master' of https://code.google.com/p/plexi
diff --git a/src/com/google/enterprise/adaptor/Config.java b/src/com/google/enterprise/adaptor/Config.java
index 56db897..9d1cd53 100644
--- a/src/com/google/enterprise/adaptor/Config.java
+++ b/src/com/google/enterprise/adaptor/Config.java
@@ -75,13 +75,17 @@
  *     are already URLs and avoid them being inserted into adaptor
        generated URLs.   Defaults to false
  * <tr><td> </td><td>feed.crawlImmediatelyBitEnabled </td><td> send bit telling
- *     GSA to crawl immediately.  Defaults to false
+ *     GSA to crawl immediately.  Defaults to not overriding the adaptor's
+ *     decision, which is typically to send updates as crawl-immediately and
+ *     let the GSA schedule the crawl of all other ids.
+ * <tr><td> </td><td>feed.noRecrawlBitEnabled </td><td> send bit telling
+ *     GSA to crawl your documents only once.  Defaults to not overriding the
+ *     adaptor's decision, which is typically to send all documents as
+ *     recrawlable (equivalent to a value of false).
  * <tr><td> </td><td>feed.maxUrls </td><td> set max number of URLs included
  *     per feed file.    Defaults to 5000
  * <tr><td> </td><td>feed.name </td><td> source name used in feeds. Generated
  *     if not provided
- * <tr><td> </td><td>feed.noRecrawlBitEnabled </td><td> send bit telling
- *     GSA to crawl your documents only once.  Defaults to  false
  * <tr><td> </td><td>feed.archiveDirectory </td><td> specifies a directory in
  *     which all feeds sent to the GSA will be archived.  Feeds that failed to
  *     be sent to the GSA will be tagged with "FAILED" in the file name.
@@ -235,8 +239,8 @@
             return rawValue;
           }
         });
-    addKey("feed.noRecrawlBitEnabled", "false");
-    addKey("feed.crawlImmediatelyBitEnabled", "false");
+    addKey("feed.noRecrawlBitEnabled", "");
+    addKey("feed.crawlImmediatelyBitEnabled", "");
     //addKey("feed.noFollowBitEnabled", "false");
     addKey("feed.maxUrls", "5000");
     addKey("adaptor.pushDocIdsOnStartup", "true");
@@ -442,21 +446,43 @@
     return Boolean.parseBoolean(getValue("gsa.acceptsDocControlsHeader"));
   }
 
-  /**
-   * Optional (default false): Adds no-recrawl bit with sent records in feed
-   * file. If connector handles updates and deletes then GSA does not have to
-   * recrawl periodically to notice that a document is changed or deleted.
-   */
-  boolean isFeedNoRecrawlBitEnabled() {
-    return Boolean.getBoolean(getValue("feed.noRecrawlBitEnabled"));
+  static class OverridableBoolean {
+    final boolean isOverriden; // whether value is to be overridden
+    final boolean value; // the overriding value
+    private OverridableBoolean(boolean override) {
+      isOverriden = true;
+      value = override;
+    }
+    private OverridableBoolean() {
+      isOverriden = false;
+      value = false; // placeholder; not meaningful when isOverriden is false
+    }
   }
 
   /**
-   * Optional (default false): Adds crawl-immediately bit with sent records in
+   * Optional: Adds crawl-immediately bit with sent records in
    * feed file.  This bit makes the sent URL get crawl priority.
    */
-  boolean isCrawlImmediatelyBitEnabled() {
-    return Boolean.parseBoolean(getValue("feed.crawlImmediatelyBitEnabled"));
+  OverridableBoolean isCrawlImmediatelyBitEnabled() {
+    String provided = getValue("feed.crawlImmediatelyBitEnabled");
+    if ("".equals(provided.trim())) {
+      return new OverridableBoolean();
+    }
+    return new OverridableBoolean(Boolean.parseBoolean(provided));
+  }
+
+  /**
+   * Optional: Adds no-recrawl bit with sent records in feed
+   * file. If connector handles updates and deletes then GSA
+   * does not have to recrawl periodically to notice that a
+   * document is changed or deleted.
+   */
+  OverridableBoolean isFeedNoRecrawlBitEnabled() {
+    String provided = getValue("feed.noRecrawlBitEnabled");
+    if ("".equals(provided.trim())) {
+      return new OverridableBoolean();
+    }
+    return new OverridableBoolean(Boolean.parseBoolean(provided));
   }
 
   /**
diff --git a/src/com/google/enterprise/adaptor/GsaCommunicationHandler.java b/src/com/google/enterprise/adaptor/GsaCommunicationHandler.java
index dcd785e..db77785 100644
--- a/src/com/google/enterprise/adaptor/GsaCommunicationHandler.java
+++ b/src/com/google/enterprise/adaptor/GsaCommunicationHandler.java
@@ -208,7 +208,11 @@
     aclTransform = createAclTransform();
     GsaFeedFileMaker fileMaker = new GsaFeedFileMaker(docIdCodec, aclTransform,
         config.isGsa614FeedWorkaroundEnabled(),
-        config.isGsa70AuthMethodWorkaroundEnabled());
+        config.isGsa70AuthMethodWorkaroundEnabled(),
+        config.isCrawlImmediatelyBitEnabled().isOverriden,
+        config.isCrawlImmediatelyBitEnabled().value,
+        config.isFeedNoRecrawlBitEnabled().isOverriden,
+        config.isFeedNoRecrawlBitEnabled().value);
     GsaFeedFileArchiver fileArchiver =
         new GsaFeedFileArchiver(config.getFeedArchiveDirectory());
     docIdSender = new DocIdSender(fileMaker, fileSender, fileArchiver, journal,
diff --git a/src/com/google/enterprise/adaptor/GsaFeedFileMaker.java b/src/com/google/enterprise/adaptor/GsaFeedFileMaker.java
index 37a329d..82056fe 100644
--- a/src/com/google/enterprise/adaptor/GsaFeedFileMaker.java
+++ b/src/com/google/enterprise/adaptor/GsaFeedFileMaker.java
@@ -65,6 +65,10 @@
   private final AclTransform aclTransform;
   private final boolean separateClosingRecordTagWorkaround;
   private final boolean useAuthMethodWorkaround;
+  private final boolean crawlImmediatelyIsOverriden;
+  private final boolean crawlImmediatelyOverrideValue;
+  private final boolean crawlOnceIsOverriden;
+  private final boolean crawlOnceOverrideValue;
 
   public GsaFeedFileMaker(DocIdEncoder encoder, AclTransform aclTransform) {
     this(encoder, aclTransform, false, false);
@@ -73,11 +77,26 @@
   public GsaFeedFileMaker(DocIdEncoder encoder, AclTransform aclTransform,
       boolean separateClosingRecordTagWorkaround,
       boolean useAuthMethodWorkaround) {
+    this(encoder, aclTransform, separateClosingRecordTagWorkaround,
+        useAuthMethodWorkaround, false, false, false, false);
+  }
+
+  public GsaFeedFileMaker(DocIdEncoder encoder, AclTransform aclTransform,
+      boolean separateClosingRecordTagWorkaround,
+      boolean useAuthMethodWorkaround,
+      boolean overrideCrawlImmediately,
+      boolean crawlImmediately,
+      boolean overrideCrawlOnce,
+      boolean crawlOnce) {
     this.idEncoder = encoder;
     this.aclTransform = aclTransform;
     this.separateClosingRecordTagWorkaround
         = separateClosingRecordTagWorkaround;
     this.useAuthMethodWorkaround = useAuthMethodWorkaround;
+    this.crawlImmediatelyIsOverriden = overrideCrawlImmediately;
+    this.crawlImmediatelyOverrideValue = crawlImmediately;
+    this.crawlOnceIsOverriden = overrideCrawlOnce;
+    this.crawlOnceOverrideValue = crawlOnce;
   }
 
   /** Adds header to document's root.
@@ -123,10 +142,15 @@
     if (docRecord.isToBeLocked()) {
       record.setAttribute("lock", "true");
     }
-    if (docRecord.isToBeCrawledImmediately()) {
+    if (crawlImmediatelyIsOverriden) {
+      record.setAttribute("crawl-immediately",
+          "" + crawlImmediatelyOverrideValue);
+    } else if (docRecord.isToBeCrawledImmediately()) {
       record.setAttribute("crawl-immediately", "true");
     }
-    if (docRecord.isToBeCrawledOnce()) {
+    if (crawlOnceIsOverriden) {
+      record.setAttribute("crawl-once", "" + crawlOnceOverrideValue);
+    } else if (docRecord.isToBeCrawledOnce()) {
       record.setAttribute("crawl-once", "true");
     }
     if (useAuthMethodWorkaround) {
diff --git a/test/com/google/enterprise/adaptor/GsaFeedFileMakerTest.java b/test/com/google/enterprise/adaptor/GsaFeedFileMakerTest.java
index 7eefa33..787448b 100644
--- a/test/com/google/enterprise/adaptor/GsaFeedFileMakerTest.java
+++ b/test/com/google/enterprise/adaptor/GsaFeedFileMakerTest.java
@@ -489,4 +489,174 @@
     xml = xml.replaceAll("\r\n", "\n");
     assertEquals(golden, xml);
   }
+
+  @Test
+  public void testCrawlImmediatelyOverride() throws java.net.URISyntaxException {
+    GsaFeedFileMaker lclMeker = new GsaFeedFileMaker(encoder, aclTransform,
+        false, false,
+        /*override crawl-immediately?*/ true,
+        /*crawl-immediately value*/ false,
+        /*override crawl-once?*/ false,
+        /*crawl-once value*/ false);
+    String golden =
+        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n"
+        + "<!DOCTYPE gsafeed PUBLIC \"-//Google//DTD GSA Feeds//EN\" \"\">\n"
+        + "<gsafeed>\n"
+        + "<!--GSA EasyConnector-->\n"
+        + "<header>\n"
+        + "<datasource>t3sT</datasource>\n"
+        + "<feedtype>metadata-and-url</feedtype>\n"
+        + "</header>\n"
+        + "<group>\n"
+
+        // (1)
+        + "<record crawl-immediately=\"false\""
+        + " displayurl=\"http://f000nkey.net\" mimetype=\"text/plain\""
+        + " url=\"http://localhost/E11\"/>\n"
+
+        // (2)
+        + "<record crawl-immediately=\"false\""
+        + " displayurl=\"http://yankee.doodle.com\""
+        + " last-modified=\"Thu, 01 Jan 1970 00:00:00 +0000\""
+        + " mimetype=\"text/plain\" url=\"http://localhost/elefenta\"/>\n"
+
+        // (3)
+        + "<record crawl-immediately=\"false\""
+        + " displayurl=\"http://google.com/news\""
+        + " last-modified=\"Fri, 02 Jan 1970 00:00:00 +0000\""
+        + " mimetype=\"text/plain\" url=\"http://localhost/gone\"/>\n"
+
+        // (4)
+        + "<record crawl-immediately=\"false\" crawl-once=\"true\""
+        + " lock=\"true\" mimetype=\"text/plain\""
+        + " url=\"http://localhost/flagson\"/>\n"
+
+        // (5)
+        + "<record action=\"delete\" crawl-immediately=\"false\""
+        + " mimetype=\"text/plain\""
+        + " url=\"http://localhost/deleted\"/>\n"
+
+        + "</group>\n"
+        + "</gsafeed>\n";
+
+    ArrayList<DocIdPusher.Record> ids = new ArrayList<DocIdPusher.Record>();
+    DocIdPusher.Record.Builder attrBuilder 
+        = new DocIdPusher.Record.Builder(new DocId("E11"));
+
+    // (1)
+    attrBuilder.setResultLink(new URI("http://f000nkey.net"));
+    ids.add(attrBuilder.build());
+
+    // (2)
+    attrBuilder.setResultLink(new URI("http://yankee.doodle.com"));    
+    attrBuilder.setLastModified(new Date(0));    
+    attrBuilder.setCrawlImmediately(true);    
+    attrBuilder.setDocId(new DocId("elefenta"));
+    ids.add(attrBuilder.build());
+
+    // (3)
+    attrBuilder.setResultLink(new URI("http://google.com/news"));    
+    attrBuilder.setLastModified(new Date(1000 * 60 * 60 * 24));    
+    attrBuilder.setCrawlImmediately(false);    
+    attrBuilder.setCrawlOnce(false);    
+    attrBuilder.setDocId(new DocId("gone"));
+    ids.add(attrBuilder.build());
+
+    // (4)
+    ids.add(new DocIdPusher.Record.Builder(new DocId("flagson"))
+        .setLock(true).setCrawlImmediately(true).setCrawlOnce(true).build());
+
+    // (5)
+    ids.add(new DocIdPusher.Record.Builder(new DocId("deleted"))
+        .setDeleteFromIndex(true).build());
+
+    String xml = lclMeker.makeMetadataAndUrlXml("t3sT", ids);
+    xml = xml.replaceAll("\r\n", "\n");
+    assertEquals(golden, xml);
+  }
+
+  @Test
+  public void testCrawlOnceOverride() throws java.net.URISyntaxException {
+    GsaFeedFileMaker lclMeker = new GsaFeedFileMaker(encoder, aclTransform,
+        false, false,
+        /*override crawl-immediately?*/ false,
+        /*crawl-immediately value*/ false,
+        /*override crawl-once?*/ true,
+        /*crawl-once value*/ false);
+    String golden =
+        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n"
+        + "<!DOCTYPE gsafeed PUBLIC \"-//Google//DTD GSA Feeds//EN\" \"\">\n"
+        + "<gsafeed>\n"
+        + "<!--GSA EasyConnector-->\n"
+        + "<header>\n"
+        + "<datasource>t3sT</datasource>\n"
+        + "<feedtype>metadata-and-url</feedtype>\n"
+        + "</header>\n"
+        + "<group>\n"
+
+        // (1)
+        + "<record crawl-once=\"false\""
+        + " displayurl=\"http://f000nkey.net\" mimetype=\"text/plain\""
+        + " url=\"http://localhost/E11\"/>\n"
+
+        // (2)
+        + "<record crawl-immediately=\"true\" crawl-once=\"false\""
+        + " displayurl=\"http://yankee.doodle.com\""
+        + " last-modified=\"Thu, 01 Jan 1970 00:00:00 +0000\""
+        + " mimetype=\"text/plain\" url=\"http://localhost/elefenta\"/>\n"
+
+        // (3)
+        + "<record crawl-once=\"false\""
+        + " displayurl=\"http://google.com/news\""
+        + " last-modified=\"Fri, 02 Jan 1970 00:00:00 +0000\""
+        + " mimetype=\"text/plain\" url=\"http://localhost/gone\"/>\n"
+
+        // (4)
+        + "<record crawl-immediately=\"true\" crawl-once=\"false\""
+        + " lock=\"true\" mimetype=\"text/plain\""
+        + " url=\"http://localhost/flagson\"/>\n"
+
+        // (5)
+        + "<record action=\"delete\" crawl-once=\"false\""
+        + " mimetype=\"text/plain\""
+        + " url=\"http://localhost/deleted\"/>\n"
+
+        + "</group>\n"
+        + "</gsafeed>\n";
+
+    ArrayList<DocIdPusher.Record> ids = new ArrayList<DocIdPusher.Record>();
+    DocIdPusher.Record.Builder attrBuilder 
+        = new DocIdPusher.Record.Builder(new DocId("E11"));
+
+    // (1)
+    attrBuilder.setResultLink(new URI("http://f000nkey.net"));
+    ids.add(attrBuilder.build());
+
+    // (2)
+    attrBuilder.setResultLink(new URI("http://yankee.doodle.com"));    
+    attrBuilder.setLastModified(new Date(0));    
+    attrBuilder.setCrawlImmediately(true);    
+    attrBuilder.setDocId(new DocId("elefenta"));
+    ids.add(attrBuilder.build());
+
+    // (3)
+    attrBuilder.setResultLink(new URI("http://google.com/news"));    
+    attrBuilder.setLastModified(new Date(1000 * 60 * 60 * 24));    
+    attrBuilder.setCrawlImmediately(false);    
+    attrBuilder.setCrawlOnce(false);    
+    attrBuilder.setDocId(new DocId("gone"));
+    ids.add(attrBuilder.build());
+
+    // (4)
+    ids.add(new DocIdPusher.Record.Builder(new DocId("flagson"))
+        .setLock(true).setCrawlImmediately(true).setCrawlOnce(true).build());
+
+    // (5)
+    ids.add(new DocIdPusher.Record.Builder(new DocId("deleted"))
+        .setDeleteFromIndex(true).build());
+
+    String xml = lclMeker.makeMetadataAndUrlXml("t3sT", ids);
+    xml = xml.replaceAll("\r\n", "\n");
+    assertEquals(golden, xml);
+  }
 }