Escape initial slash
Preserves the initial slash in doc ids.
diff --git a/src/com/google/enterprise/adaptor/DocIdCodec.java b/src/com/google/enterprise/adaptor/DocIdCodec.java
index 869cc09..dc58c2d 100644
--- a/src/com/google/enterprise/adaptor/DocIdCodec.java
+++ b/src/com/google/enterprise/adaptor/DocIdCodec.java
@@ -48,6 +48,10 @@
uniqueId = uniqueId.replaceAll("(?<!:)/(?=/)", "/...");
// Precede index.html and index.htm with "_" to avoid Google eating them.
uniqueId = uniqueId.replaceFirst("(^|/)(_*index.html?)$", "$1_$2");
+ // If starts with "/" avoid double slash after baseDocUri.
+ if (uniqueId.startsWith("/")) {
+ uniqueId = "..." + uniqueId;
+ }
try {
resource = new URI(null, null, baseDocUri.getPath() + uniqueId, null);
} catch (URISyntaxException ex) {
@@ -67,6 +71,9 @@
throw new IllegalArgumentException("URI does not refer to a DocId");
}
String id = uri.getPath().substring(basePath.length());
+ if (id.startsWith(".../")) {
+ id = id.substring(3);
+ }
id = id.replaceFirst("(^|/)_(_*index.html?)$", "$1$2");
id = id.replaceAll("(?<!:)/\\.\\.\\.(?=/)", "/");
// Remove three dots from any sequence of only dots that's at least
diff --git a/test/com/google/enterprise/adaptor/DocIdCodecTest.java b/test/com/google/enterprise/adaptor/DocIdCodecTest.java
index b8e27a7..2bbb04e 100644
--- a/test/com/google/enterprise/adaptor/DocIdCodecTest.java
+++ b/test/com/google/enterprise/adaptor/DocIdCodecTest.java
@@ -130,6 +130,30 @@
}
@Test
+ public void testBumperSlashes() { // Round-trips a doc id that starts and ends with a single slash.
+ String docId = "/mnt/winser/";
+ URI uri = codec.encodeDocId(new DocId(docId));
+ String uriStr = uri.toString();
+ assertTrue(uriStr.startsWith("http://"));
+ assertFalse(uriStr.substring("http://".length()).contains("//")); // no double slash after the scheme separator
+ assertTrue(uriStr.contains("/.../")); // expects a "..." segment in the encoded URI
+ assertTrue(uriStr.endsWith("/")); // the trailing slash must be kept
+ assertEquals(docId, codec.decodeDocId(uri).getUniqueId()); // decoding must give back the original id
+ }
+
+ @Test
+ public void testBumperSlashes2() { // Round-trips a doc id that starts and ends with three slashes.
+ String docId = "///mnt/winser///";
+ URI uri = codec.encodeDocId(new DocId(docId));
+ String uriStr = uri.toString();
+ assertTrue(uriStr.startsWith("http://"));
+ assertFalse(uriStr.substring("http://".length()).contains("//")); // no double slash after the scheme separator
+ assertTrue(uriStr.contains("/.../.../.../")); // expects three consecutive "..." segments
+ assertTrue(uriStr.endsWith("r/.../.../")); // expects two "..." segments after the final "r", then a slash
+ assertEquals(docId, codec.decodeDocId(uri).getUniqueId()); // decoding must give back the original id
+ }
+
+ @Test
public void testDoubleSlashAfterColon3() {
String docId = "//d:////t//://NOW://.//";
URI uri = codec.encodeDocId(new DocId(docId));