Escape the initial slash so that a leading slash in doc ids is preserved.
diff --git a/src/com/google/enterprise/adaptor/DocIdCodec.java b/src/com/google/enterprise/adaptor/DocIdCodec.java index 869cc09..dc58c2d 100644 --- a/src/com/google/enterprise/adaptor/DocIdCodec.java +++ b/src/com/google/enterprise/adaptor/DocIdCodec.java
@@ -48,6 +48,10 @@ uniqueId = uniqueId.replaceAll("(?<!:)/(?=/)", "/..."); // Precede index.html and index.htm with "_" to avoid Google eating them. uniqueId = uniqueId.replaceFirst("(^|/)(_*index.html?)$", "$1_$2"); + // If starts with "/" avoid double slash after baseDocUri. + if (uniqueId.startsWith("/")) { + uniqueId = "..." + uniqueId; + } try { resource = new URI(null, null, baseDocUri.getPath() + uniqueId, null); } catch (URISyntaxException ex) { @@ -67,6 +71,9 @@ throw new IllegalArgumentException("URI does not refer to a DocId"); } String id = uri.getPath().substring(basePath.length()); + if (id.startsWith(".../")) { + id = id.substring(3); + } id = id.replaceFirst("(^|/)_(_*index.html?)$", "$1$2"); id = id.replaceAll("(?<!:)/\\.\\.\\.(?=/)", "/"); // Remove three dots from any sequence of only dots that's at least
diff --git a/test/com/google/enterprise/adaptor/DocIdCodecTest.java b/test/com/google/enterprise/adaptor/DocIdCodecTest.java index b8e27a7..2bbb04e 100644 --- a/test/com/google/enterprise/adaptor/DocIdCodecTest.java +++ b/test/com/google/enterprise/adaptor/DocIdCodecTest.java
@@ -130,6 +130,30 @@ } @Test + public void testBumperSlashes() { + String docId = "/mnt/winser/"; + URI uri = codec.encodeDocId(new DocId(docId)); + String uriStr = uri.toString(); + assertTrue(uriStr.startsWith("http://")); + assertFalse(uriStr.substring("http://".length()).contains("//")); + assertTrue(uriStr.contains("/.../")); + assertTrue(uriStr.endsWith("/")); + assertEquals(docId, codec.decodeDocId(uri).getUniqueId()); + } + + @Test + public void testBumperSlashes2() { + String docId = "///mnt/winser///"; + URI uri = codec.encodeDocId(new DocId(docId)); + String uriStr = uri.toString(); + assertTrue(uriStr.startsWith("http://")); + assertFalse(uriStr.substring("http://".length()).contains("//")); + assertTrue(uriStr.contains("/.../.../.../")); + assertTrue(uriStr.endsWith("r/.../.../")); + assertEquals(docId, codec.decodeDocId(uri).getUniqueId()); + } + + @Test public void testDoubleSlashAfterColon3() { String docId = "//d:////t//://NOW://.//"; URI uri = codec.encodeDocId(new DocId(docId));