Fix b/16139028 Part2: Do not feed docids that are too long. Examination of customer log files showed frequent exceptions thrown from WindowsAclFileAttributesView.getFileSecurity() claiming that "The filename, directory name, or volume label syntax is incorrect." As it turned out, all these pathnames exceeded the maximum pathname length of 260 characters. The too-long pathnames occur when we prepend the UNC server + share syntax to the local pathname. See: http://msdn.microsoft.com/library/windows/desktop/aa365247.aspx This change avoids feeding docids that are too long. The windows version of newDocId() now throws IllegalArgumentException if the supplied pathname is too long. Code Review: http://codereview.appspot.com/109680045
diff --git a/src/com/google/enterprise/adaptor/fs/FsAdaptor.java b/src/com/google/enterprise/adaptor/fs/FsAdaptor.java index 5c00990..60343f9 100644 --- a/src/com/google/enterprise/adaptor/fs/FsAdaptor.java +++ b/src/com/google/enterprise/adaptor/fs/FsAdaptor.java
@@ -235,6 +235,13 @@ rootPath = delegate.getPath(source); log.log(Level.CONFIG, "rootPath: {0}", rootPath); + try { + rootPathDocId = delegate.newDocId(rootPath); + } catch (IllegalArgumentException e) { + throw new InvalidConfigurationException("The path " + rootPath + + " is not valid path - " + e.getMessage() + "."); + } + // TODO(mifern): Using a path of \\host\ns\link\FolderA will be // considered non-DFS even though \\host\ns\link is a DFS link path. // This is OK for now since the check for root path below will cause an @@ -320,7 +327,6 @@ "attributes and ACLs is required to crawl a path.", e); } - rootPathDocId = delegate.newDocId(rootPath); delegate.startMonitorPath(rootPath, context.getAsyncDocIdPusher()); context.setPollingIncrementalLister(this); } @@ -467,7 +473,16 @@ return; } - if (!id.equals(delegate.newDocId(doc))) { + DocId docid; + try { + docid = delegate.newDocId(doc); + } catch (IllegalArgumentException e) { + log.log(Level.WARNING, "The docid {0} is not a valid id - {1}.", + new Object[] { doc, e.getMessage() }); + resp.respondNotFound(); + return; + } + if (!id.equals(docid)) { log.log(Level.WARNING, "The docid {0} is not a valid id generated by the adaptor.", id); resp.respondNotFound(); @@ -615,7 +630,15 @@ try { for (Path file : files) { if (isFileOrFolder(file)) { - writer.addLink(delegate.newDocId(file), getFileName(file)); + DocId docId; + try { + docId = delegate.newDocId(file); + } catch (IllegalArgumentException e) { + log.log(Level.WARNING, "Skipping {0} because {1}.", + new Object[] { doc, e.getMessage() }); + continue; + } + writer.addLink(docId, getFileName(file)); } } } finally {
diff --git a/src/com/google/enterprise/adaptor/fs/WindowsFileDelegate.java b/src/com/google/enterprise/adaptor/fs/WindowsFileDelegate.java index 7ce01db..c893d32 100644 --- a/src/com/google/enterprise/adaptor/fs/WindowsFileDelegate.java +++ b/src/com/google/enterprise/adaptor/fs/WindowsFileDelegate.java
@@ -260,7 +260,14 @@ // does NOT use expressions so regex escaping is not needed. id = id.replaceFirst("//", "\\\\\\\\"); } - return new DocId(id); + // Windows has a maximum pathname length of 260 characters. This limit + // can be worked around with some effort. For details see: + // http://msdn.microsoft.com/library/windows/desktop/aa365247.aspx + if (id.length() < WinNT.MAX_PATH) { + return new DocId(id); + } else { + throw new IllegalArgumentException("the path is too long"); + } } @Override @@ -471,16 +478,24 @@ } private void pushPath(Path doc) { - // For deleted, moved or renamed files we want to push the old name - // so in this case, feed it if the path does not exists. - boolean deletedOrMoved = !Files.exists(doc); try { + DocId docid; + try { + docid = newDocId(doc); + } catch (IllegalArgumentException e) { + log.log(Level.WARNING, "Skipping {0} because {1}.", + new Object[] { doc, e.getMessage() }); + return; + } + // For deleted, moved or renamed files we want to push the old name + // so in this case, feed it if the path does not exists. + boolean deletedOrMoved = !Files.exists(doc); if (deletedOrMoved || isRegularFile(doc) || isDirectory(doc)) { - pusher.pushRecord(new DocIdPusher.Record.Builder(newDocId(doc)) + pusher.pushRecord(new DocIdPusher.Record.Builder(docid) .setCrawlImmediately(true).build()); } else { log.log(Level.INFO, - "Skipping path {0}. It is not a supported file type.", doc); + "Skipping {0}. It is not a regular file or directory.", doc); } } catch (IOException e) { log.log(Level.WARNING, "Unable to push the path " + doc +
diff --git a/test/com/google/enterprise/adaptor/fs/WindowsFileDelegateTest.java b/test/com/google/enterprise/adaptor/fs/WindowsFileDelegateTest.java index f4c9432..864c14b 100644 --- a/test/com/google/enterprise/adaptor/fs/WindowsFileDelegateTest.java +++ b/test/com/google/enterprise/adaptor/fs/WindowsFileDelegateTest.java
@@ -269,6 +269,23 @@ return delegate.getDfsUncActiveStorageUnc(dfsPath); } + private String makeLongPath() { + String abc = "abcdefghijklmnopqrstuvwxyz"; + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < 10; i++) { + builder.append(abc); + builder.append(File.separator); + } + return builder.toString(); + } + + @Test + public void testNewDocIdLongPath() throws Exception { + Path path = Paths.get(tempRoot.toString(), makeLongPath()); + thrown.expect(IllegalArgumentException.class); + delegate.newDocId(path); + } + @Test public void testNewDocIdLocalFiles() throws Exception { Path dir = newTempDir("testDir");