Support pagination
Pagination is only provided by SharePoint for GetContent for Folders and
GetChanges.
diff --git a/src/com/google/enterprise/adaptor/sharepoint/SharePointAdaptor.java b/src/com/google/enterprise/adaptor/sharepoint/SharePointAdaptor.java
index 88fbcb5..bcc269d 100644
--- a/src/com/google/enterprise/adaptor/sharepoint/SharePointAdaptor.java
+++ b/src/com/google/enterprise/adaptor/sharepoint/SharePointAdaptor.java
@@ -212,7 +212,7 @@
for (String contentDatabase : newContentDatabases) {
SiteDataStub.ContentDatabase cd;
try {
- cd = client.getContentContentDatabase(contentDatabase);
+ cd = client.getContentContentDatabase(contentDatabase, false);
} catch (IOException ex) {
log.log(Level.WARNING, "Could not retrieve change id for content "
+ "database: " + contentDatabase, ex);
@@ -229,15 +229,29 @@
// this database is gone.
continue;
}
+ CursorPaginator<SiteDataStub.SPContentDatabase, String> changesPaginator
+ = client.getChangesContentDatabase(contentDatabase, changeId);
+ SiteDataStub.SPContentDatabase changes;
try {
- changeId = client.getModifiedDocIds(contentDatabase, changeId, pusher);
+ while ((changes = changesPaginator.next()) != null) {
+ try {
+ client.getModifiedDocIds(changes, pusher);
+ } catch (XmlProcessingException ex) {
+ log.log(Level.WARNING, "Error parsing changes from content "
+ + "database: " + contentDatabase, ex);
+ // The cursor is guaranteed to be advanced past the position that
+ // failed parsing, so we just ignore the failure and continue
+ // looping.
+ }
+ contentDatabaseChangeId.put(contentDatabase,
+ changesPaginator.getCursor());
+ }
} catch (IOException ex) {
log.log(Level.WARNING, "Error getting changes from content database: "
+ contentDatabase, ex);
// Continue processing. Hope that next time works better.
continue;
}
- contentDatabaseChangeId.put(contentDatabase, changeId);
}
log.exiting("SharePointAdaptor", "getModifiedDocIds", pusher);
}
@@ -379,7 +393,7 @@
for (SiteDataStub.ContentDatabase_type0 cd_t0
: vs.getContentDatabases().getContentDatabase()) {
SiteDataStub.ContentDatabase cd
- = getContentContentDatabase(cd_t0.getID());
+ = getContentContentDatabase(cd_t0.getID(), true);
if (cd.getSites() != null && cd.getSites().getSite() != null) {
for (SiteDataStub.Site_type0 site : cd.getSites().getSite()) {
writer.write(liUrl(site.getURL()));
@@ -475,17 +489,22 @@
+ "</head>"
+ "<body>"
+ "<h1>Folder " + folderPath + "</h1>");
- SiteDataStub.Folder folder = getContentFolder(listGuid, folderPath);
- SiteDataStub.Xml xml = folder.getFolder().getXml();
- OMElement data = getFirstChildWithName(xml, DATA_ELEMENT);
+ Paginator<SiteDataStub.Folder> folderPaginator
+ = getContentFolder(listGuid, folderPath);
writer.write("<p>List items</p><ul>");
- for (OMElement row : getChildrenWithName(data, ROW_ELEMENT)) {
- String rowUrl = row.getAttributeValue(OWS_SERVERURL_ATTRIBUTE);
- String rowTitle = row.getAttributeValue(OWS_TITLE_ATTRIBUTE);
- // TODO(ejona): Fix raw string concatenation.
- writer.write("<li><a href=\"" + encodeUrl(rowUrl) + "\">" + rowTitle
- + "</a></li>");
+ SiteDataStub.Folder folder;
+ while ((folder = folderPaginator.next()) != null) {
+ SiteDataStub.Xml xml = folder.getFolder().getXml();
+
+ OMElement data = getFirstChildWithName(xml, DATA_ELEMENT);
+ for (OMElement row : getChildrenWithName(data, ROW_ELEMENT)) {
+ String rowUrl = row.getAttributeValue(OWS_SERVERURL_ATTRIBUTE);
+ String rowTitle = row.getAttributeValue(OWS_TITLE_ATTRIBUTE);
+ // TODO(ejona): Fix raw string concatenation.
+ writer.write("<li><a href=\"" + encodeUrl(rowUrl) + "\">" + rowTitle
+ + "</a></li>");
+ }
}
writer.write("</ul>");
@@ -735,13 +754,10 @@
/**
* @return new change id
*/
- private String getModifiedDocIds(String contentDatabase,
- String lastChangeId, DocIdPusher pusher) throws IOException,
- InterruptedException {
+ private void getModifiedDocIds(SiteDataStub.SPContentDatabase changes,
+ DocIdPusher pusher) throws IOException, InterruptedException {
log.entering("SiteDataClient", "getModifiedDocIds",
- new Object[] {contentDatabase, lastChangeId, pusher});
- SiteDataStub.SPContentDatabase changes
- = getChangesContentDatabase(contentDatabase, lastChangeId);
+ new Object[] {changes, pusher});
List<DocId> docIds = new ArrayList<DocId>();
getModifiedDocIdsContentDatabase(changes, docIds);
List<DocIdPusher.Record> records
@@ -753,9 +769,7 @@
records.add(builder.setDocId(docId).build());
}
pusher.pushRecords(records);
- lastChangeId = changes.getContentDatabase().getMetadata().getChangeId();
- log.exiting("SiteDataClient", "getModifiedDocIds", lastChangeId);
- return lastChangeId;
+ log.exiting("SiteDataClient", "getModifiedDocIds");
}
private void getModifiedDocIdsContentDatabase(
@@ -927,12 +941,12 @@
return urlResponse;
}
- private SiteDataStub.ContentDatabase getContentContentDatabase(String id)
- throws IOException {
+ private SiteDataStub.ContentDatabase getContentContentDatabase(String id,
+ boolean retrieveChildItems) throws IOException {
log.entering("SiteDataClient", "getContentContentDatabase", id);
SiteDataStub.GetContent request = new SiteDataStub.GetContent();
request.setObjectType(SiteDataStub.ObjectType.ContentDatabase);
- request.setRetrieveChildItems(true);
+ request.setRetrieveChildItems(retrieveChildItems);
request.setSecurityOnly(false);
request.setObjectId(id);
SiteDataStub.GetContentResponse response = stub.getContent(request);
@@ -1030,32 +1044,44 @@
return data;
}
- private SiteDataStub.Folder getContentFolder(String guid, String url)
- throws IOException {
+ private Paginator<SiteDataStub.Folder> getContentFolder(String guid,
+ String url) {
log.entering("SiteDataClient", "getContentFolder",
new Object[] {guid, url});
- SiteDataStub.GetContent request = new SiteDataStub.GetContent();
+ final SiteDataStub.GetContent request = new SiteDataStub.GetContent();
request.setObjectType(SiteDataStub.ObjectType.Folder);
request.setRetrieveChildItems(true);
request.setSecurityOnly(false);
request.setFolderUrl(url);
request.setObjectId(guid);
request.setLastItemIdOnPage("");
- SiteDataStub.GetContentResponse response = stub.getContent(request);
- log.log(Level.FINE, "GetContent(Folder): Result={0}, "
- + "LastItemIdOnPage={1}", new Object[] {
- response.getGetContentResult(), response.getLastItemIdOnPage()});
- String xml = response.getGetContentResult();
- xml = xml.replace("<Folder>", "<Folder xmlns='" + XMLNS + "'>");
- XMLStreamReader reader = createXmlStreamReader(xml);
- SiteDataStub.Folder folder;
- try {
- folder = SiteDataStub.Folder.Factory.parse(reader);
- } catch (Exception ex) {
- throw new XmlProcessingException(ex);
- }
- log.exiting("SiteDataClient", "getContentFolder", folder);
- return folder;
+ log.exiting("SiteDataClient", "getContentFolder");
+ return new Paginator<SiteDataStub.Folder>() {
+ @Override
+ public SiteDataStub.Folder next() throws IOException {
+ if (request.getLastItemIdOnPage() == null) {
+ return null;
+ }
+ log.log(Level.FINE, "GetContent request: ObjectType={0}, "
+ + "ObjectId={1}, LastItemIdOnPage={2}, RetrieveChildItems={3}, "
+ + "FolderUrl={4}", new Object[] {request.getObjectType(),
+ request.getObjectId(), request.getLastItemIdOnPage(),
+ request.getRetrieveChildItems(), request.getFolderUrl()});
+ SiteDataStub.GetContentResponse response = stub.getContent(request);
+ log.log(Level.FINE, "GetContent(Folder): Result={0}, "
+ + "LastItemIdOnPage={1}", new Object[] {
+ response.getGetContentResult(), response.getLastItemIdOnPage()});
+ request.setLastItemIdOnPage(response.getLastItemIdOnPage());
+ String xml = response.getGetContentResult();
+ xml = xml.replace("<Folder>", "<Folder xmlns='" + XMLNS + "'>");
+ XMLStreamReader reader = createXmlStreamReader(xml);
+ try {
+ return SiteDataStub.Folder.Factory.parse(reader);
+ } catch (Exception ex) {
+ throw new XmlProcessingException(ex);
+ }
+ }
+ };
}
private SiteDataStub.Item getContentListItemAttachments(String listId,
@@ -1086,33 +1112,60 @@
return item;
}
- private SiteDataStub.SPContentDatabase getChangesContentDatabase(
- String contentDatabaseGuid, String startChangeId) throws IOException {
+ /**
+ * Get a paginator that allows looping over all the changes since {@code
+ * startChangeId}. If next() throws an XmlProcessingException, it is
+ * guaranteed to be after state has been updated so that a subsequent call
+ * to next() will provide the next page and not repeat the erroring page.
+ */
+ private CursorPaginator<SiteDataStub.SPContentDatabase, String>
+ getChangesContentDatabase(String contentDatabaseGuid,
+ String startChangeId) {
log.entering("SiteDataClient", "getChangesContentDatabase",
new Object[] {contentDatabaseGuid, startChangeId});
- SiteDataStub.GetChanges request = new SiteDataStub.GetChanges();
+ final SiteDataStub.GetChanges request = new SiteDataStub.GetChanges();
request.setObjectType(SiteDataStub.ObjectType.ContentDatabase);
request.setContentDatabaseId(contentDatabaseGuid);
request.setLastChangeId(startChangeId);
request.setTimeout(15);
- SiteDataStub.GetChangesResponse response = stub.getChanges(request);
- log.log(Level.FINE, "GetChanges(ContentDatabase): Result={0}, "
- + "MoreChanges={1}, CurrentChangeId={2}, LastChangeId={3}",
- new Object[] {
- response.getGetChangesResult(), response.getMoreChanges(),
- response.getCurrentChangeId(), response.getLastChangeId()});
- String xml = response.getGetChangesResult();
- xml = xml.replace("<SPContentDatabase ",
- "<SPContentDatabase xmlns='" + XMLNS + "' ");
- SiteDataStub.SPContentDatabase cd;
- XMLStreamReader reader = createXmlStreamReader(xml);
- try {
- cd = SiteDataStub.SPContentDatabase.Factory.parse(reader);
- } catch (Exception ex) {
- throw new XmlProcessingException(ex);
- }
- log.exiting("SiteDataClient", "getChangesContentDatabase", cd);
- return cd;
+ log.exiting("SiteDataClient", "getChangesContentDatabase");
+ return new CursorPaginator<SiteDataStub.SPContentDatabase, String>() {
+ @Override
+ public SiteDataStub.SPContentDatabase next() throws IOException {
+ if (request.getLastChangeId().equals(request.getCurrentChangeId())) {
+ return null;
+ }
+ log.log(Level.FINE, "Request: ObjectType={0}, ContentDatabaseId={1}, "
+ + "LastChangeId={2}, CurrentChangeId={3}, Timeout={4}",
+ new Object[] {request.getObjectType(),
+ request.getContentDatabaseId(), request.getLastChangeId(),
+ request.getCurrentChangeId(), request.getTimeout()});
+ SiteDataStub.GetChangesResponse response = stub.getChanges(request);
+ log.log(Level.FINE, "GetChanges(ContentDatabase): Result={0}, "
+ + "MoreChanges={1}, CurrentChangeId={2}, LastChangeId={3}",
+ new Object[] {
+ response.getGetChangesResult(), response.getMoreChanges(),
+ response.getCurrentChangeId(), response.getLastChangeId()});
+ // Update state for next iteration.
+ request.setLastChangeId(response.getLastChangeId());
+ request.setCurrentChangeId(response.getCurrentChangeId());
+ // XmlProcessingExceptions fine after this point.
+ String xml = response.getGetChangesResult();
+ xml = xml.replace("<SPContentDatabase ",
+ "<SPContentDatabase xmlns='" + XMLNS + "' ");
+ XMLStreamReader reader = createXmlStreamReader(xml);
+ try {
+ return SiteDataStub.SPContentDatabase.Factory.parse(reader);
+ } catch (Exception ex) {
+ throw new XmlProcessingException(ex);
+ }
+ }
+
+ @Override
+ public String getCursor() {
+ return request.getLastChangeId();
+ }
+ };
}
private XMLStreamReader createXmlStreamReader(String xml)
@@ -1133,4 +1186,24 @@
super(cause);
}
}
+
+ private interface Paginator<E> {
+ /**
+ * Get the next page of the series. If an exception is thrown, the state of
+ * the paginator is undefined.
+ *
+ * @return the next page of data, or {@code null} if no more pages available
+ */
+ public E next() throws IOException;
+ }
+
+ private interface CursorPaginator<E, C> extends Paginator<E> {
+ /**
+ * Provides a cursor for the current position. The intent is that you could
+ * get a cursor (even in the event of {@link #next} throwing an exception)
+ * and use it to create a query that would continue without repeating
+ * results.
+ */
+ public C getCursor();
+ }
}