blob: 9fe83397c9d38a0691f28d5644b61fca891e5fc4 [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package adaptorlib;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Collection;
import java.util.Date;
import java.util.Map;
import java.util.Set;
/**
 * Interface for user-specific implementation details of an Adaptor.
 * Implementations must be thread-safe.
 *
 * @see adaptorlib.examples.AdaptorTemplate
 */
public interface Adaptor {
  /**
   * Provides contents and metadata of a particular document.
   *
   * @param request describes which document is wanted and what the client
   *     already has
   * @param response used to supply metadata and content, or to report that
   *     the client's copy is still current
   * @throws java.io.FileNotFoundException if the {@code DocId} given by
   *     {@link Request#getDocId} does not exist
   * @throws IOException if an I/O error occurs
   */
  void getDocContent(Request request, Response response) throws IOException;

  /**
   * Pushes all the {@code DocId}s that are supposed to be indexed by the GSA.
   * This will frequently involve re-sending {@code DocId}s to the GSA, but this
   * allows healing previous errors and cache inconsistencies. This method
   * should determine a list of {@code DocId}s to push and call {@link
   * DocIdPusher#pushDocIds} one or more times.
   *
   * <p>{@code pusher} is provided as convenience and is the same object
   * provided to {@link #setDocIdPusher} previously. This method may take a
   * while and implementations are free to call {@link Thread#sleep}
   * occasionally to reduce load.
   *
   * @param pusher the object to push {@code DocId}s through
   * @throws IOException if an I/O error occurs
   * @throws InterruptedException if interrupted, for example while pushing
   *     through {@code pusher} or while sleeping
   */
  void getDocIds(DocIdPusher pusher) throws IOException, InterruptedException;

  /**
   * Determines whether the user identified is allowed to access the {@code
   * DocId}s.
   *
   * @param userIdentifier User to authorize, or {@code null} for anonymous
   *     users
   * @param groups never-{@code null} set of groups the user belongs to
   * @param ids Collection of {@code DocId}s that need to be checked
   * @return an {@code AuthzStatus} for each {@code DocId} provided in {@code
   *     ids}
   * @throws IOException if an I/O error occurs
   */
  Map<DocId, AuthzStatus> isUserAuthorized(String userIdentifier,
      Set<String> groups, Collection<DocId> ids) throws IOException;

  /**
   * Provides a {@code DocIdPusher} object to be called whenever the Adaptor
   * wishes. This allows doing event-based incremental pushes at any time. The
   * method is called as soon as the Adaptor is provided to the {@code
   * GsaCommunicationHandler}.
   *
   * @param pusher the pusher the Adaptor may use from now on
   */
  void setDocIdPusher(DocIdPusher pusher);

  /**
   * Interface provided to {@link Adaptor#getDocContent} for describing the
   * action that should be taken.
   */
  interface Request {
    /**
     * Returns {@code true} if the document has changed since the GSA or other
     * client last retrieved it, that is, if the client's current copy was
     * retrieved before the {@code lastModified} date; {@code false} otherwise.
     * {@code lastModified} must be in GMT.
     *
     * <p>If {@code false}, the client does not need to be re-sent the data,
     * since what they have cached is the most recent version. In this case,
     * you should then call {@link Response#respondNotModified}.
     *
     * @param lastModified time, in GMT, at which the document last changed
     * @return {@code true} if the client's copy is out of date and the
     *     document must be re-sent
     */
    boolean hasChangedSinceLastAccess(Date lastModified);

    /**
     * Returns the last time a GSA or other client retrieved the data, or
     * {@code null} if none was provided by the client. The returned {@code
     * Date} is in GMT.
     *
     * <p>This is useful for determining if the client needs to be re-sent the
     * data since what they have cached may be the most recent version. If the
     * client is up-to-date, then call {@link Response#respondNotModified}.
     *
     * @return date in GMT client last accessed the DocId or {@code null}
     */
    Date getLastAccessTime();

    /**
     * Provides the document ID for the document that is being requested.
     * DocId was not necessarily provided previously by the Adaptor; it is
     * client-provided and must not be trusted.
     *
     * @return the requested, untrusted {@code DocId}
     */
    DocId getDocId();
  }

  /**
   * Interface provided to {@link Adaptor#getDocContent} for performing the
   * actions needed to satisfy a request. If the {@code DocId} provided by
   * {@link Request#getDocId} does not exist, a {@link
   * java.io.FileNotFoundException} should be thrown.
   *
   * <p>There are several ways that a request can be processed. In the
   * simplest case an Adaptor always sets different pieces of metadata, calls
   * {@link #getOutputStream}, and writes the document contents.
   *
   * <p>For improved efficiency during recrawl by the GSA, an Adaptor should
   * check {@link Request#hasChangedSinceLastAccess} and call {@link
   * #respondNotModified} when it is {@code false}, meaning the client's cached
   * copy is still current. This saves the Adaptor from needing to retrieve
   * the document contents and metadata.
   */
  interface Response {
    /**
     * Respond to the GSA or other client that it already has the latest
     * version of a file and its metadata. If you have called other methods on
     * this object to provide various metadata, the effects of those methods
     * will be ignored.
     *
     * <p>If called, this must be the last call to this interface. If the
     * document does not exist, you must throw {@link
     * java.io.FileNotFoundException} and not call this method. Once you call
     * this method, for the rest of the processing, exceptions may no longer be
     * communicated to clients cleanly.
     */
    void respondNotModified();

    /**
     * Get stream to write document contents to. There is no need to flush or
     * close the {@code OutputStream} when done.
     *
     * <p>If called, this must be the last call to this interface (although,
     * for convenience, you may call this method multiple times). If the
     * document does not exist, you must throw {@link
     * java.io.FileNotFoundException} and not call this method. Once you call
     * this method, for the rest of the processing, exceptions may no longer be
     * communicated to clients cleanly.
     *
     * @return the stream that receives the document contents
     */
    OutputStream getOutputStream();

    /**
     * Describe the content type of the document.
     *
     * @param contentType content type of the document
     */
    void setContentType(String contentType);

    /**
     * Provide metadata that apply to the document.
     *
     * @param m metadata for the document
     */
    void setMetadata(Metadata m);
  }

  /**
   * Interface that allows at-will pushing of {@code DocId}s to the GSA.
   */
  interface DocIdPusher {
    /**
     * Push {@code DocId}s immediately and block until they are successfully
     * provided to the GSA or the error handler gives up. This method can take
     * a while in error conditions, but is not something that generally needs
     * to be avoided.
     *
     * <p>Equivalent to {@code pushDocIds(docIds, null)} and {@link
     * #pushDocInfos(Iterable)} with empty metadata for each {@code DocInfo}.
     *
     * @param docIds the {@code DocId}s to push
     * @return {@code null} on success, otherwise the first DocId to fail
     * @throws InterruptedException if interrupted while blocked
     * @see #pushDocIds(Iterable, Adaptor.PushErrorHandler)
     */
    DocId pushDocIds(Iterable<DocId> docIds) throws InterruptedException;

    /**
     * Push {@code DocId}s immediately and block until they are successfully
     * provided to the GSA or the error handler gives up. This method can take
     * a while in error conditions, but is not something that generally needs
     * to be avoided.
     *
     * <p>If handler is {@code null}, then a default error handler is used.
     *
     * <p>Equivalent to {@link #pushDocInfos(Iterable,
     * Adaptor.PushErrorHandler)} with empty metadata for each {@code DocInfo}.
     *
     * @param docIds the {@code DocId}s to push
     * @param handler decides whether failed batches are retried, or {@code
     *     null} for the default error handler
     * @return {@code null} on success, otherwise the first DocId to fail
     * @throws InterruptedException if interrupted while blocked
     */
    DocId pushDocIds(Iterable<DocId> docIds, PushErrorHandler handler)
        throws InterruptedException;

    /**
     * Push {@code DocInfo}s immediately and block until they are successfully
     * provided to the GSA or the error handler gives up. This method can take
     * a while in error conditions, but is not something that generally needs
     * to be avoided.
     *
     * <p>Equivalent to {@code pushDocInfos(docInfos, null)}.
     *
     * @param docInfos the {@code DocInfo}s to push
     * @return {@code null} on success, otherwise the first DocInfo to fail
     * @throws InterruptedException if interrupted while blocked
     * @see #pushDocInfos(Iterable, Adaptor.PushErrorHandler)
     */
    DocInfo pushDocInfos(Iterable<DocInfo> docInfos)
        throws InterruptedException;

    /**
     * Push {@code DocInfo}s immediately and block until they are successfully
     * provided to the GSA or the error handler gives up. This method can take
     * a while in error conditions, but is not something that generally needs
     * to be avoided.
     *
     * <p>If handler is {@code null}, then a default error handler is used.
     *
     * @param docInfos the {@code DocInfo}s to push
     * @param handler decides whether failed batches are retried, or {@code
     *     null} for the default error handler
     * @return {@code null} on success, otherwise the first DocInfo to fail
     * @throws InterruptedException if interrupted while blocked
     */
    DocInfo pushDocInfos(Iterable<DocInfo> docInfos, PushErrorHandler handler)
        throws InterruptedException;
  }

  /**
   * Interface for handling errors encountered during pushing of {@code
   * DocId}s.
   */
  interface PushErrorHandler {
    /**
     * {@link GsaCommunicationHandler#pushDocIds} had a failure connecting with
     * GSA to send a batch. The thrown exception is provided as well as the
     * number of times that this batch was attempted to be sent. Return true to
     * retry, perhaps after a Thread.sleep() of some time.
     *
     * @param ex the exception thrown by the failed attempt
     * @param ntries number of times sending this batch has been attempted
     * @return {@code true} to retry the batch
     * @throws InterruptedException if interrupted, for example while sleeping
     *     before a retry
     */
    boolean handleFailedToConnect(Exception ex, int ntries)
        throws InterruptedException;

    /**
     * {@link GsaCommunicationHandler#pushDocIds} had a failure writing to the
     * GSA while sending a batch. The thrown exception is provided as well as
     * the number of times that this batch was attempted to be sent. Return
     * true to retry, perhaps after a Thread.sleep() of some time.
     *
     * @param ex the exception thrown by the failed attempt
     * @param ntries number of times sending this batch has been attempted
     * @return {@code true} to retry the batch
     * @throws InterruptedException if interrupted, for example while sleeping
     *     before a retry
     */
    boolean handleFailedWriting(Exception ex, int ntries)
        throws InterruptedException;

    /**
     * {@link GsaCommunicationHandler#pushDocIds} had a failure reading
     * response from GSA. The thrown exception is provided as well as the
     * number of times that this batch was attempted to be sent. Return true to
     * retry, perhaps after a Thread.sleep() of some time.
     *
     * @param ex the exception thrown by the failed attempt
     * @param ntries number of times sending this batch has been attempted
     * @return {@code true} to retry the batch
     * @throws InterruptedException if interrupted, for example while sleeping
     *     before a retry
     */
    boolean handleFailedReadingReply(Exception ex, int ntries)
        throws InterruptedException;
  }

  /**
   * Interface for handling error encountered during scheduled pushing of
   * {@code DocId}s.
   */
  interface GetDocIdsErrorHandler {
    /**
     * {@link GsaCommunicationHandler#pushDocIds} had a failure reading from
     * this Adaptor's {@link Adaptor#getDocIds}. The thrown exception is
     * provided as well as the number of times that this batch was attempted to
     * be sent. Return true to retry, perhaps after a Thread.sleep() of some
     * time.
     *
     * @param ex the exception thrown by the failed attempt
     * @param ntries number of attempts made, as described above
     * @return {@code true} to retry
     * @throws InterruptedException if interrupted, for example while sleeping
     *     before a retry
     */
    boolean handleFailedToGetDocIds(Exception ex, int ntries)
        throws InterruptedException;
  }
}