blob: d9de5d3d126530e43f845bc4865340a457496c63 [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package adaptorlib.prebuilt;
import adaptorlib.AbstractAdaptor;
import adaptorlib.CommandStreamParser;
import adaptorlib.DocId;
import adaptorlib.DocIdPusher;
import adaptorlib.Request;
import adaptorlib.Response;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Date;
import java.util.logging.Logger;
/**
 * Command Line Adaptor.
 *
 * <p>Delegates document listing and document retrieval to external commands:
 * {@code ./list-doc-ids.sh} is executed to enumerate document ids and
 * {@code ./get-doc-contents.sh} is executed to fetch a single document. The
 * standard output of each command is parsed with {@link CommandStreamParser}.
 */
public class CommandLineAdaptor extends AbstractAdaptor {
  private static final Logger log = Logger.getLogger(CommandLineAdaptor.class.getName());

  /** Charset used to decode the standard error output of the external commands. */
  private final Charset encoding = Charset.forName("UTF-8");

  /**
   * Runs {@code ./list-doc-ids.sh}, parses its standard output as lister
   * output and pushes the resulting records to {@code pusher}.
   *
   * @param pusher receives the records read from the lister command
   * @throws IOException if the command could not be executed or exited with a
   *     non-zero code (the decoded standard error is included in the message)
   * @throws InterruptedException propagated from the command execution or the
   *     push
   */
  @Override
  public void getDocIds(DocIdPusher pusher) throws IOException,
      InterruptedException {
    int commandResult;
    Command command = newListerCommand();

    try {
      log.finest("Command: ./list-doc-ids.sh");
      commandResult = command.exec(new String[] {"./list-doc-ids.sh"});
    } catch (IOException e) {
      throw new IOException("External command could not be executed.", e);
    }
    if (commandResult != 0) {
      String errorOutput = new String(command.getStderr(), encoding);
      throw new IOException("External command error. code = " + commandResult + ". Stderr: "
          + errorOutput);
    }

    CommandStreamParser parser = new CommandStreamParser(
        new ByteArrayInputStream(command.getStdout()));
    log.finest("Pushing Document IDs.");
    pusher.pushRecords(parser.readFromLister());
  }

  /**
   * Gives the bytes of a document referenced with id.
   *
   * <p>Runs {@code ./get-doc-contents.sh} with the document's unique id and the
   * last access time of the request in milliseconds ({@code 0} when the request
   * has no last access time), then translates the parsed retriever output into
   * a not-found, not-modified, or full content response.
   *
   * @param req the request identifying the document
   * @param resp the response to populate
   * @throws IOException if the command could not be executed, was interrupted,
   *     exited with a non-zero code, returned a different document id than the
   *     one requested, or returned no content for a document that is neither
   *     missing nor up to date
   */
  @Override
  public void getDocContent(Request req, Response resp) throws IOException {
    DocId id = req.getDocId();
    int commandResult;
    Command command = newRetrieverCommand();

    try {
      Date lastCrawled = req.getLastAccessTime();
      long lastCrawledMillis = 0;
      if (lastCrawled != null) {
        lastCrawledMillis = lastCrawled.getTime();
      }
      log.finest("Command: ./get-doc-contents.sh " + id.getUniqueId() +
          " " + lastCrawledMillis);
      commandResult = command.exec(new String[] {"./get-doc-contents.sh", id.getUniqueId(),
          Long.toString(lastCrawledMillis)});
    } catch (InterruptedException e) {
      // This method cannot throw InterruptedException, so restore the interrupt
      // status before wrapping; otherwise the interruption is lost to callers.
      Thread.currentThread().interrupt();
      throw new IOException("Thread intrupted while waiting for external command.", e);
    } catch (IOException e) {
      throw new IOException("External command could not be executed.", e);
    }
    if (commandResult != 0) {
      String errorOutput = new String(command.getStderr(), encoding);
      throw new IOException("External command error. code=" + commandResult + ". Stderr: "
          + errorOutput);
    }

    CommandStreamParser parser = new CommandStreamParser(
        new ByteArrayInputStream(command.getStdout()));
    CommandStreamParser.RetrieverInfo retrieverInfo = parser.readFromRetriever();

    // The retriever echoes back the id it served; reject mismatched output.
    if (!req.getDocId().equals(retrieverInfo.getDocId())) {
      throw new IOException("requested document " + req.getDocId() + " does not match retrieved "
          + "document " + retrieverInfo.getDocId() + ".");
    }

    if (retrieverInfo.notFound()) {
      resp.respondNotFound();
    } else if (retrieverInfo.isUpToDate()) {
      log.finest("Retriever: " + id.getUniqueId() + " is up to date.");
      resp.respondNotModified();
    } else {
      // Mime type and metadata are optional; content is required.
      if (retrieverInfo.getMimeType() != null) {
        log.finest("Retriever: " + id.getUniqueId() + " has mime-type "
            + retrieverInfo.getMimeType());
        resp.setContentType(retrieverInfo.getMimeType());
      }
      if (retrieverInfo.getMetadata() != null) {
        log.finest("Retriever: " + id.getUniqueId() + " has metadata "
            + retrieverInfo.getMetadata());
        resp.setMetadata(retrieverInfo.getMetadata());
      }
      if (retrieverInfo.getContents() != null) {
        resp.getOutputStream().write(retrieverInfo.getContents());
      } else {
        throw new IOException("No content returned by retriever for " + req.getDocId() + ".");
      }
    }
  }

  /**
   * Creates the {@link Command} used to run the lister script. Subclasses may
   * override this to supply a different command instance.
   *
   * @return a new {@code Command}
   */
  protected Command newListerCommand() {
    return new Command();
  }

  /**
   * Creates the {@link Command} used to run the retriever script. Subclasses
   * may override this to supply a different command instance.
   *
   * @return a new {@code Command}
   */
  protected Command newRetrieverCommand() {
    return new Command();
  }

  /** Call default main for adaptors. */
  public static void main(String[] args) {
    AbstractAdaptor.main(new CommandLineAdaptor(), args);
  }
}