blob: d798feac66dc41600e07e55ccd4d9fad01c76768 [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.enterprise.adaptor.prebuilt;
import static java.util.AbstractMap.SimpleEntry;
import com.google.enterprise.adaptor.DocumentTransform;
import com.google.enterprise.adaptor.IOHelper;
import com.google.enterprise.adaptor.Metadata;
import java.io.*;
import java.nio.charset.Charset;
import java.util.*;
import java.util.logging.*;
/**
 * A conduit that allows a simple way to create a document transform based on
 * a command line program.
 *
 * <p>When {@link #getCommandAcceptsParameters()} is {@code true} (the
 * default), each call to {@link #transform} writes the metadata and the
 * params to two temporary files and appends their absolute paths, in that
 * order, to the configured command line. Each file is encoded as UTF-8 and
 * contains every key and every value followed by a NUL ({@code '\0'})
 * character. After the command exits with code zero, both files are read
 * back in the same format and replace the metadata and params.
 */
public class CommandLineTransform implements DocumentTransform {
  private static final Logger log
      = Logger.getLogger(CommandLineTransform.class.getName());
  // NOTE(review): not referenced anywhere in this class.
  private static final int STDERR_BUFFER_SIZE = 51200; // 50 kB
  /** Charset used for the temporary files and for decoding stderr. */
  private final Charset charset = Charset.forName("UTF-8");
  private boolean commandAcceptsParameters = true;
  private List<String> transformCommand;
  private File workingDirectory;

  public CommandLineTransform() {}

  /**
   * Accepts keys {@code "cmd"}, {@code "workingDirectory"},
   * {@code "cmdAcceptsParameters"}, and {@code "arg?"}.
   * The {@code "arg?"} configuration values should be numerically increasing
   * starting from one: {@code "arg1"}, {@code "arg2"}, {@code "arg3"}, ...
   * Arguments are collected until the first missing number, so a gap ends the
   * argument list. {@code "cmdAcceptsParameters"} is parsed with
   * {@link Boolean#parseBoolean}; when absent the default ({@code true}) is
   * kept.
   *
   * @param config configuration values keyed by name
   * @return a transform configured from {@code config}
   * @throws RuntimeException if {@code "cmd"} is not present
   * @throws IllegalArgumentException if {@code "workingDirectory"} is present
   *     but does not name a directory
   */
  public static CommandLineTransform create(Map<String, String> config) {
    CommandLineTransform transform = new CommandLineTransform();

    String cmd = config.get("cmd");
    if (cmd == null) {
      throw new RuntimeException("'cmd' not defined in configuration");
    }
    List<String> cmdList = new ArrayList<String>();
    cmdList.add(cmd);

    String workingDirectory = config.get("workingDirectory");
    if (workingDirectory != null) {
      transform.setWorkingDirectory(new File(workingDirectory));
    }

    String cmdAcceptsParameters = config.get("cmdAcceptsParameters");
    if (cmdAcceptsParameters != null) {
      transform.commandAcceptsParameters
          = Boolean.parseBoolean(cmdAcceptsParameters);
    }

    // arg1, arg2, ... are appended in order; the first absent key ends the
    // list.
    for (int i = 1;; i++) {
      String value = config.get("arg" + i);
      if (value == null) {
        break;
      }
      cmdList.add(value);
    }
    transform.transformCommand = cmdList;
    return transform;
  }

  /**
   * Runs the configured command and, when the command accepts parameters,
   * replaces {@code metadata} and {@code params} with what the command left
   * in the temporary files.
   *
   * <p>{@code metadata} and {@code params} are only modified after the
   * command has exited with code zero and both result files have been read
   * successfully. The temporary files are deleted before this method returns,
   * whether it succeeds or fails.
   *
   * @throws NullPointerException if no transform command has been set
   * @throws RuntimeException if the command exits with a non-zero code (the
   *     message contains the exit code and the decoded stderr), if the thread
   *     is interrupted while running the command, or if an
   *     {@link IOException} occurs
   */
  @Override
  public void transform(Metadata metadata, Map<String, String> params) {
    if (transformCommand == null) {
      throw new NullPointerException("transformCommand must not be null");
    }

    File metadataFile = null;
    File paramsFile = null;
    try {
      List<String> commandLine = new ArrayList<String>(transformCommand);
      if (commandAcceptsParameters) {
        metadataFile = writeIterableToTempFile(metadata);
        paramsFile = writeMapToTempFile(params);
        commandLine.add(metadataFile.getAbsolutePath());
        commandLine.add(paramsFile.getAbsolutePath());
      }

      Command command = new Command();
      try {
        command.exec(commandLine.toArray(new String[0]), workingDirectory);
      } catch (InterruptedException ex) {
        // Restore the interrupt flag so callers further up can observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(ex);
      }

      int exitCode = command.getReturnCode();
      byte[] stderr = command.getStderr();

      // Handle stderr
      if (exitCode != 0) {
        String errorOutput = new String(stderr, charset);
        throw new RuntimeException("Exit code " + exitCode + ". Stderr: "
                                   + errorOutput);
      }
      if (stderr.length > 0) {
        // Output on stderr with a zero exit code is informational only.
        String errorOutput = new String(stderr, charset);
        log.log(Level.INFO, "Stderr: {0}", new Object[] {errorOutput});
      }

      if (commandAcceptsParameters) {
        // Read both result files before touching the caller's objects so that
        // a read failure does not leave metadata replaced and params emptied.
        Set<Map.Entry<String, String>> newMetadata
            = readSetFromFile(metadataFile);
        Map<String, String> newParams = readMapFromFile(paramsFile);
        metadata.set(newMetadata);
        params.clear();
        params.putAll(newParams);
      }
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    } finally {
      deleteTempFile(metadataFile);
      deleteTempFile(paramsFile);
    }
  }

  /**
   * Best-effort removal of a temporary file. A failed delete is logged
   * instead of thrown so that it cannot mask an exception already in flight.
   *
   * @param file the file to delete; {@code null} is ignored
   */
  private void deleteTempFile(File file) {
    if (file != null && !file.delete()) {
      log.log(Level.WARNING, "Could not delete temporary file {0}", file);
    }
  }

  /** Writes the entries of {@code map} using {@link #writeIterableToTempFile}. */
  private File writeMapToTempFile(Map<String, String> map)
      throws IOException {
    return writeIterableToTempFile(map.entrySet());
  }

  /**
   * Serializes the entries as {@code key NUL value NUL ...} and hands the
   * resulting string to {@code IOHelper.writeToTempFile} together with the
   * charset.
   *
   * @return the file returned by {@code IOHelper.writeToTempFile}
   * @throws RuntimeException if a key or a value contains the NUL character,
   *     since NUL is the separator and could not be told apart afterwards
   */
  private File writeIterableToTempFile(Iterable<Map.Entry<String, String>> it)
      throws IOException {
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<String, String> me : it) {
      String key = me.getKey();
      String value = me.getValue();
      if (key.contains("\0")) {
        throw new RuntimeException("Key cannot contain the null character: "
                                   + key);
      }
      if (value.contains("\0")) {
        throw new RuntimeException("Value for key '" + key
            + "' cannot contain the null character: " + value);
      }
      sb.append(key).append('\0');
      sb.append(value).append('\0');
    }
    return IOHelper.writeToTempFile(sb.toString(), charset);
  }

  /**
   * Reads a file of NUL-separated tokens and pairs them up, in order, as
   * key/value entries. A trailing token without a partner (including the
   * empty token after a final NUL) is ignored.
   */
  private List<Map.Entry<String, String>> readListFromFile(File file)
      throws IOException {
    InputStream is = new FileInputStream(file);
    String str;
    try {
      str = IOHelper.readInputStreamToString(is, charset);
    } finally {
      is.close();
    }

    // Limit -1 keeps trailing empty strings, so an empty final value is not
    // silently dropped.
    String[] tokens = str.split("\0", -1);
    List<Map.Entry<String, String>> all
        = new ArrayList<Map.Entry<String, String>>();
    for (int i = 0; i + 1 < tokens.length; i += 2) {
      all.add(new SimpleEntry<String, String>(tokens[i], tokens[i + 1]));
    }
    return all;
  }

  /** Reads the key/value pairs of {@code file} into a {@link HashSet}. */
  private Set<Map.Entry<String, String>> readSetFromFile(File file)
      throws IOException {
    return new HashSet<Map.Entry<String, String>>(readListFromFile(file));
  }

  /**
   * Reads the key/value pairs of {@code file} into a {@link HashMap}. When a
   * key occurs more than once, the last occurrence wins.
   */
  private Map<String, String> readMapFromFile(File file) throws IOException {
    Map<String, String> map = new HashMap<String, String>();
    for (Map.Entry<String, String> e : readListFromFile(file)) {
      map.put(e.getKey(), e.getValue());
    }
    return map;
  }

  /**
   * This controls whether the input parameters to the transform call are passed
   * along to the actual call to the command. This is useful in the case where a
   * binary might return errors when unexpected command line arguments are
   * passed in. When {@code false}, no temporary files are written and the
   * metadata and params are left unchanged by {@link #transform}.
   */
  public void setCommandAcceptsParameters(boolean commandAcceptsParameters) {
    this.commandAcceptsParameters = commandAcceptsParameters;
  }

  /**
   * @return whether the metadata and params files are passed to the command
   */
  public boolean getCommandAcceptsParameters() {
    return commandAcceptsParameters;
  }

  /**
   * Sets the command that is in charge of transforming the document metadata
   * and params. The list is copied. When the command accepts parameters, two
   * arguments are appended to it on each invocation,
   * e.g. /path/to/command metadataFile paramsFile
   *
   * <p>A non-zero exit code causes a failure for the transform operation, and
   * the failure message includes whatever was printed to stderr. Output on
   * stderr together with a zero exit code is only logged.
   *
   * @throws NullPointerException if {@code transformCommand} is {@code null}
   */
  public void setTransformCommand(List<String> transformCommand) {
    this.transformCommand = new ArrayList<String>(transformCommand);
  }

  /**
   * @return an unmodifiable view of the transform command
   * @throws NullPointerException if no transform command has been set
   */
  public List<String> getTransformCommand() {
    return Collections.unmodifiableList(transformCommand);
  }

  /**
   * Sets the working directory. Must be valid.
   *
   * @throws IllegalArgumentException if {@code dir} is not a directory
   */
  public void setWorkingDirectory(File dir) {
    if (!dir.isDirectory()) {
      throw new IllegalArgumentException("File must be a directory");
    }
    workingDirectory = dir;
  }

  /**
   * @return The working directory for the command line process, or
   *     {@code null} if none has been set.
   */
  public File getWorkingDirectory() {
    return workingDirectory;
  }
}