Remove ability to transform content; continue to allow metadata transforms
diff --git a/src/com/google/enterprise/adaptor/AbstractDocumentTransform.java b/src/com/google/enterprise/adaptor/AbstractDocumentTransform.java
index a6a2d52..882614d 100644
--- a/src/com/google/enterprise/adaptor/AbstractDocumentTransform.java
+++ b/src/com/google/enterprise/adaptor/AbstractDocumentTransform.java
@@ -24,23 +24,21 @@
*/
public abstract class AbstractDocumentTransform implements DocumentTransform {
private String name = getClass().getName();
- private boolean required = true;
public AbstractDocumentTransform() {}
/**
* If {@code name} is {@code null}, the default is used.
*/
- public AbstractDocumentTransform(String name, boolean required) {
+ public AbstractDocumentTransform(String name) {
if (name != null) {
this.name = name;
}
- this.required = required;
}
/**
- * Configure this instance with provided {@code config}. Accepts keys {@code
- * "name"} and {@code "required"}. Unknown keys are ignored. This method is
+ * Configure this instance with provided {@code config}. Accepts key {@code
+ * "name"}. Unknown keys are ignored. This method is
* intended as a convenience for use in a static factory method.
*/
protected void configure(Map<String, String> config) {
@@ -48,11 +46,6 @@
if (name != null) {
this.name = name;
}
-
- String required = config.get("required");
- if (required != null) {
- this.required = Boolean.parseBoolean(required);
- }
}
protected void setName(String name) {
@@ -66,13 +59,4 @@
public String getName() {
return name;
}
-
- protected void setRequired(boolean required) {
- this.required = required;
- }
-
- @Override
- public boolean isRequired() {
- return required;
- }
}
diff --git a/src/com/google/enterprise/adaptor/Config.java b/src/com/google/enterprise/adaptor/Config.java
index d55d3d0..4a6244d 100644
--- a/src/com/google/enterprise/adaptor/Config.java
+++ b/src/com/google/enterprise/adaptor/Config.java
@@ -108,12 +108,8 @@
* checking. Defaults to false
* <tr><td> </td><td>server.useCompression </td><td> compress retrieval
* responses. Defaults to true
- * <tr><td> </td><td>transform.maxDocumentBytes </td><td> max size of
- * document that will get transformed. Defaults to 1048576
* <tr><td> </td><td>transform.pipeline </td><td> sequence of
* transformation steps. Defaults to no-pipeline
- * <tr><td> </td><td>transform.required </td><td> fail retrieval if document is
- * over maxDocumentBytes. Defaults to false
* </table>
*/
public class Config {
@@ -205,9 +201,6 @@
addKey("adaptor.incrementalPollPeriodSecs", "900");
addKey("adaptor.docContentTimeoutSecs", "30");
addKey("transform.pipeline", "");
- // 1 MiB.
- addKey("transform.maxDocumentBytes", "1048576");
- addKey("transform.required", "false");
addKey("journal.reducedMem", "true");
addKey("adaptor.sendDocControlsHeader", "false");
}
@@ -378,9 +371,7 @@
/**
* The maximum number of worker threads to use to respond to document
- * requests. The main reason to limit the number of threads is that each can
- * be using a transform pipeline and will have multiple complete copies of the
- * response in memory at the same time.
+ * requests.
*/
int getServerMaxWorkerThreads() {
return Integer.parseInt(getValue("server.maxWorkerThreads"));
@@ -475,14 +466,6 @@
return transforms;
}
- int getTransformMaxDocumentBytes() {
- return Integer.parseInt(getValue("transform.maxDocumentBytes"));
- }
-
- boolean isTransformRequired() {
- return Boolean.parseBoolean(getValue("transform.required"));
- }
-
boolean isJournalReducedMem() {
return Boolean.parseBoolean(getValue("journal.reducedMem"));
}
diff --git a/src/com/google/enterprise/adaptor/DocumentHandler.java b/src/com/google/enterprise/adaptor/DocumentHandler.java
index 715e76f..b58ecc9 100644
--- a/src/com/google/enterprise/adaptor/DocumentHandler.java
+++ b/src/com/google/enterprise/adaptor/DocumentHandler.java
@@ -61,8 +61,6 @@
= new HashSet<InetAddress>();
private final SamlServiceProvider samlServiceProvider;
private final TransformPipeline transform;
- private final int transformMaxBytes;
- private final boolean transformRequired;
private final boolean useCompression;
private final boolean sendDocControls;
@@ -73,8 +71,8 @@
Journal journal, Adaptor adaptor,
String gsaHostname, String[] fullAccessHosts,
SamlServiceProvider samlServiceProvider,
- TransformPipeline transform, int transformMaxBytes,
- boolean transformRequired, boolean useCompression,
+ TransformPipeline transform,
+ boolean useCompression,
Watchdog watchdog, AsyncPusher pusher,
boolean sendDocControls) {
if (docIdDecoder == null || docIdEncoder == null || journal == null
@@ -87,8 +85,6 @@
this.adaptor = adaptor;
this.samlServiceProvider = samlServiceProvider;
this.transform = transform;
- this.transformMaxBytes = transformMaxBytes;
- this.transformRequired = transformRequired;
this.useCompression = useCompression;
this.watchdog = watchdog;
this.pusher = pusher;
@@ -501,12 +497,7 @@
/** Must not respond with content, but otherwise act like normal. */
HEAD,
/** No need to buffer contents before sending. */
- NO_TRANSFORM,
- /**
- * Buffer "small" contents. Large file contents will be written without
- * transformation or cause an exception (depending on transformRequired).
- */
- TRANSFORM,
+ SEND_BODY,
}
/**
@@ -524,7 +515,7 @@
private State state = State.SETUP;
private HttpExchange ex;
// Whether ex.getResponseBody().close() has been called while we are in the
- // NO_TRANSFORM state. This isn't used for much internal code that calls
+ // SEND_BODY state. This isn't used for much internal code that calls
// close on the stream since it is obvious in those states that we won't
// ever attempt to flush or close the stream a second time.
private boolean responseBodyClosed;
@@ -573,8 +564,7 @@
// We will need to make an OutputStream.
break;
case HEAD:
- case NO_TRANSFORM:
- case TRANSFORM:
+ case SEND_BODY:
// Already called before. Provide saved OutputStream.
return os;
case NOT_MODIFIED:
@@ -588,17 +578,9 @@
state = State.HEAD;
os = new SinkOutputStream();
} else {
- if (transform != null) {
- state = State.TRANSFORM;
- OutputStream innerOs = transformRequired
- ? new CantUseOutputStream() : new LazyContentOutputStream();
- countingOs = new CountingOutputStream(innerOs);
- os = new MaxBufferOutputStream(countingOs, transformMaxBytes);
- } else {
- state = State.NO_TRANSFORM;
- countingOs = new CountingOutputStream(new LazyContentOutputStream());
- os = countingOs;
- }
+ state = State.SEND_BODY;
+ countingOs = new CountingOutputStream(new LazyContentOutputStream());
+ os = countingOs;
}
return os;
}
@@ -722,22 +704,7 @@
Translation.HTTP_NOT_FOUND);
break;
- case TRANSFORM:
- MaxBufferOutputStream mbos = (MaxBufferOutputStream) os;
- byte[] buffer = mbos.getBufferedContent();
- if (buffer == null) {
- log.info("Not transforming document because document is too large");
- } else {
- ByteArrayOutputStream baos = transform(buffer);
- buffer = null;
- startSending(true);
- baos.writeTo(ex.getResponseBody());
- }
- ex.getResponseBody().flush();
- ex.getResponseBody().close();
- break;
-
- case NO_TRANSFORM:
+ case SEND_BODY:
if (!responseBodyClosed) {
// The Adaptor didn't close the stream, so close it for them, making
// sure to flush any existing contents. We choose to use the same
@@ -770,6 +737,9 @@
}
private void startSending(boolean hasContent) throws IOException {
+ if (transform != null) {
+ transform();
+ }
if (requestIsFromFullyTrustedClient(ex)) {
// Always specify metadata and ACLs, even when empty, to replace
// previous values.
@@ -879,18 +849,16 @@
return true;
}
- private ByteArrayOutputStream transform(byte[] content) throws IOException {
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
+ private void transform() {
Map<String, String> params = new HashMap<String, String>();
params.put("DocId", docId.getUniqueId());
params.put("Content-Type", contentType);
try {
- transform.transform(content, contentOut, metadata, params);
+ transform.transform(metadata, params);
} catch (TransformException e) {
- throw new IOException(e);
+ throw new RuntimeException("transform failed", e);
}
contentType = params.get("Content-Type");
- return contentOut;
}
/**
@@ -909,17 +877,6 @@
super.close();
}
}
-
- /**
- * Used when transform pipeline is circumvented, but the pipeline is
- * required.
- */
- private class CantUseOutputStream extends AbstractLazyOutputStream {
- protected OutputStream retrieveOs() throws IOException {
- throw new IOException("Transform pipeline is required, but document is "
- + "too large");
- }
- }
}
/**
diff --git a/src/com/google/enterprise/adaptor/DocumentTransform.java b/src/com/google/enterprise/adaptor/DocumentTransform.java
index 3d3249f..2f6526f 100644
--- a/src/com/google/enterprise/adaptor/DocumentTransform.java
+++ b/src/com/google/enterprise/adaptor/DocumentTransform.java
@@ -24,23 +24,19 @@
*
* <p>Implementations should also typically have a static factory method with a
* single {@code Map<String, String>} argument for creating instances based on
- * configuration. Implementations are encouraged to accept "name" and
- * "required" as configuration keys.
+ * configuration. Implementations are encouraged to accept "name" as a
+ * configuration key.
*/
public interface DocumentTransform {
/**
- * Read data from {@code contentIn}, transform it, and write it to {@code
- * contentOut}. Any changes to {@code metadata} and {@code params} will be
+ * Read and modify {@code metadata} and {@code params}. Any changes will be
* passed on to subsequent transforms. This method must be thread-safe.
*
* @throws TransformException
* @throws IOException
*/
- public void transform(ByteArrayOutputStream contentIn,
- OutputStream contentOut,
- Metadata metadata,
- Map<String, String> params)
- throws TransformException, IOException;
+ public void transform(Metadata metadata, Map<String, String> params)
+ throws TransformException;
/**
* The name of this transform instance, typically provided by the user. It
@@ -48,15 +44,4 @@
* if no name has been provided.
*/
public String getName();
-
- /**
- * If this property is true, a failure of this transform will cause the entire
- * transform pipeline to abort. This is useful in the case where a particular
- * transform is required in order to server data. For example, a transform
- * tasked with redacting or filtering document content.
- *
- * If this is false and a error occurs, this transform is treated as a
- * identity transform.
- */
- public boolean isRequired();
}
diff --git a/src/com/google/enterprise/adaptor/GsaCommunicationHandler.java b/src/com/google/enterprise/adaptor/GsaCommunicationHandler.java
index 604ac19..ae968c2 100644
--- a/src/com/google/enterprise/adaptor/GsaCommunicationHandler.java
+++ b/src/com/google/enterprise/adaptor/GsaCommunicationHandler.java
@@ -275,8 +275,6 @@
config.getGsaHostname(),
config.getServerFullAccessHosts(),
samlServiceProvider, createTransformPipeline(),
- config.getTransformMaxDocumentBytes(),
- config.isTransformRequired(),
config.isServerToUseCompression(), watchdog,
asyncDocIdSender,
config.sendDocControlsHeader());
diff --git a/src/com/google/enterprise/adaptor/TransformPipeline.java b/src/com/google/enterprise/adaptor/TransformPipeline.java
index c187e9d..988f83f 100644
--- a/src/com/google/enterprise/adaptor/TransformPipeline.java
+++ b/src/com/google/enterprise/adaptor/TransformPipeline.java
@@ -27,7 +27,7 @@
import java.util.logging.Logger;
/**
- * Modify content and metadata using multiple serial transforms. The transforms
+ * Modify metadata using multiple serial transforms. The transforms
* are arranged into a serial pipeline where the output of one becomes the
* input for the next in the series.
*
@@ -44,62 +44,26 @@
}
/**
- * Transform {@code contentIn} and {@code metadata}. {@code ContentIn} is
- * guaranteed to remain unchanged; the rest of the parameters are expected to
- * change.
+ * Transform {@code metadata} and {@code params} with each transform in turn.
*/
- public void transform(byte[] contentIn,
- OutputStream contentOut,
- Metadata metadata,
- Map<String, String> params) throws TransformException, IOException {
+ public void transform(Metadata metadata, Map<String, String> params)
+ throws TransformException {
if (transformList.isEmpty()) {
- contentOut.write(contentIn);
return;
}
- ByteArrayOutputStream contentInTransit = new ByteArrayOutputStream(contentIn.length);
- ByteArrayOutputStream contentOutTransit = new ByteArrayOutputStream(contentIn.length);
Metadata metadataInTransit = new Metadata(metadata);
Map<String, String> paramsInTransit = Collections.checkedMap(
- new HashMap<String, String>(params.size() * 2), String.class, String.class);
- Map<String, String> paramsOutTransit = Collections.checkedMap(
- new HashMap<String, String>(params.size() * 2), String.class, String.class);
-
- contentInTransit.write(contentIn);
- paramsInTransit.putAll(params);
+ new HashMap<String, String>(params), String.class, String.class);
for (DocumentTransform transform : transformList) {
- contentOutTransit.reset();
- // Invariant: metadataInTransit changes after good transform only.
- Metadata metadataOutTransit = new Metadata(metadataInTransit);
- paramsOutTransit.clear();
- paramsOutTransit.putAll(paramsInTransit);
-
try {
- transform.transform(new UnmodifiableWrapperByteArrayOutputStream(contentInTransit),
- contentOutTransit, metadataOutTransit, paramsOutTransit);
+ transform.transform(metadataInTransit, paramsInTransit);
} catch (TransformException e) {
- if (transform.isRequired()) {
- log.log(Level.WARNING, "Transform Exception. Aborting '"
- + transform.getName() + "'", e);
- throw e;
- } else {
- log.log(Level.WARNING, "Transform Exception. Ignoring transform '"
- + transform.getName() + "'", e);
- continue;
- }
+ throw new TransformException("Aborting " + transform.getName(), e);
}
- metadataInTransit = metadataOutTransit;
- metadataOutTransit = null;
- // Swap input and output. The input is reused as the output for effeciency.
- ByteArrayOutputStream tmp = contentInTransit;
- contentInTransit = contentOutTransit;
- contentOutTransit = tmp;
- Map<String, String> tmpMap = paramsInTransit;
- paramsInTransit = paramsOutTransit;
- paramsOutTransit = tmpMap;
}
- contentInTransit.writeTo(contentOut);
+
metadata.set(metadataInTransit);
params.clear();
params.putAll(paramsInTransit);
@@ -111,52 +75,4 @@
public List<DocumentTransform> getDocumentTransforms() {
return transformList;
}
-
- private static class UnmodifiableWrapperByteArrayOutputStream extends ByteArrayOutputStream {
- private ByteArrayOutputStream os;
-
- public UnmodifiableWrapperByteArrayOutputStream(ByteArrayOutputStream os) {
- this.os = os;
- }
-
- @Override
- public void reset() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int size() {
- return os.size();
- }
-
- @Override
- public byte[] toByteArray() {
- return os.toByteArray();
- }
-
- @Override
- public String toString() {
- return os.toString();
- }
-
- @Override
- public String toString(String charsetName) throws UnsupportedEncodingException {
- return os.toString(charsetName);
- }
-
- @Override
- public void write(byte[] b, int off, int len) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void write(int b) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void writeTo(OutputStream out) throws IOException {
- os.writeTo(out);
- }
- }
}
diff --git a/src/com/google/enterprise/adaptor/examples/CalaisNERTransform.java b/src/com/google/enterprise/adaptor/examples/CalaisNERTransform.java
deleted file mode 100644
index ecc759c..0000000
--- a/src/com/google/enterprise/adaptor/examples/CalaisNERTransform.java
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.enterprise.adaptor.examples;
-
-import com.google.enterprise.adaptor.AbstractDocumentTransform;
-import com.google.enterprise.adaptor.Metadata;
-import com.google.enterprise.adaptor.TransformException;
-
-import mx.bigdata.jcalais.CalaisClient;
-import mx.bigdata.jcalais.CalaisConfig;
-import mx.bigdata.jcalais.CalaisObject;
-import mx.bigdata.jcalais.CalaisResponse;
-import mx.bigdata.jcalais.rest.CalaisRestClient;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.text.MessageFormat;
-import java.util.Map;
-
-/**
- * This transform sends the content to the OpenCalais webservice, which
- * extracts named entities. We then inject this info as metadata.
- * We currently make the assumption that the incoming content is HTML.
- */
-public class CalaisNERTransform extends AbstractDocumentTransform {
-
- interface CalaisClientFactory {
- CalaisClient makeClient(String apiKey);
- }
-
- private final CalaisClientFactory clientFactory;
-
- CalaisNERTransform(CalaisClientFactory factory) {
- this.clientFactory = factory;
- }
-
- public CalaisNERTransform() {
- this(null);
- }
-
- /**
- * This transform must take in a parameter of the form:
- * <code>(OpenCalaisAPIKey, key)</code>
- *
- * Optionally, extra parameters can be passed in to set which entity types to detect.
- * <code>(UseCalaisEntity:&type&, "True"|"False")</code>
- * Valid types are:
- * <ul>
- * <li>All
- * <li>Company
- * <li>Country
- * <li>EmailAddress
- * <li>Facility
- * <li>Holiday
- * <li>IndustryTerm
- * <li>MedicalCondition
- * <li>Movie
- * <li>MusicAlbum
- * <li>MusicGroup
- * <li>Organization
- * <li>Person
- * <li>PhoneNumber
- * <li>Position
- * <li>Product
- * <li>ProvinceOrState
- * <li>PublishedMedium
- * <li>Region
- * <li>Technology
- * </ul>
- */
- @Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> params)
- throws TransformException, IOException {
- String apiKey = params.get("OpenCalaisApiKey");
- if (apiKey == null) {
- throw new IllegalArgumentException("No api key given. Please set param: OpenCalaisApiKey");
- }
- boolean includeAllEntities = "True".equals(params.get("UseCalaisEntity:All"));
- CalaisClient calaisClient;
- if (null == clientFactory) {
- calaisClient = new CalaisRestClient(apiKey);
- } else {
- calaisClient = clientFactory.makeClient(apiKey);
- }
- CalaisConfig config = new CalaisConfig();
- config.set(CalaisConfig.ProcessingParam.CONTENT_TYPE, "TEXT/HTML");
- String content = contentIn.toString("UTF-8");
-
- CalaisResponse response = calaisClient.analyze(content, config);
- StringBuilder sb = new StringBuilder();
- for (CalaisObject entity : response.getEntities()) {
- String entityType = entity.getField("_type");
- String entityName = entity.getField("name");
- String entityParamKey = "UseCalaisEntity:" + entityType;
- boolean shouldInclude = includeAllEntities || "True".equals(params.get(entityParamKey));
- if (shouldInclude) {
- sb.append(MessageFormat.format("<meta name=\"{0}\" content=\"{1}\" />\n",
- entityType, entityName));
- }
- }
- // This is a very simple insertion mechanism. It looks for the closing
- // </HEAD> element and inserts the metadata right before it.
- content = content.replaceFirst("</(HEAD|head)", "\n" + sb.toString() + "</HEAD");
- contentOut.write(content.getBytes());
- }
-
- public static CalaisNERTransform create(Map<String, String> config) {
- CalaisNERTransform transform = new CalaisNERTransform();
- transform.configure(config);
- return transform;
- }
-}
diff --git a/src/com/google/enterprise/adaptor/examples/MetaTaggerTransform.java b/src/com/google/enterprise/adaptor/examples/MetaTaggerTransform.java
deleted file mode 100644
index ce7bd56..0000000
--- a/src/com/google/enterprise/adaptor/examples/MetaTaggerTransform.java
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.enterprise.adaptor.examples;
-
-import com.google.enterprise.adaptor.AbstractDocumentTransform;
-import com.google.enterprise.adaptor.Metadata;
-import com.google.enterprise.adaptor.TransformException;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Comparator;
-import java.util.Map;
-import java.util.Scanner;
-import java.util.SortedMap;
-import java.util.TreeMap;
-import java.util.logging.Logger;
-import java.util.regex.Pattern;
-
-/**
- * The transform examines the document for regex patterns. If a pattern is found,
- * the associated metadata is inserted at the end of the HEAD section of the
- * HTML. If no HEAD section exists, nothing gets inserted.
- */
-public class MetaTaggerTransform extends AbstractDocumentTransform {
- private static final Logger log = Logger.getLogger(MetaTaggerTransform.class.getName());
-
- public MetaTaggerTransform() {}
-
- public MetaTaggerTransform(String patternFile) throws IOException {
- loadPatternFile(patternFile);
- }
-
- @Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> params)
- throws TransformException, IOException {
- String content = contentIn.toString();
- StringBuilder sb = new StringBuilder();
- for (Map.Entry<Pattern, String> entry : patternMappings.entrySet()) {
- if (entry.getKey().matcher(content).find()) {
- sb.append(entry.getValue());
- }
- }
- // This is a very simple insertion mechanism. It looks for the closing
- // </HEAD> element and inserts the metadata right before it.
- content = content.replaceFirst("</(HEAD|head)", "\n" + sb.toString() + "</HEAD");
- contentOut.write(content.getBytes());
- }
-
- private void loadPatternFile(String filename) throws IOException {
- Scanner sc = new Scanner(new File(filename));
- while (sc.hasNextLine()) {
- String line = sc.nextLine().trim();
- int sepIndex = line.indexOf(PATTERN_FILE_SEP);
- if (line.isEmpty() || sepIndex < 0) {
- continue;
- }
-
- Pattern pattern = Pattern.compile(line.substring(0, sepIndex));
- String metadata = line.substring(sepIndex + 1, line.length());
- String existing = patternMappings.get(pattern);
- if (existing == null) {
- patternMappings.put(pattern, metadata + "\n");
- } else {
- patternMappings.put(pattern, existing + metadata + "\n");
- }
- }
- }
-
- /**
- * Maps Pattern to String representation of metadata.
- * The String is assumed to be a valid HTML fragment that is pasted into the
- * HEAD section of the HTML document.
- *
- * We use a SortedMap with this comparator to ensure we get the same metadata
- * ordering for each invocation. Serving different docs each time could lead
- * to unnecessary recrawls from the GSA.
- */
- private SortedMap<Pattern, String> patternMappings =
- new TreeMap<Pattern, String>(new PatternComparator());
-
- private static final char PATTERN_FILE_SEP = ' ';
-
- private class PatternComparator implements Comparator<Pattern> {
- public int compare(Pattern p1, Pattern p2) {
- return p1.toString().compareTo(p2.toString());
- }
- public boolean equals(Pattern p1, Pattern p2) {
- return p1.toString().equals(p2.toString());
- }
- }
-
- public static MetaTaggerTransform create(Map<String, String> config)
- throws IOException {
- String patternFile = config.get("patternFile");
- MetaTaggerTransform transform;
- if (patternFile == null) {
- transform = new MetaTaggerTransform();
- } else {
- transform = new MetaTaggerTransform(patternFile);
- }
- transform.configure(config);
- return transform;
- }
-}
diff --git a/src/com/google/enterprise/adaptor/examples/TableGeneratorTransform.java b/src/com/google/enterprise/adaptor/examples/TableGeneratorTransform.java
deleted file mode 100644
index 038205d..0000000
--- a/src/com/google/enterprise/adaptor/examples/TableGeneratorTransform.java
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.enterprise.adaptor.examples;
-
-import com.google.enterprise.adaptor.AbstractDocumentTransform;
-import com.google.enterprise.adaptor.Metadata;
-import com.google.enterprise.adaptor.TransformException;
-
-import au.com.bytecode.opencsv.CSVReader;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.StringReader;
-import java.nio.MappedByteBuffer;
-import java.nio.channels.FileChannel;
-import java.nio.charset.Charset;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Logger;
-
-/**
- * This transform takes in a CSV file, generates an HTML table with the data,
- * and inserts it into a template HTML file that's provided by the user.
- * In the template HTML file, place <code>&#0;</code> where you'd like the table
- * to be inserted.
- */
-public class TableGeneratorTransform extends AbstractDocumentTransform {
- private static final Logger log = Logger.getLogger(TableGeneratorTransform.class.getName());
-
- public TableGeneratorTransform() {}
-
- public TableGeneratorTransform(String templateFile) throws IOException {
- loadTemplateFile(templateFile);
- }
-
- @Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> params)
- throws TransformException, IOException {
- String csv = contentIn.toString();
- List<String[]> records = new CSVReader(new StringReader(csv)).readAll();
- StringBuilder tableBuilder = new StringBuilder();
- if (!records.isEmpty()) {
- tableBuilder.append("<table border=\"1\">\n");
- for (String[] record : records) {
- tableBuilder.append("<tr>\n");
- for (String field : record) {
- tableBuilder.append("<td>");
- tableBuilder.append(field);
- tableBuilder.append("</td>\n");
- }
- tableBuilder.append("</tr>\n");
- }
- tableBuilder.append("</table>");
- }
- String content = htmlTemplate.replace(SIGIL, tableBuilder.toString());
- contentOut.write(content.getBytes());
- }
-
- private void loadTemplateFile(String templateFile) throws IOException {
- FileInputStream stream = new FileInputStream(new File(templateFile));
- try {
- FileChannel fc = stream.getChannel();
- MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
- htmlTemplate = Charset.defaultCharset().decode(bb).toString();
- } finally {
- stream.close();
- }
- }
-
- private String htmlTemplate = "<HTML><HEAD></HEAD><BODY>" + SIGIL + "</BODY></HTML>";
-
- /**
- * This is the placeholder that gets replaced by the generated table. We use
- * the escaped null character, because it is explicitly disallowed in HTML.
- */
- private static final String SIGIL = "&#0;";
-
- public static TableGeneratorTransform create(Map<String, String> config)
- throws IOException {
- String templateFile = config.get("templateFile");
- TableGeneratorTransform transform;
- if (templateFile == null) {
- transform = new TableGeneratorTransform();
- } else {
- transform = new TableGeneratorTransform(templateFile);
- }
- transform.configure(config);
- return transform;
- }
-}
diff --git a/src/com/google/enterprise/adaptor/prebuilt/CommandLineTransform.java b/src/com/google/enterprise/adaptor/prebuilt/CommandLineTransform.java
index 046ef0e..4b282e5 100644
--- a/src/com/google/enterprise/adaptor/prebuilt/CommandLineTransform.java
+++ b/src/com/google/enterprise/adaptor/prebuilt/CommandLineTransform.java
@@ -81,11 +81,8 @@
}
@Override
- public void transform(ByteArrayOutputStream contentIn,
- OutputStream contentOut,
- Metadata metadata,
- Map<String, String> params)
- throws TransformException, IOException {
+ public void transform(Metadata metadata, Map<String, String> params)
+ throws TransformException {
if (transformCommand == null) {
throw new NullPointerException("transformCommand must not be null");
}
@@ -107,7 +104,7 @@
Command command = new Command();
try {
- command.exec(commandLine, workingDirectory, contentIn.toByteArray());
+ command.exec(commandLine, workingDirectory);
} catch (InterruptedException ex) {
throw new TransformException(ex);
}
@@ -126,12 +123,13 @@
log.log(Level.INFO, "Stderr: {0}", new Object[] {errorOutput});
}
- contentOut.write(command.getStdout());
if (commandAcceptsParameters) {
metadata.set(readSetFromFile(metadataFile));
params.clear();
params.putAll(readMapFromFile(paramsFile));
}
+ } catch (IOException ioe) {
+ throw new TransformException(ioe);
} finally {
if (metadataFile != null) {
metadataFile.delete();
@@ -249,11 +247,6 @@
super.setName(name);
}
- @Override
- public void setRequired(boolean required) {
- super.setRequired(required);
- }
-
public static CommandLineTransform create(Map<String, String> config) {
CommandLineTransform transform = new CommandLineTransform();
transform.configure(config);
diff --git a/test/com/google/enterprise/adaptor/DocumentHandlerTest.java b/test/com/google/enterprise/adaptor/DocumentHandlerTest.java
index 842e8b3..deed6c0 100644
--- a/test/com/google/enterprise/adaptor/DocumentHandlerTest.java
+++ b/test/com/google/enterprise/adaptor/DocumentHandlerTest.java
@@ -318,17 +318,11 @@
@Test
public void testTransform() throws Exception {
- final byte[] golden = new byte[] {2, 3, 4};
final String key = "testing key";
List<DocumentTransform> transforms = new LinkedList<DocumentTransform>();
transforms.add(new AbstractDocumentTransform() {
@Override
- public void transform(ByteArrayOutputStream contentIn,
- OutputStream contentOut,
- Metadata metadata,
- Map<String, String> params) throws IOException {
- assertArrayEquals(mockAdaptor.documentBytes, contentIn.toByteArray());
- contentOut.write(golden);
+ public void transform(Metadata metadata, Map<String, String> params) {
metadata.set(key, metadata.getOneValue(key).toUpperCase());
metadata.set("docid", params.get("DocId"));
}
@@ -347,94 +341,15 @@
.setAdaptor(mockAdaptor)
.setFullAccessHosts(new String[] {remoteIp})
.setTransform(transform)
- .setTransformMaxBytes(100)
.build();
mockAdaptor.documentBytes = new byte[] {1, 2, 3};
handler.handle(ex);
assertEquals(200, ex.getResponseCode());
- assertArrayEquals(golden, ex.getResponseBytes());
assertEquals("docid=test%20docId,testing%20key=TESTING%20VALUE",
ex.getResponseHeaders().getFirst("X-Gsa-External-Metadata"));
}
@Test
- public void testTransformDocumentTooLarge() throws Exception {
- List<DocumentTransform> transforms = new LinkedList<DocumentTransform>();
- transforms.add(new AbstractDocumentTransform() {
- @Override
- public void transform(ByteArrayOutputStream contentIn,
- OutputStream contentOut,
- Metadata metadata,
- Map<String, String> params) throws IOException {
- // This is not the content we are looking for.
- contentOut.write(new byte[] {2, 3, 4});
- }
- });
- TransformPipeline transform = new TransformPipeline(transforms);
- final byte[] golden = new byte[] {-1, 2, -3, 4, 5};
- mockAdaptor = new MockAdaptor() {
- @Override
- public void getDocContent(Request request, Response response)
- throws IOException {
- OutputStream os = response.getOutputStream();
- // Just for the heck of it, test using the single byte version.
- os.write(golden[0]);
- os.write(golden[1]);
- // Write out too much content for the buffer to hold here.
- os.write(golden, 2, golden.length - 2 - 1);
- os.write(golden, golden.length - 1, 1);
- }
- };
- String remoteIp = ex.getRemoteAddress().getAddress().getHostAddress();
- DocumentHandler handler = createHandlerBuilder()
- .setAdaptor(mockAdaptor)
- .setFullAccessHosts(new String[] {remoteIp})
- .setTransform(transform)
- .setTransformMaxBytes(3)
- .build();
- handler.handle(ex);
- assertEquals(200, ex.getResponseCode());
- assertArrayEquals(golden, ex.getResponseBytes());
- assertEquals(Arrays.asList("", ""),
- ex.getResponseHeaders().get("X-Gsa-External-Metadata"));
- }
-
- @Test
- public void testTransformDocumentTooLargeButRequired() throws Exception {
- TransformPipeline transform = new TransformPipeline(
- Collections.<DocumentTransform>emptyList());
- class CheckFailAdaptor extends MockAdaptor {
- public boolean failedAtCorrectTime = false;
-
- @Override
- public void getDocContent(Request request, Response response)
- throws IOException {
- OutputStream os = response.getOutputStream();
- os.write(new byte[] {-1, 2, -3});
- failedAtCorrectTime = true;
- // Write out too much content for the buffer to hold here.
- os.write(4);
- failedAtCorrectTime = false;
- }
- };
- CheckFailAdaptor mockAdaptor = new CheckFailAdaptor();
- String remoteIp = ex.getRemoteAddress().getAddress().getHostAddress();
- DocumentHandler handler = createHandlerBuilder()
- .setAdaptor(mockAdaptor)
- .setFullAccessHosts(new String[] {remoteIp})
- .setTransform(transform)
- .setTransformMaxBytes(3)
- .setTransformRequired(true)
- .build();
- thrown.expect(IOException.class);
- try {
- handler.handle(ex);
- } finally {
- assertTrue(mockAdaptor.failedAtCorrectTime);
- }
- }
-
- @Test
public void testNullAuthzResponse() throws Exception {
MockAdaptor adaptor = new MockAdaptor() {
@Override
@@ -1460,17 +1375,6 @@
return this;
}
- public DocumentHandlerBuilder setTransformMaxBytes(int transformMaxBytes) {
- this.transformMaxBytes = transformMaxBytes;
- return this;
- }
-
- public DocumentHandlerBuilder setTransformRequired(
- boolean transformRequired) {
- this.transformRequired = transformRequired;
- return this;
- }
-
public DocumentHandlerBuilder setUseCompression(boolean useCompression) {
this.useCompression = useCompression;
return this;
@@ -1495,8 +1399,7 @@
public DocumentHandler build() {
return new DocumentHandler(docIdDecoder, docIdEncoder, journal, adaptor,
gsaHostname, fullAccessHosts, samlServiceProvider, transform,
- transformMaxBytes, transformRequired, useCompression, watchdog,
- pusher, sendDocControls);
+ useCompression, watchdog, pusher, sendDocControls);
}
}
}
diff --git a/test/com/google/enterprise/adaptor/GsaCommunicationHandlerTest.java b/test/com/google/enterprise/adaptor/GsaCommunicationHandlerTest.java
index db3ce53..0db63e2 100644
--- a/test/com/google/enterprise/adaptor/GsaCommunicationHandlerTest.java
+++ b/test/com/google/enterprise/adaptor/GsaCommunicationHandlerTest.java
@@ -382,11 +382,7 @@
static class IdentityTransform extends AbstractDocumentTransform {
@Override
- public void transform(ByteArrayOutputStream contentIn,
- OutputStream contentOut,
- Metadata metadata,
- Map<String, String> params) throws IOException {
- contentIn.writeTo(contentOut);
+ public void transform(Metadata metadata, Map<String, String> params) {
}
}
diff --git a/test/com/google/enterprise/adaptor/TransformPipelineTest.java b/test/com/google/enterprise/adaptor/TransformPipelineTest.java
index c60b293..3ed5c91 100644
--- a/test/com/google/enterprise/adaptor/TransformPipelineTest.java
+++ b/test/com/google/enterprise/adaptor/TransformPipelineTest.java
@@ -34,12 +34,10 @@
@Test
public void testNoOpEmpty() throws IOException, TransformException {
TransformPipeline pipeline = new TransformPipeline(Collections.<DocumentTransform>emptyList());
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
Metadata metadata = new Metadata();
Map<String, String> params = new HashMap<String, String>();
- pipeline.transform(new byte[0], contentOut, metadata, params);
+ pipeline.transform(metadata, params);
- assertEquals(0, contentOut.size());
assertEquals(new Metadata(), metadata);
assertEquals(Collections.emptyMap(), params);
}
@@ -47,15 +45,12 @@
@Test
public void testNoOpWithInput() throws IOException, TransformException {
TransformPipeline pipeline = new TransformPipeline(Collections.<DocumentTransform>emptyList());
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
Metadata metadata = new Metadata();
metadata.add("key1", "value1");
Map<String, String> params = new HashMap<String, String>();
params.put("key2", "value2");
- String testString = "Here is some input";
- pipeline.transform(testString.getBytes(), contentOut, metadata, params);
+ pipeline.transform(metadata, params);
- assertEquals(testString, contentOut.toString());
Metadata goldenMetadata = new Metadata();
goldenMetadata.add("key1", "value1");
assertEquals(goldenMetadata, metadata);
@@ -72,14 +67,13 @@
List<DocumentTransform> transforms = new ArrayList<DocumentTransform>();
transforms.add(new AbstractDocumentTransform() {
@Override
- public void transform(ByteArrayOutputStream cIn, OutputStream cOut, Metadata m,
- Map<String, String> p) throws TransformException, IOException {
+ public void transform(Metadata m, Map<String, String> p) throws TransformException {
m.set("newMeta", "metaValue");
p.put("newKey", "newValue");
}
});
TransformPipeline pipeline = new TransformPipeline(transforms);
- pipeline.transform(new byte[0], new ByteArrayOutputStream(), metadata, params);
+ pipeline.transform(metadata, params);
assertEquals("value1", metadata.getOneValue("key1"));
assertEquals("metaValue", metadata.getOneValue("newMeta"));
@@ -89,16 +83,24 @@
assertEquals(2, params.size());
}
+ private static class ErroringTransform extends AbstractDocumentTransform {
+ public ErroringTransform() {
+ super(null);
+ }
+ @Override
+ public void transform(Metadata metadata, Map<String, String> p)
+ throws TransformException {
+ // Do some work, but don't complete.
+ metadata.set("trash", "value");
+ p.put("more trash", "values");
+ throw new TransformException("test exception");
+ }
+ }
+
private static class IncrementTransform extends AbstractDocumentTransform {
@Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> p)
- throws TransformException, IOException {
- byte[] content = contentIn.toByteArray();
- for (int i = 0; i < content.length; i++) {
- content[i]++;
- }
- contentOut.write(content);
+ public void transform(Metadata metadata, Map<String, String> p)
+ throws TransformException {
metadata.set("int", "" + (Integer.parseInt(metadata.getOneValue("int")) + 1));
p.put("int", "" + (Integer.parseInt(p.get("int")) + 1));
}
@@ -112,47 +114,23 @@
}
@Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> p)
- throws TransformException, IOException {
- byte[] content = contentIn.toByteArray();
- for (int i = 0; i < content.length; i++) {
- content[i] *= factor;
- }
- contentOut.write(content);
+ public void transform(Metadata metadata, Map<String, String> p)
+ throws TransformException {
metadata.set("int", "" + (Integer.parseInt(metadata.getOneValue("int")) * factor));
p.put("int", "" + (Integer.parseInt(p.get("int")) * factor));
}
}
- private static class ErroringTransform extends AbstractDocumentTransform {
- public ErroringTransform(boolean required) {
- super(null, required);
- }
- @Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> p)
- throws TransformException, IOException {
- // Do some work, but don't complete.
- contentOut.write(new byte[] {1});
- metadata.set("trash", "value");
- p.put("more trash", "values");
- throw new TransformException("test exception");
- }
- }
-
@Test
public void testTransform() throws IOException, TransformException {
TransformPipeline pipeline = new TransformPipeline(Arrays.asList(new IncrementTransform()));
- ByteArrayOutputStream out = new ByteArrayOutputStream();
Metadata metadata = new Metadata();
metadata.add("int", "0");
Map<String, String> params = new HashMap<String, String>();
params.put("int", "1");
- pipeline.transform(new byte[] {1, 2, 3}, out, metadata, params);
+ pipeline.transform(metadata, params);
- assertArrayEquals(new byte[] {2, 3, 4}, out.toByteArray());
Metadata goldenMetadata = new Metadata();
goldenMetadata.add("int", "1");
assertEquals(goldenMetadata, metadata);
@@ -164,15 +142,13 @@
TransformPipeline pipeline = new TransformPipeline(Arrays.asList(
new IncrementTransform(), new ProductTransform(2)));
- ByteArrayOutputStream out = new ByteArrayOutputStream();
Metadata metadata = new Metadata();
metadata.set("int", "0");
Map<String, String> params = new HashMap<String, String>();
params.put("int", "1");
- pipeline.transform(new byte[] {1, 2, 3}, out, metadata, params);
+ pipeline.transform(metadata, params);
- assertArrayEquals(new byte[] {4, 6, 8}, out.toByteArray());
Metadata goldenMetadata = new Metadata();
goldenMetadata.set("int", "2");
assertEquals(goldenMetadata, metadata);
@@ -180,48 +156,9 @@
}
@Test
- public void testNotLastTransformError() throws IOException, TransformException {
- TransformPipeline pipeline = new TransformPipeline(Arrays.asList(
- new IncrementTransform(), new ErroringTransform(false)));
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- metadata.set("int", "0");
- Map<String, String> params = new HashMap<String, String>();
- params.put("int", "1");
-
- pipeline.transform(new byte[] {1, 2, 3}, out, metadata, params);
-
- assertArrayEquals(new byte[] {2, 3, 4}, out.toByteArray());
- Metadata goldenMetadata = new Metadata();
- goldenMetadata.add("int", "1");
- assertEquals(goldenMetadata, metadata);
- assertEquals(Collections.singletonMap("int", "2"), params);
- }
-
- @Test
- public void testLastTransformError() throws IOException, TransformException {
- TransformPipeline pipeline = new TransformPipeline(Arrays.asList(
- new ErroringTransform(false), new IncrementTransform()));
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- metadata.set("int", "0");
- Map<String, String> params = new HashMap<String, String>();
- params.put("int", "1");
-
- pipeline.transform(new byte[] {1, 2, 3}, out, metadata, params);
-
- assertArrayEquals(new byte[] {2, 3, 4}, out.toByteArray());
- Metadata goldenMetadata = new Metadata();
- goldenMetadata.set("int", "1");
- assertEquals(goldenMetadata, metadata);
- assertEquals(Collections.singletonMap("int", "2"), params);
- }
-
- @Test
public void testTransformErrorFatal() throws IOException, TransformException {
TransformPipeline pipeline = new TransformPipeline(Arrays.asList(
- new IncrementTransform(), new ErroringTransform(true)));
- ByteArrayOutputStream out = new ByteArrayOutputStream();
+ new IncrementTransform(), new ErroringTransform()));
Metadata metadata = new Metadata();
metadata.set("int", "0");
Map<String, String> params = new HashMap<String, String>();
@@ -229,85 +166,12 @@
thrown.expect(TransformException.class);
try {
- pipeline.transform(new byte[] {1, 2, 3}, out, metadata, params);
+ pipeline.transform(metadata, params);
} finally {
- assertArrayEquals(new byte[] {}, out.toByteArray());
Metadata goldenMetadata = new Metadata();
goldenMetadata.set("int", "0");
assertEquals(goldenMetadata, metadata);
assertEquals(Collections.singletonMap("int", "1"), params);
}
}
-
- @Test
- public void testResetTransform() throws Exception {
- List<DocumentTransform> transforms = new ArrayList<DocumentTransform>();
- transforms.add(new AbstractDocumentTransform() {
- @Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> p)
- throws IOException {
- // Modifying contentIn is not allowed.
- contentIn.reset();
- }
- });
- TransformPipeline pipeline = new TransformPipeline(transforms);
- thrown.expect(UnsupportedOperationException.class);
- pipeline.transform(new byte[] {1, 2, 3}, new ByteArrayOutputStream(),
- new Metadata(), new HashMap<String, String>());
- }
-
- @Test
- public void testWriteTransform1() throws Exception {
- List<DocumentTransform> transforms = new ArrayList<DocumentTransform>();
- transforms.add(new AbstractDocumentTransform() {
- @Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> p)
- throws IOException {
- // Modifying contentIn is not allowed.
- contentIn.write(new byte[1], 0, 1);
- }
- });
- TransformPipeline pipeline = new TransformPipeline(transforms);
- thrown.expect(UnsupportedOperationException.class);
- pipeline.transform(new byte[] {1, 2, 3}, new ByteArrayOutputStream(),
- new Metadata(), new HashMap<String, String>());
- }
-
- @Test
- public void testWriteTransform2() throws Exception {
- List<DocumentTransform> transforms = new ArrayList<DocumentTransform>();
- transforms.add(new AbstractDocumentTransform() {
- @Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> p)
- throws IOException {
- // Modifying contentIn is not allowed.
- contentIn.write(0);
- }
- });
- TransformPipeline pipeline = new TransformPipeline(transforms);
- thrown.expect(UnsupportedOperationException.class);
- pipeline.transform(new byte[] {1, 2, 3}, new ByteArrayOutputStream(),
- new Metadata(), new HashMap<String, String>());
- }
-
- @Test
- public void testWriteTransform3() throws Exception {
- List<DocumentTransform> transforms = new ArrayList<DocumentTransform>();
- transforms.add(new AbstractDocumentTransform() {
- @Override
- public void transform(ByteArrayOutputStream contentIn, OutputStream contentOut,
- Metadata metadata, Map<String, String> p)
- throws IOException {
- // Modifying contentIn is not allowed.
- contentIn.write(new byte[1]);
- }
- });
- TransformPipeline pipeline = new TransformPipeline(transforms);
- thrown.expect(UnsupportedOperationException.class);
- pipeline.transform(new byte[] {1, 2, 3}, new ByteArrayOutputStream(),
- new Metadata(), new HashMap<String, String>());
- }
}
diff --git a/test/com/google/enterprise/adaptor/examples/CalaisNERTransformTest.java b/test/com/google/enterprise/adaptor/examples/CalaisNERTransformTest.java
deleted file mode 100644
index aa76a4e..0000000
--- a/test/com/google/enterprise/adaptor/examples/CalaisNERTransformTest.java
+++ /dev/null
@@ -1,198 +0,0 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.enterprise.adaptor.examples;
-
-import static org.junit.Assert.*;
-
-import com.google.enterprise.adaptor.Metadata;
-import com.google.enterprise.adaptor.TransformException;
-
-import mx.bigdata.jcalais.CalaisClient;
-import mx.bigdata.jcalais.CalaisConfig;
-import mx.bigdata.jcalais.CalaisObject;
-import mx.bigdata.jcalais.CalaisResponse;
-
-import org.junit.Test;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Tests for {@link CalaisNERTransform}.
- */
-public class CalaisNERTransformTest {
-
- private static class MockCalaisObject implements CalaisObject {
- private String type, name;
- MockCalaisObject(String t, String n) {
- this.type = t;
- this.name = n;
- }
-
- public String getField(String field) {
- if ("_type".equals(field)) {
- return type;
- } else if ("name".equals(field)) {
- return name;
- } else {
- throw new IllegalArgumentException();
- }
- }
-
- public Iterable getList(String field) {
- throw new UnsupportedOperationException();
- }
- }
-
- private static class MockCalaisResponse implements CalaisResponse {
-
- public CalaisObject getMeta() {
- throw new UnsupportedOperationException();
- }
-
- public CalaisObject getInfo() {
- throw new UnsupportedOperationException();
- }
-
- public Iterable<CalaisObject> getTopics() {
- throw new UnsupportedOperationException();
- }
-
- public Iterable<CalaisObject> getEntities() {
- List<CalaisObject> ents = new ArrayList<CalaisObject>();
- ents.add(new MockCalaisObject("Person", "Charles Taylor"));
- ents.add(new MockCalaisObject("Position", "President"));
- ents.add(new MockCalaisObject("Person", "Naomi Campbell"));
- ents.add(new MockCalaisObject("Country", "Sierra Leone"));
- ents.add(new MockCalaisObject("NaturalFeature", "Sierra Leone"));
- ents.add(new MockCalaisObject("Country", "Liberia"));
- return ents;
- }
-
- public Iterable<CalaisObject> getSocialTags() {
- throw new UnsupportedOperationException();
- }
-
- public Iterable<CalaisObject> getRelations() {
- throw new UnsupportedOperationException();
- }
- }
-
- private static class MockCalaisClient implements CalaisClient {
- public CalaisResponse analyze(URL url) {
- throw new UnsupportedOperationException();
- }
-
- public CalaisResponse analyze(URL url, CalaisConfig config){
- throw new UnsupportedOperationException();
- }
-
- public CalaisResponse analyze(Readable readable) {
- throw new UnsupportedOperationException();
- }
-
- public CalaisResponse analyze(Readable readable, CalaisConfig config){
- throw new UnsupportedOperationException();
- }
-
- public CalaisResponse analyze(String content) {
- throw new UnsupportedOperationException();
- }
-
- public CalaisResponse analyze(String content, CalaisConfig config) {
- return new MockCalaisResponse();
- }
- }
-
- private static class Factory implements CalaisNERTransform.CalaisClientFactory {
- public CalaisClient makeClient(String apiKey) {
- return new MockCalaisClient();
- }
- }
-
- // Note: These tests expect specific entities to be detected by OpenCalais.
- // Long term, we should mock out the webservice so we're not flaky.
-
- @Test
- public void testRestrictedSet() throws IOException, TransformException {
- CalaisNERTransform transform = new CalaisNERTransform(new Factory());
- ByteArrayOutputStream contentIn = new ByteArrayOutputStream();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- Map<String, String> params = new HashMap<String, String>();
- params.put("OpenCalaisApiKey", "4ydv87zawg7tf29jzex22d9u");
- params.put("UseCalaisEntity:Person", "True");
- params.put("UseCalaisEntity:Position", "True");
- // Test that "Country" is implicitly "True" when All isn't specified.
- params.put("UseCalaisEntity:NaturalFeature", "False");
-
- String testInput = "<HTML><HEAD></HEAD><BODY>"
- + "Prosecutors at the trial of former Liberian President Charles Taylor"
- + " hope the testimony of supermodel Naomi Campbell "
- + " will link Taylor to the trade in illegal conflict diamonds, "
- + " which they say he used to fund a bloody civil war in Sierra Leone."
- + "</BODY></HTML>";
- String golden = "<HTML><HEAD>\n"
- + "<meta name=\"Person\" content=\"Charles Taylor\" />\n"
- + "<meta name=\"Position\" content=\"President\" />\n"
- + "<meta name=\"Person\" content=\"Naomi Campbell\" />\n"
- + "</HEAD><BODY>Prosecutors at the trial of former Liberian President Charles Taylor"
- + " hope the testimony of supermodel Naomi Campbell "
- + " will link Taylor to the trade in illegal conflict diamonds, "
- + " which they say he used to fund a bloody civil war in Sierra Leone."
- + "</BODY></HTML>";
- contentIn.write(testInput.getBytes());
- transform.transform(contentIn, contentOut, metadata, params);
- assertEquals(golden, contentOut.toString());
- }
-
- @Test
- public void testAllEntities() throws IOException, TransformException {
- CalaisNERTransform transform = new CalaisNERTransform(new Factory());
- ByteArrayOutputStream contentIn = new ByteArrayOutputStream();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- Map<String, String> params = new HashMap<String, String>();
- params.put("OpenCalaisApiKey", "4ydv87zawg7tf29jzex22d9u");
- params.put("UseCalaisEntity:All", "True");
-
- String testInput = "<HTML><HEAD></HEAD><BODY>"
- + "Prosecutors at the trial of former Liberian President Charles Taylor"
- + " hope the testimony of supermodel Naomi Campbell "
- + " will link Taylor to the trade in illegal conflict diamonds, "
- + " which they say he used to fund a bloody civil war in Sierra Leone."
- + "</BODY></HTML>";
- String golden = "<HTML><HEAD>\n"
- + "<meta name=\"Person\" content=\"Charles Taylor\" />\n"
- + "<meta name=\"Position\" content=\"President\" />\n"
- + "<meta name=\"Person\" content=\"Naomi Campbell\" />\n"
- + "<meta name=\"Country\" content=\"Sierra Leone\" />\n"
- + "<meta name=\"NaturalFeature\" content=\"Sierra Leone\" />\n"
- + "<meta name=\"Country\" content=\"Liberia\" />\n"
- + "</HEAD><BODY>Prosecutors at the trial of former Liberian President Charles Taylor"
- + " hope the testimony of supermodel Naomi Campbell "
- + " will link Taylor to the trade in illegal conflict diamonds, "
- + " which they say he used to fund a bloody civil war in Sierra Leone."
- + "</BODY></HTML>";
- contentIn.write(testInput.getBytes());
- transform.transform(contentIn, contentOut, metadata, params);
- assertEquals(golden, contentOut.toString());
- }
-}
diff --git a/test/com/google/enterprise/adaptor/examples/MetaTaggerTransformTest.java b/test/com/google/enterprise/adaptor/examples/MetaTaggerTransformTest.java
deleted file mode 100644
index 886af4b..0000000
--- a/test/com/google/enterprise/adaptor/examples/MetaTaggerTransformTest.java
+++ /dev/null
@@ -1,165 +0,0 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.enterprise.adaptor.examples;
-
-import static org.junit.Assert.*;
-
-import com.google.enterprise.adaptor.Metadata;
-import com.google.enterprise.adaptor.TransformException;
-
-import org.junit.Test;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Tests for {@link MetaTaggerTransform}.
- */
-public class MetaTaggerTransformTest {
- private static final String TEST_DIR = "test/com/google/enterprise/adaptor/";
-
- @Test
- public void testNoInput() throws IOException, TransformException {
- MetaTaggerTransform transform = new MetaTaggerTransform();
- ByteArrayOutputStream contentIn = new ByteArrayOutputStream();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- Map<String, String> params = new HashMap<String, String>();
- params.put("key1", "value1");
-
- String testString = "";
- contentIn.write(testString.getBytes());
- transform.transform(contentIn, contentOut, metadata, params);
-
- assertEquals(testString, contentIn.toString());
- assertEquals(testString, contentOut.toString());
- assertTrue(metadata.isEmpty());
- assertEquals("value1", params.get("key1"));
- assertEquals(1, params.keySet().size());
- }
-
- @Test
- public void testNoPattern() throws IOException, TransformException {
- MetaTaggerTransform transform = new MetaTaggerTransform();
- ByteArrayOutputStream contentIn = new ByteArrayOutputStream();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- Map<String, String> params = new HashMap<String, String>();
- params.put("key1", "value1");
-
- String testString = "Here is some input";
- contentIn.write(testString.getBytes());
- transform.transform(contentIn, contentOut, metadata, params);
-
- assertEquals(testString, contentIn.toString());
- assertEquals(testString, contentOut.toString());
- assertTrue(metadata.isEmpty());
- assertEquals("value1", params.get("key1"));
- assertEquals(1, params.keySet().size());
- }
-
- @Test
- public void testSimple() throws IOException, TransformException {
- MetaTaggerTransform transform = new MetaTaggerTransform(TEST_DIR + "testPattern1.txt");
- ByteArrayOutputStream contentIn = new ByteArrayOutputStream();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- Map<String, String> params = new HashMap<String, String>();
- params.put("key1", "value1");
- String content =
- "<HTML>\n" +
- "<HEAD></head>\n" +
- "<BODY>\n" +
- "Today, John Paul gave a speach at local animal shelter. Animal lovers rejoice.\n" +
- "</BODY>\n" +
- "</HTML>\n";
- String goldenContent =
- "<HTML>\n" +
- "<HEAD>\n" +
- "<meta name=\"pope\" content=\"John Paul 2nd\" />\n" +
- "<meta name=\"city\" content=\"Mountain View\" />\n" +
- "</HEAD>\n" +
- "<BODY>\n" +
- "Today, John Paul gave a speach at local animal shelter. Animal lovers rejoice.\n" +
- "</BODY>\n" +
- "</HTML>\n";
- contentIn.write(content.getBytes());
- transform.transform(contentIn, contentOut, metadata, params);
-
- assertEquals(goldenContent, contentOut.toString());
- assertTrue(metadata.isEmpty());
- assertEquals("value1", params.get("key1"));
- assertEquals(1, params.keySet().size());
- }
-
- @Test
- public void testNoHead() throws IOException, TransformException {
- MetaTaggerTransform transform = new MetaTaggerTransform(TEST_DIR + "testPattern1.txt");
- ByteArrayOutputStream contentIn = new ByteArrayOutputStream();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- Map<String, String> params = new HashMap<String, String>();
- params.put("key1", "value1");
- String content =
- "This is a document with no head element.\n" +
- "If there were a HEAD element, then the\n" +
- "transform would be inserting metadata somewhere in this doc.\n" +
- " We should end up with the same output as input.\n";
- contentIn.write(content.getBytes());
- transform.transform(contentIn, contentOut, metadata, params);
-
- assertEquals(content, contentOut.toString());
- assertTrue(metadata.isEmpty());
- assertEquals("value1", params.get("key1"));
- assertEquals(1, params.keySet().size());
- }
-
- @Test
- public void testDuplicatePatternsInPatternFile() throws IOException, TransformException {
- MetaTaggerTransform transform = new MetaTaggerTransform(TEST_DIR + "testPatternDup.txt");
- ByteArrayOutputStream contentIn = new ByteArrayOutputStream();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- Map<String, String> params = new HashMap<String, String>();
- params.put("key1", "value1");
-
- String content =
- "<HTML>\n" +
- "<HEAD></head>\n" +
- "<BODY>\n" +
- "Today, John Paul gave a speach at local animal shelter. Animal lovers rejoice.\n" +
- "</BODY>\n" +
- "</HTML>\n";
- String goldenContent = "<HTML>\n" +
- "<HEAD>\n" +
- "<meta name=\"pope\" content=\"John Paul 2nd\" />\n" +
- "<meta name=\"pope\" content=\"John Paul 3rd\" />\n" +
- "<meta name=\"city\" content=\"Mountain View\" />\n" +
- "</HEAD>\n" +
- "<BODY>\n" +
- "Today, John Paul gave a speach at local animal shelter. Animal lovers rejoice.\n" +
- "</BODY>\n" +
- "</HTML>\n";
- contentIn.write(content.getBytes());
- transform.transform(contentIn, contentOut, metadata, params);
-
- assertEquals(goldenContent, contentOut.toString());
- assertTrue(metadata.isEmpty());
- assertEquals("value1", params.get("key1"));
- assertEquals(1, params.keySet().size());
- }
-}
diff --git a/test/com/google/enterprise/adaptor/examples/TableGeneratorTransformTest.java b/test/com/google/enterprise/adaptor/examples/TableGeneratorTransformTest.java
deleted file mode 100644
index 47d5077..0000000
--- a/test/com/google/enterprise/adaptor/examples/TableGeneratorTransformTest.java
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.enterprise.adaptor.examples;
-
-import static org.junit.Assert.*;
-
-import com.google.enterprise.adaptor.Metadata;
-import com.google.enterprise.adaptor.TransformException;
-
-import org.junit.Test;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Tests for {@link TableGeneratorTransform}.
- */
-public class TableGeneratorTransformTest {
- @Test
- public void testNoInput() throws IOException, TransformException {
- TableGeneratorTransform transform = new TableGeneratorTransform();
- ByteArrayOutputStream contentIn = new ByteArrayOutputStream();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- Map<String, String> params = new HashMap<String, String>();
- params.put("key1", "value1");
- transform.transform(contentIn, contentOut, metadata, params);
-
- String actualOutput = contentOut.toString();
- assertEquals("<HTML><HEAD></HEAD><BODY></BODY></HTML>", actualOutput);
- assertTrue(metadata.isEmpty());
- assertEquals("value1", params.get("key1"));
- assertEquals(1, params.keySet().size());
- }
-
- @Test
- public void testFull() throws IOException, TransformException {
- TableGeneratorTransform transform = new TableGeneratorTransform();
- ByteArrayOutputStream contentIn = new ByteArrayOutputStream();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- Metadata metadata = new Metadata();
- Map<String, String> params = new HashMap<String, String>();
- params.put("key1", "value1");
-
- String csv = "header1,\"\"\"header2\"\"\",\"This is a header\n" +
- "with a newline\",header4\n" +
- "This is the first field of the second record,\"This field has\n" +
- "a newline\",,field4";
- String goldenOutput =
- "<HTML><HEAD></HEAD><BODY><table border=\"1\">\n" +
- "<tr>\n" +
- "<td>header1</td>\n" +
- "<td>\"header2\"</td>\n" +
- "<td>This is a header\n" +
- "with a newline</td>\n" +
- "<td>header4</td>\n" +
- "</tr>\n" +
- "<tr>\n" +
- "<td>This is the first field of the second record</td>\n" +
- "<td>This field has\n" +
- "a newline</td>\n" +
- "<td></td>\n" +
- "<td>field4</td>\n" +
- "</tr>\n" +
- "</table></BODY></HTML>";
- contentIn.write(csv.getBytes());
- transform.transform(contentIn, contentOut, metadata, params);
-
- assertEquals(goldenOutput, contentOut.toString());
- assertTrue(metadata.isEmpty());
- assertEquals("value1", params.get("key1"));
- assertEquals(1, params.keySet().size());
- }
-}
diff --git a/test/com/google/enterprise/adaptor/prebuilt/CommandLineTransformTest.java b/test/com/google/enterprise/adaptor/prebuilt/CommandLineTransformTest.java
index 443e3bb..effeab8 100644
--- a/test/com/google/enterprise/adaptor/prebuilt/CommandLineTransformTest.java
+++ b/test/com/google/enterprise/adaptor/prebuilt/CommandLineTransformTest.java
@@ -36,8 +36,6 @@
TestHelper.assumeOsIsNotWindows();
ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- // The newline causes the test to work with both BSD and GNU sed.
- String testStr = "testing\n";
Metadata metadata = new Metadata();
metadata.add("metaKey1", "metaValue1");
Map<String, String> params = new HashMap<String, String>();
@@ -47,9 +45,8 @@
cmd.setTransformCommand(Arrays.asList(new String[] {"sed", "s/i/1/"}));
cmd.setCommandAcceptsParameters(false);
TransformPipeline pipeline = new TransformPipeline(Arrays.asList(cmd));
- pipeline.transform(testStr.getBytes(), contentOut, metadata, params);
+ pipeline.transform(metadata, params);
- assertEquals(testStr.replace("i", "1"), contentOut.toString());
assertEquals("metaValue1", metadata.getOneValue("metaKey1"));
assertEquals(1, metadata.getKeys().size());
assertEquals("value1", params.get("key1"));
@@ -60,9 +57,6 @@
public void testSedWithMetadata() throws IOException, TransformException {
TestHelper.assumeOsIsNotWindows();
- ByteArrayOutputStream contentOut = new ByteArrayOutputStream();
- // The newline causes the test to work with both BSD and GNU sed.
- String testStr = "testing\n";
Metadata metadata = new Metadata();
metadata.add("metaKey1", "metaValue1");
Map<String, String> params = new HashMap<String, String>();
@@ -70,8 +64,8 @@
CommandLineTransform cmd = new CommandLineTransform();
cmd.setTransformCommand(Arrays.asList(new String[] {"/bin/sh", "-c",
- // Process content.
- "sed s/i/1/; META=\"$0\"; PARAM=\"$1\"; TMPFILE=$(mktemp /tmp/adaptor.test.XXXXXXXX);"
+ // Set up variables and temp space.
+ " META=\"$0\"; PARAM=\"$1\"; TMPFILE=$(mktemp /tmp/adaptor.test.XXXXXXXX);"
// Process metadata.
+ "(sed s/1/2/g < \"$META\" > \"$TMPFILE\"; cp \"$TMPFILE\" \"$META\") >&2;"
// Process params.
@@ -81,9 +75,8 @@
}));
cmd.setCommandAcceptsParameters(true);
TransformPipeline pipeline = new TransformPipeline(Arrays.asList(cmd));
- pipeline.transform(testStr.getBytes(), contentOut, metadata, params);
+ pipeline.transform(metadata, params);
- assertEquals(testStr.replace("i", "1"), contentOut.toString());
assertEquals(1, metadata.getKeys().size());
assertEquals("metaValue2", metadata.getOneValue("metaKey2"));
assertEquals("value3", params.get("key3"));