// blob: be0a037e699753f6329f1546d9224ad646cc4074 [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package adaptorlib;
import org.w3c.dom.*;
import java.io.*;
import java.util.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
/**
 * Builds the XML text of a metadata-and-url feed file from DocInfos.
 * This code is based on information provided by Google at
 * http://code.google.com/apis/searchappliance/documentation/64/feedsguide.html
 */
class GsaFeedFileMaker {
  /** Metadata written in place of a record's own when that equals Metadata.EMPTY. */
  private static final Metadata EMPTY_METADATA_DEFAULT
      = new Metadata(Collections.singleton(MetaItem.isPublic()));

  /** Encodes each DocId into the value placed in its record's url attribute. */
  private DocIdEncoder idEncoder;

  /**
   * @param encoder used to encode every DocId that is written to a feed
   */
  public GsaFeedFileMaker(DocIdEncoder encoder) {
    this.idEncoder = encoder;
  }

  /**
   * Appends the leading comment and the header element to the feed's root.
   *
   * @param doc factory for the new nodes
   * @param root element that receives the comment and header
   * @param srcName text of the datasource element
   */
  private void appendHeader(Document doc, Element root, String srcName) {
    root.appendChild(doc.createComment("GSA EasyConnector"));

    Element headerElem = doc.createElement("header");
    root.appendChild(headerElem);

    Element datasourceElem = doc.createElement("datasource");
    datasourceElem.appendChild(doc.createTextNode(srcName));
    headerElem.appendChild(datasourceElem);

    Element feedtypeElem = doc.createElement("feedtype");
    feedtypeElem.appendChild(doc.createTextNode("metadata-and-url"));
    headerElem.appendChild(feedtypeElem);
  }

  /**
   * Appends one record element describing the given DocInfo to the group.
   *
   * @param doc factory for the new nodes
   * @param group element that receives the record
   * @param docRecord supplies the DocId and Metadata for the record
   */
  private void appendRecord(Document doc, Element group, DocInfo docRecord) {
    DocId id = docRecord.getDocId();
    Metadata metadata = docRecord.getMetadata();

    Element recordElem = doc.createElement("record");
    group.appendChild(recordElem);
    recordElem.setAttribute("url", "" + idEncoder.encodeDocId(id));

    boolean deleted = Metadata.DELETED.equals(metadata);
    if (deleted) {
      recordElem.setAttribute("action", "delete");
    } else {
      recordElem.setAttribute("action", "add");
    }
    // The mimetype attribute is required but ignored :)
    recordElem.setAttribute("mimetype", "text/plain");

    // TODO(pjo): Add "no-recrawl" signal.
    // TODO(pjo): Add "crawl-immediately" signal.
    // TODO(pjo): Add "no-follow" signal.

    if (deleted) {
      // Deleted items must not have a metadata tag.
      return;
    }

    Element metadataElem = doc.createElement("metadata");
    recordElem.appendChild(metadataElem);
    // The GSA requires a metadata tag with at least one item within, so empty
    // metadata is replaced by a placeholder holding some useless item.
    Metadata toWrite
        = Metadata.EMPTY.equals(metadata) ? EMPTY_METADATA_DEFAULT : metadata;
    appendMetaItems(doc, metadataElem, toWrite);
  }

  /**
   * Appends one meta element, carrying name and content attributes, per item.
   *
   * @param doc factory for the new nodes
   * @param metadataXml element that receives the meta children
   * @param metadataValues items to write
   */
  private void appendMetaItems(Document doc, Element metadataXml,
      Metadata metadataValues) {
    for (MetaItem entry : metadataValues) {
      Element metaElem = doc.createElement("meta");
      metaElem.setAttribute("name", entry.getName());
      metaElem.setAttribute("content", entry.getValue());
      metadataXml.appendChild(metaElem);
    }
  }

  /**
   * Fills the document with a gsafeed root holding the header followed by a
   * single group that has one record per DocInfo, in list order.
   *
   * @param doc empty document to populate
   * @param srcName text of the datasource element
   * @param docInfos entries to write as records
   */
  private void buildFeed(Document doc, String srcName,
      List<DocInfo> docInfos) {
    Element feedRoot = doc.createElement("gsafeed");
    doc.appendChild(feedRoot);
    appendHeader(doc, feedRoot, srcName);

    Element groupElem = doc.createElement("group");
    feedRoot.appendChild(groupElem);
    for (DocInfo info : docInfos) {
      appendRecord(doc, groupElem, info);
    }
  }

  /**
   * Serializes the feed document into a Java String, indented and carrying
   * the GSA feed public doctype.
   *
   * @param doc document to serialize
   * @return the XML text
   * @throws TransformerException if the transformer cannot be created or the
   *     transformation fails
   */
  private String serialize(Document doc) throws TransformerException {
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC,
        "-//Google//DTD GSA Feeds//EN");
    serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.STANDALONE, "no");

    StringWriter buffer = new StringWriter();
    serializer.transform(new DOMSource(doc), new StreamResult(buffer));
    return buffer.toString();
  }

  /**
   * Makes a metadata-and-url feed file from the provided DocInfos and
   * source name. Is used by GsaCommunicationHandler.pushDocIds().
   *
   * @param srcName datasource name written into the feed header
   * @param docInfos entries to include, one record each
   * @return the feed file as XML text
   * @throws IllegalStateException wrapping any parser-configuration or
   *     transformer failure
   */
  public String makeMetadataAndUrlXml(String srcName, List<DocInfo> docInfos) {
    try {
      Document doc = DocumentBuilderFactory.newInstance()
          .newDocumentBuilder().newDocument();
      buildFeed(doc, srcName, docInfos);
      return serialize(doc);
    } catch (ParserConfigurationException pce) {
      throw new IllegalStateException(pce);
    } catch (TransformerException te) {
      // Also covers TransformerConfigurationException, which is a subclass.
      throw new IllegalStateException(te);
    }
  }
}