blob: ac6c62e4cc09d3f43608f8150726a8e682aa97d8 [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.enterprise.adaptor;
import static java.util.Map.Entry;
import com.google.common.annotations.VisibleForTesting;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpsExchange;
import org.json.simple.JSONObject;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.naming.InvalidNameException;
import javax.naming.ldap.LdapName;
import javax.naming.ldap.Rdn;
import javax.net.ssl.SSLPeerUnverifiedException;
import javax.security.auth.x500.X500Principal;
class DocumentHandler implements HttpHandler {
/** Logger for this handler. */
private static final Logger log
= Logger.getLogger(DocumentHandler.class.getName());
/** Turns the request URI into the {@link DocId} being requested. */
private final DocIdDecoder docIdDecoder;
/** Turns DocIds into URIs when formatting ACL inherit-from headers. */
private final DocIdEncoder docIdEncoder;
/** Receives request start/end/failure and GSA/non-GSA request events. */
private final Journal journal;
/** Provides the document content through {@code getDocContent}. */
private final Adaptor adaptor;
/**
* Consulted for clients that are not fully trusted. May be {@code null}, in
* which case such clients are answered with a forbidden response.
*/
private final AuthzAuthority authzAuthority;
/** Times the header phase and the content phase of each request. */
private final Watchdog watchdog;
/** Receives named-resource ACLs and emulated crawl-time values to push. */
private final AsyncPusher pusher;
/**
* List of Common Names of Subjects that are provided full access when in
* secure mode. All entries should be lower case.
*/
private final Set<String> fullAccessCommonNames = new HashSet<String>();
/**
* List of IPs that are provided full access when not in secure mode.
*/
private final Set<InetAddress> fullAccessAddresses
= new HashSet<InetAddress>();
/** May be {@code null}; used to identify and authenticate users. */
private final SamlServiceProvider samlServiceProvider;
/** May be {@code null}; applied to metadata before headers are formed. */
private final TransformPipeline transform;
/** Applied to the document ACL before headers are formed. */
private final AclTransform aclTransform;
/** Whether to enable response compression when the client supports it. */
private final boolean useCompression;
/**
* Whether ACLs and crawl controls are sent via {@code X-Gsa-Doc-Controls}
* instead of being emulated with external metadata and feeds.
*/
private final boolean sendDocControls;
/** Watchdog timeout used from request start until sending begins. */
private final long headerTimeoutMillis;
/** Watchdog timeout used once sending has begun. */
private final long contentTimeoutMillis;
/** Value emitted as the {@code scoring=} doc control. */
private final String scoring;
/**
 * Builds a handler and initializes the sets of fully trusted common names
 * and IP addresses from {@code gsaHostname} and {@code fullAccessHosts}.
 *
 * <p>{@code samlServiceProvider} and {@code transform} may be {@code null}.
 * {@code authzAuthority} is not null-checked here; request handling treats a
 * {@code null} authority as "deny every client that is not fully trusted".
 *
 * @throws NullPointerException if {@code docIdDecoder}, {@code docIdEncoder},
 *     {@code journal}, {@code adaptor}, {@code aclTransform},
 *     {@code watchdog}, {@code pusher} or {@code scoringType} is {@code null}
 */
public DocumentHandler(DocIdDecoder docIdDecoder, DocIdEncoder docIdEncoder,
    Journal journal, Adaptor adaptor,
    AuthzAuthority authzAuthority,
    String gsaHostname, String[] fullAccessHosts,
    SamlServiceProvider samlServiceProvider,
    TransformPipeline transform, AclTransform aclTransform,
    boolean useCompression,
    Watchdog watchdog, AsyncPusher pusher,
    boolean sendDocControls, long headerTimeoutMillis,
    long contentTimeoutMillis, String scoringType) {
  boolean requiredArgumentMissing = docIdDecoder == null
      || docIdEncoder == null
      || journal == null
      || adaptor == null
      || aclTransform == null
      || watchdog == null
      || pusher == null
      || scoringType == null;
  if (requiredArgumentMissing) {
    throw new NullPointerException();
  }

  // Collaborators that must be present.
  this.docIdDecoder = docIdDecoder;
  this.docIdEncoder = docIdEncoder;
  this.journal = journal;
  this.adaptor = adaptor;
  this.aclTransform = aclTransform;
  this.watchdog = watchdog;
  this.pusher = pusher;
  this.scoring = scoringType;

  // Collaborators that are allowed to be null.
  this.authzAuthority = authzAuthority;
  this.samlServiceProvider = samlServiceProvider;
  this.transform = transform;

  // Plain configuration values.
  this.useCompression = useCompression;
  this.sendDocControls = sendDocControls;
  this.headerTimeoutMillis = headerTimeoutMillis;
  this.contentTimeoutMillis = contentTimeoutMillis;

  initFullAccess(gsaHostname, fullAccessHosts);
}
/**
 * Fills {@code fullAccessCommonNames} with the lower-cased GSA hostname and
 * every non-blank entry of {@code fullAccessHosts}, then resolves each of
 * those names and stores the resulting addresses in
 * {@code fullAccessAddresses}. Names that cannot be resolved are logged and
 * skipped.
 */
private void initFullAccess(String gsaHostname, String[] fullAccessHosts) {
  fullAccessCommonNames.add(gsaHostname.toLowerCase(Locale.ENGLISH));
  for (String configuredHost : fullAccessHosts) {
    String trimmed = configuredHost.trim();
    if (!trimmed.isEmpty()) {
      fullAccessCommonNames.add(trimmed.toLowerCase(Locale.ENGLISH));
    }
  }
  log.log(Level.INFO, "When in secure mode, common names that are given full "
      + "access to content: {0}", new Object[] {fullAccessCommonNames});

  for (String trustedName : fullAccessCommonNames) {
    InetAddress[] resolved;
    try {
      resolved = InetAddress.getAllByName(trustedName);
    } catch (UnknownHostException ex) {
      log.log(Level.WARNING, "Could not resolve hostname. Not adding it to "
          + "full access list of IPs: " + trustedName, ex);
      continue;
    }
    Collections.addAll(fullAccessAddresses, resolved);
  }
  log.log(Level.INFO, "When not in secure mode, IPs that are given full "
      + "access to content: {0}", new Object[] {fullAccessAddresses});
}
/**
 * Decides whether the peer of {@code ex} gets full access. For HTTPS
 * exchanges the decision is based on the Common Name of the verified client
 * certificate; for all other exchanges it is based on the remote IP address.
 *
 * @return {@code true} if the client is in the matching full-access set
 */
private boolean requestIsFromFullyTrustedClient(HttpExchange ex) {
  if (!(ex instanceof HttpsExchange)) {
    // Non-secure mode: only the remote address is available to judge by.
    InetAddress remote = ex.getRemoteAddress().getAddress();
    boolean trustedAddress = fullAccessAddresses.contains(remote);
    String message = trustedAddress
        ? "Client is trusted in non-secure mode: {0}"
        : "Client is not trusted in non-secure mode: {0}";
    log.log(Level.FINE, message, remote);
    return trustedAddress;
  }

  // Secure mode: require a verified X.500 client certificate.
  java.security.Principal peer;
  try {
    peer = ((HttpsExchange) ex).getSSLSession().getPeerPrincipal();
  } catch (SSLPeerUnverifiedException e) {
    log.log(Level.FINE, "Client is not trusted. It does not have a verified"
        + " client certificate", e);
    return false;
  }
  if (!(peer instanceof X500Principal)) {
    log.fine("Client is not trusted. It does not have a X500 principal");
    return false;
  }

  LdapName distinguishedName;
  try {
    // getName() provides RFC2253-encoded data.
    distinguishedName = new LdapName(peer.getName());
  } catch (InvalidNameException e) {
    // Getting here may represent a bug in the standard libraries.
    log.log(Level.FINE, "Client is not trusted. The X500 principal could "
        + "not be parsed", e);
    return false;
  }

  // Take the first RDN of type CN that carries a String value.
  String commonName = null;
  for (Rdn component : distinguishedName.getRdns()) {
    if (!"CN".equalsIgnoreCase(component.getType())) {
      continue;
    }
    if (component.getValue() instanceof String) {
      commonName = (String) component.getValue();
      break;
    }
  }
  if (commonName == null) {
    log.log(Level.FINE, "Client is not trusted. Could not find Common "
        + "Name");
    return false;
  }

  commonName = commonName.toLowerCase(Locale.ENGLISH);
  boolean trustedName = fullAccessCommonNames.contains(commonName);
  String message = trustedName
      ? "Client is trusted in secure mode: {0}"
      : "Client is not trusted in secure mode: {0}";
  log.log(Level.FINE, message, commonName);
  return trustedName;
}
/**
 * Serves a document for GET and HEAD requests; any other method is answered
 * with a canned "bad method" response. After the authorization check passes,
 * the adaptor is asked for the content while the journal and the watchdog
 * are kept informed of progress.
 */
@Override
public void handle(HttpExchange ex) throws IOException {
  String method = ex.getRequestMethod();
  boolean retrieval = "GET".equals(method) || "HEAD".equals(method);
  if (!retrieval) {
    HttpExchanges.cannedRespond(ex, HttpURLConnection.HTTP_BAD_METHOD,
        Translation.HTTP_BAD_METHOD);
    return;
  }

  /* Call into adaptor developer code to get document bytes. */
  DocId docId = docIdDecoder.decodeDocId(HttpExchanges.getRequestUri(ex));
  log.log(Level.FINE, "DocId: {0}", docId.getUniqueId());
  if (!authzed(ex, docId)) {
    // authzed() has already written the response.
    return;
  }

  DocumentRequest request = new DocumentRequest(ex, docId);
  DocumentResponse response
      = new DocumentResponse(ex, docId, Thread.currentThread());
  journal.recordRequestProcessingStart();
  watchdog.processingStarting(headerTimeoutMillis);
  try {
    adaptor.getDocContent(request, response);
  } catch (InterruptedException e) {
    journal.recordRequestProcessingFailure();
    throw new RuntimeException("Retriever interrupted: " + docId, e);
  } catch (RuntimeException e) {
    journal.recordRequestProcessingFailure();
    throw new RuntimeException("Exception in retriever: " + docId, e);
  } catch (IOException e) {
    journal.recordRequestProcessingFailure();
    throw new IOException("Exception in retriever: " + docId, e);
  } finally {
    watchdog.processingCompleted();
  }
  journal.recordRequestProcessingEnd(response.getWrittenContentSize());
  response.complete();
}
/**
 * Checks whether the requester may access {@code docId}. Whenever access is
 * refused this method has already written the response (or started the
 * authentication flow) on {@code ex}.
 *
 * @return {@code true} if the request may proceed
 */
private boolean authzed(HttpExchange ex, DocId docId) throws IOException {
  String userAgent = ex.getRequestHeaders().getFirst("User-Agent");
  if ("SecMgr".equals(userAgent)) {
    // Assume that the SecMgr is performing a "HEAD" request to check authz.
    // We don't support this, so we always issue deny.
    HttpExchanges.cannedRespond(ex, HttpURLConnection.HTTP_FORBIDDEN,
        Translation.HTTP_FORBIDDEN_SECMGR);
    return false;
  }

  if (requestIsFromFullyTrustedClient(ex)) {
    journal.recordGsaContentRequest(docId);
    return true;
  }

  if (authzAuthority == null) {
    // Nobody to ask, so untrusted clients are refused outright.
    HttpExchanges.cannedRespond(ex, HttpURLConnection.HTTP_FORBIDDEN,
        Translation.HTTP_FORBIDDEN);
    return false;
  }

  journal.recordNonGsaContentRequest(docId);
  // A null identity means anonymous.
  AuthnIdentity identity = (samlServiceProvider == null)
      ? null : samlServiceProvider.getUserIdentity(ex);
  Map<DocId, AuthzStatus> decisions = authzAuthority.isUserAuthorized(
      identity, Collections.singletonList(docId));
  AuthzStatus status = (decisions == null) ? null : decisions.get(docId);
  if (status == null) {
    status = AuthzStatus.DENY;
    log.log(Level.WARNING, "Adaptor did not provide an authorization "
        + "result for the requested DocId ''{0}''. Instead provided: "
        + "{1}", new Object[] {docId, decisions});
  }

  if (status == AuthzStatus.INDETERMINATE) {
    HttpExchanges.cannedRespond(ex, HttpURLConnection.HTTP_NOT_FOUND,
        Translation.HTTP_NOT_FOUND);
    return false;
  }
  if (status != AuthzStatus.DENY) {
    return true;
  }

  if (identity == null && samlServiceProvider != null) {
    // User was anonymous and document is not public, so try to authn
    // user.
    samlServiceProvider.handleAuthentication(ex);
  } else {
    HttpExchanges.cannedRespond(ex, HttpURLConnection.HTTP_FORBIDDEN,
        Translation.HTTP_FORBIDDEN);
  }
  return false;
}
/**
 * Builds the GSA-specific header value for crawl-time metadata: every entry
 * becomes a percent-encoded {@code key=value} pair and the pairs are joined
 * with commas. Returns the empty string when there is no metadata.
 */
static String formMetadataHeader(Metadata metadata) {
  StringBuilder header = new StringBuilder();
  for (Entry<String, String> pair : metadata) {
    percentEncodeMapEntryPair(header, pair.getKey(), pair.getValue());
  }
  if (header.length() > 0) {
    // Every pair is followed by a comma; remove the last one.
    header.setLength(header.length() - 1);
  }
  return header.toString();
}
/**
 * Formats an ACL as percent-encoded, comma-separated {@code google:acl*}
 * key/value pairs (no namespace or case-sensitivity information is emitted).
 *
 * @param acl the ACL to format; {@code null} yields the empty string and an
 *     ACL equal to {@code Acl.EMPTY} is replaced by {@code Acl.FAKE_EMPTY}
 * @param docIdEncoder used to turn the inherit-from DocId into a URI
 * @return the header value, or the empty string when no pair was produced
 */
@VisibleForTesting
static String formUnqualifiedAclHeader(Acl acl, DocIdEncoder docIdEncoder) {
  if (acl == null) {
    return "";
  }
  if (Acl.EMPTY.equals(acl)) {
    acl = Acl.FAKE_EMPTY;
  }
  StringBuilder sb = new StringBuilder();
  for (UserPrincipal permitUser : acl.getPermitUsers()) {
    String name = permitUser.getName();
    percentEncodeMapEntryPair(sb, "google:aclusers", name);
  }
  for (GroupPrincipal permitGroup : acl.getPermitGroups()) {
    String name = permitGroup.getName();
    percentEncodeMapEntryPair(sb, "google:aclgroups", name);
  }
  for (UserPrincipal denyUser : acl.getDenyUsers()) {
    String name = denyUser.getName();
    percentEncodeMapEntryPair(sb, "google:acldenyusers", name);
  }
  for (GroupPrincipal denyGroup : acl.getDenyGroups()) {
    String name = denyGroup.getName();
    percentEncodeMapEntryPair(sb, "google:acldenygroups", name);
  }
  if (acl.getInheritFrom() != null) {
    URI uri = docIdEncoder.encodeDocId(acl.getInheritFrom());
    try {
      // Although it is named "fragment", we use a query parameter because the
      // GSA "normalizes" away fragments.
      uri = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(),
          acl.getInheritFromFragment(), null);
    } catch (URISyntaxException ex) {
      throw new AssertionError(ex);
    }
    percentEncodeMapEntryPair(sb, "google:aclinheritfrom", uri.toString());
  }
  if (acl.getInheritanceType() != Acl.InheritanceType.LEAF_NODE) {
    percentEncodeMapEntryPair(sb, "google:aclinheritancetype",
        acl.getInheritanceType().getCommonForm());
  }
  // Each pair ends with a comma, so strip the final one. An ACL that is not
  // equal to Acl.EMPTY can still produce no pairs; substring(0, -1) would
  // then throw StringIndexOutOfBoundsException, so return "" instead, as the
  // other header formatters in this class do.
  return (sb.length() == 0) ? "" : sb.substring(0, sb.length() - 1);
}
/**
 * Serializes an ACL to the JSON form used for the {@code acl} doc control.
 * The JSON object may contain {@code entries}, {@code inherit_from} and
 * {@code inheritance_type}; each is left out when it has nothing to say.
 * A {@code null} ACL yields the empty string, and an ACL equal to
 * {@code Acl.EMPTY} is replaced by {@code Acl.FAKE_EMPTY} first.
 */
@VisibleForTesting
static String formNamespacedAclHeader(Acl acl, DocIdEncoder enc) {
  if (acl == null) {
    return "";
  }
  Acl effective = Acl.EMPTY.equals(acl) ? Acl.FAKE_EMPTY : acl;

  Map<String, Object> json = new TreeMap<String, Object>();

  List<Map<String, String>> entries = makeGsaAclEntries(effective);
  if (!entries.isEmpty()) {
    json.put("entries", entries);
  }

  if (effective.getInheritFrom() != null) {
    URI parent = enc.encodeDocId(effective.getInheritFrom());
    URI parentWithFragment;
    try {
      // Although it is named "fragment", we use a query parameter because the
      // GSA "normalizes" away fragments.
      parentWithFragment = new URI(parent.getScheme(), parent.getAuthority(),
          parent.getPath(), effective.getInheritFromFragment(), null);
    } catch (URISyntaxException ex) {
      throw new AssertionError(ex);
    }
    json.put("inherit_from", parentWithFragment.toString());
  }

  Acl.InheritanceType inheritance = effective.getInheritanceType();
  if (inheritance != Acl.InheritanceType.LEAF_NODE) {
    json.put("inheritance_type", String.valueOf(inheritance));
  }
  return JSONObject.toJSONString(json);
}
/**
 * Produces one entry map per principal of {@code acl}, in the order: permit
 * groups, deny groups, permit users, deny users.
 */
private static List<Map<String, String>> makeGsaAclEntries(Acl acl) {
  List<Map<String, String>> entries = new ArrayList<Map<String, String>>();
  appendGsaAclEntries(entries, "permit", acl, acl.getPermitGroups());
  appendGsaAclEntries(entries, "deny", acl, acl.getDenyGroups());
  appendGsaAclEntries(entries, "permit", acl, acl.getPermitUsers());
  appendGsaAclEntries(entries, "deny", acl, acl.getDenyUsers());
  return entries;
}

/** Appends an entry with the given access for each principal, in order. */
private static void appendGsaAclEntries(List<Map<String, String>> sink,
    String access, Acl acl, Iterable<? extends Principal> principals) {
  for (Principal principal : principals) {
    sink.add(makeGsaAclEntry(access, acl, principal));
  }
}
/**
 * Describes one principal as a sorted map with {@code access}, {@code scope}
 * and {@code name}. {@code namespace} is added only for a non-default
 * namespace, and {@code case_sensitivity_type} only when the ACL is not
 * entirely case sensitive.
 */
private static Map<String, String> makeGsaAclEntry(String access,
    Acl acl, Principal p) {
  String scope;
  if (p.isUser()) {
    scope = "user";
  } else {
    scope = "group";
  }

  Map<String, String> entry = new TreeMap<String, String>();
  entry.put("access", access);
  entry.put("scope", scope);
  entry.put("name", p.getName());

  boolean defaultNamespace
      = Principal.DEFAULT_NAMESPACE.equals(p.getNamespace());
  if (!defaultNamespace) {
    entry.put("namespace", p.getNamespace());
  }
  if (!acl.isEverythingCaseSensitive()) {
    entry.put("case_sensitivity_type", "everything_case_insensitive");
  }
  return entry;
}
/**
 * Builds the GSA-specific header value for extra crawl-time anchors. The two
 * lists are read in parallel: an anchor with text becomes a percent-encoded
 * {@code text=uri} pair, an anchor whose text is {@code null} becomes just
 * the percent-encoded URI. Items are comma separated; an empty input gives
 * the empty string.
 */
static String formAnchorHeader(List<URI> uris, List<String> texts) {
  StringBuilder header = new StringBuilder();
  for (int idx = 0, total = uris.size(); idx < total; idx++) {
    URI target = uris.get(idx);
    String label = texts.get(idx);
    if (label != null) {
      percentEncodeMapEntryPair(header, label, target.toString());
      continue;
    }
    header.append(percentEncode(target.toString())).append(",");
  }
  if (header.length() > 0) {
    // Remove the comma that follows the last item.
    header.setLength(header.length() - 1);
  }
  return header.toString();
}
/**
 * Appends {@code key=value,} to {@code sb}, with both key and value
 * percent-encoded. The trailing comma is left for the caller to trim.
 */
private static void percentEncodeMapEntryPair(StringBuilder sb, String key,
    String value) {
  sb.append(percentEncode(key))
      .append('=')
      .append(percentEncode(value))
      .append(',');
}
/**
 * Percent-encodes the UTF-8 bytes of {@code value} in the manner of
 * <a href="http://tools.ietf.org/html/rfc3986#section-2">RFC 3986</a>.
 * Bytes for A-Z, a-z, 0-9, '-', '_', '.', and '~' are copied through
 * unchanged; every other byte is written as '%' followed by two upper-case
 * hexadecimal digits.
 */
static String percentEncode(String value) {
  final String hexDigits = "0123456789ABCDEF";
  byte[] utf8 = value.getBytes(Charset.forName("UTF-8"));
  StringBuilder encoded = new StringBuilder();
  for (int idx = 0; idx < utf8.length; idx++) {
    // Work with the unsigned value so that bytes >= 0x80 are never mistaken
    // for ASCII and always yield exactly two hex digits.
    int octet = utf8[idx] & 0xff;
    boolean letter = (octet >= 'a' && octet <= 'z')
        || (octet >= 'A' && octet <= 'Z');
    boolean digit = octet >= '0' && octet <= '9';
    boolean mark = octet == '-' || octet == '_' || octet == '.'
        || octet == '~';
    if (letter || digit || mark) {
      encoded.append((char) octet);
    } else {
      encoded.append('%');
      encoded.append(hexDigits.charAt(octet >>> 4));
      encoded.append(hexDigits.charAt(octet & 0x0f));
    }
  }
  return encoded.toString();
}
/**
 * {@link Request} backed by an HTTP exchange: the last access time is taken
 * from the exchange's If-Modified-Since value.
 */
private static class DocumentRequest implements Request {
  private final HttpExchange ex;
  private final DocId docId;

  private DocumentRequest(HttpExchange ex, DocId docId) {
    this.docId = docId;
    this.ex = ex;
  }

  @Override
  public DocId getDocId() {
    return docId;
  }

  @Override
  public Date getLastAccessTime() {
    return HttpExchanges.getIfModifiedSince(ex);
  }

  /**
   * Reports a change when no last access time is available, or when the last
   * access happened before {@code lastModified}.
   */
  @Override
  public boolean hasChangedSinceLastAccess(Date lastModified) {
    Date lastAccess = getLastAccessTime();
    return lastAccess == null || lastAccess.before(lastModified);
  }

  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("Request(docId=");
    text.append(docId);
    text.append(",lastAccessTime=").append(getLastAccessTime());
    text.append(")");
    return text.toString();
  }
}
/**
* The state of the response. The state begins in SETUP mode, after which it
* should transition to another state and become fixed at that state.
*/
private enum State {
/**
* The class has not been informed how to respond, so we can still make
* changes to what will be provided in headers.
*/
SETUP,
/**
* No content to send; the response is completed with a Not Modified
* status code instead of the document.
*/
NOT_MODIFIED,
/**
* No content to send; the response is completed with a canned Not Found
* reply instead of the document.
*/
NOT_FOUND,
/**
* Must not respond with content, but otherwise act like normal. Entered
* when the output stream is requested for a HEAD request.
*/
HEAD,
/**
* Content is written through to the exchange's response body. No need to
* buffer contents before sending.
*/
SEND_BODY,
}
/**
* Handles incoming data from adaptor and sending it to the client. There are
* unfortunately many possible response cases. In short they are: document is
* Not Modified, document contents are ignored because we are responding to a
* HEAD request, transform pipeline is in use and document is small, transform
* pipeline is in use and document is large, and transform pipeline is not in
* use.
*
* <p>{@link #getOutputStream} and {@link #complete} are the main methods that
* need to be very aware of all the different possibilities.
*/
private class DocumentResponse implements Response {
// Thread handed to the watchdog when switching from the header timeout to
// the content timeout.
private Thread workingThread;
// How the adaptor chose to respond; starts out as SETUP.
private State state = State.SETUP;
// The exchange being answered.
private HttpExchange ex;
// Whether ex.getResponseBody().close() has been called while we are in the
// SEND_BODY state. This isn't used for much internal code that calls
// close on the stream since it is obvious in those states that we won't
// ever attempt to flush or close the stream a second time.
private boolean responseBodyClosed;
// Stream handed back by getOutputStream(); unset until it is first called.
private OutputStream os;
// Set only in the SEND_BODY state; source of getWrittenContentSize().
private CountingOutputStream countingOs;
// Values collected from the adaptor while still in the SETUP state. They are
// turned into response headers (or pushed items) by startSending().
private String contentType;
private Date lastModified;
private Metadata metadata = new Metadata();
private Acl acl;
private boolean secure;
// Parallel lists: anchorTexts.get(i) is the (possibly null) text for
// anchorUris.get(i).
private List<URI> anchorUris = new ArrayList<URI>();
private List<String> anchorTexts = new ArrayList<String>();
// Identifier of the document being served.
private final DocId docId;
private boolean noIndex;
private boolean noFollow;
private boolean noArchive;
private URI displayUrl;
private boolean crawlOnce;
private boolean lock;
// Named-resource ACLs keyed by fragment; pushed once the response has started.
private Map<String, Acl> fragments = new TreeMap<String, Acl>();
/**
 * Creates a response for {@code docId} that answers on {@code ex}.
 *
 * @param thread the thread reported to the watchdog when sending starts
 */
public DocumentResponse(HttpExchange ex, DocId docId, Thread thread) {
  this.workingThread = thread;
  this.docId = docId;
  this.ex = ex;
}
/**
 * Marks the response as Not Modified. Nothing is sent until the response is
 * completed.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void respondNotModified() throws IOException {
  if (state == State.SETUP) {
    state = State.NOT_MODIFIED;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Marks the response as Not Found. Nothing is sent until the response is
 * completed.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void respondNotFound() throws IOException {
  if (state == State.SETUP) {
    state = State.NOT_FOUND;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Returns the stream the adaptor writes document content to. The first call
 * fixes the response state (HEAD or SEND_BODY) and starts sending headers;
 * later calls return the same stream.
 *
 * @throws IllegalStateException if Not Modified or Not Found was already
 *     chosen
 */
@Override
public OutputStream getOutputStream() throws IOException {
  if (state == State.HEAD || state == State.SEND_BODY) {
    // Already called before. Provide saved OutputStream.
    return os;
  }
  if (state == State.NOT_MODIFIED) {
    throw new IllegalStateException("respondNotModified already called");
  }
  if (state == State.NOT_FOUND) {
    throw new IllegalStateException("respondNotFound already called");
  }
  if (state != State.SETUP) {
    throw new IllegalStateException("Already responded");
  }

  // Still in SETUP: we will need to make an OutputStream.
  boolean headRequest = "HEAD".equals(ex.getRequestMethod());
  if (headRequest) {
    // Unfortunately, we won't be able to report any errors after this
    // point. We don't delay sending the headers, however, because of the
    // watchdog.
    state = State.HEAD;
    startSending(false);
    os = new SinkOutputStream();
    return os;
  }

  state = State.SEND_BODY;
  startSending(true);
  OutputStream body = new CloseNotifyOutputStream(ex.getResponseBody());
  countingOs = new CountingOutputStream(body);
  os = countingOs;
  return os;
}
/**
 * Records the content type to send. Only allowed while in the SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setContentType(String contentType) {
  if (state == State.SETUP) {
    this.contentType = contentType;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Records the last-modified time to send. Only allowed while in the SETUP
 * state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setLastModified(Date lastModified) {
  if (state == State.SETUP) {
    this.lastModified = lastModified;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Adds one metadata key/value pair. Only allowed while in the SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void addMetadata(String key, String value) {
  if (state == State.SETUP) {
    metadata.add(key, value);
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Records the document ACL. Only allowed while in the SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setAcl(Acl acl) {
  if (state == State.SETUP) {
    this.acl = acl;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Registers an ACL for the named resource {@code fragment} of this document,
 * replacing any ACL stored under the same fragment. Only allowed while in
 * the SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void putNamedResource(String fragment, Acl acl) {
  if (state == State.SETUP) {
    // TODO(pjo): verify fragment string is valid
    this.fragments.put(fragment, acl);
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Records whether the document should be reported as secure. Only allowed
 * while in the SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setSecure(boolean secure) {
  if (state == State.SETUP) {
    this.secure = secure;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Adds an extra anchor; {@code text} may be {@code null}. Only allowed while
 * in the SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 * @throws NullPointerException if {@code uri} is {@code null}
 */
@Override
public void addAnchor(URI uri, String text) {
  // The state check comes first so a late call always reports the state
  // problem, whatever the arguments are.
  if (state == State.SETUP) {
    if (uri == null) {
      throw new NullPointerException();
    }
    anchorUris.add(uri);
    anchorTexts.add(text);
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Records whether a {@code noindex} robots tag should be sent. Only allowed
 * while in the SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setNoIndex(boolean noIndex) {
  if (state == State.SETUP) {
    this.noIndex = noIndex;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Records whether a {@code nofollow} robots tag should be sent. Only allowed
 * while in the SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setNoFollow(boolean noFollow) {
  if (state == State.SETUP) {
    this.noFollow = noFollow;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Records whether a {@code noarchive} robots tag should be sent. Only
 * allowed while in the SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setNoArchive(boolean noArchive) {
  if (state == State.SETUP) {
    this.noArchive = noArchive;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Records the display URL for the document. Only allowed while in the SETUP
 * state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setDisplayUrl(URI displayUrl) {
  if (state == State.SETUP) {
    this.displayUrl = displayUrl;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Records the crawl-once flag for the document. Only allowed while in the
 * SETUP state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setCrawlOnce(boolean crawlOnce) {
  if (state == State.SETUP) {
    this.crawlOnce = crawlOnce;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Records the lock flag for the document. Only allowed while in the SETUP
 * state.
 *
 * @throws IllegalStateException if a way of responding was already chosen
 */
@Override
public void setLock(boolean lock) {
  if (state == State.SETUP) {
    this.lock = lock;
  } else {
    throw new IllegalStateException("Already responded");
  }
}
/**
 * Number of content bytes written through the counting stream, or zero when
 * no counting stream was ever created.
 */
private long getWrittenContentSize() {
  if (countingOs == null) {
    return 0;
  }
  return countingOs.getBytesWritten();
}
/**
 * Finishes the response according to the state the adaptor left it in, then
 * closes the exchange.
 *
 * @throws IOException if the adaptor never chose a way of responding, or if
 *     writing the response fails
 */
private void complete() throws IOException {
  if (state == State.SETUP) {
    throw new IOException("No response sent from adaptor");
  }

  if (state == State.NOT_MODIFIED) {
    HttpExchanges.respond(
        ex, HttpURLConnection.HTTP_NOT_MODIFIED, null, null);
  } else if (state == State.NOT_FOUND) {
    HttpExchanges.cannedRespond(ex, HttpURLConnection.HTTP_NOT_FOUND,
        Translation.HTTP_NOT_FOUND);
  } else if (state == State.SEND_BODY) {
    if (!responseBodyClosed) {
      // The Adaptor didn't close the stream, so close it for them, making
      // sure to flush any existing contents. We choose to use the same
      // OutputStream as the Adaptor in order to prevent bugs due to
      // different codepaths.
      //
      // In particular, it is possible the adaptor called getOutputStream,
      // but didn't write out to the stream (consider an empty document
      // and some code choosing to never call write because all the bytes
      // were written).
      os.flush();
      os.close();
      // Closing through our own stream must have flipped the flag.
      if (!responseBodyClosed) {
        throw new AssertionError();
      }
    }
    // At this point we are guaranteed that ex.getResponseBody().close()
    // has been called.
  } else if (state != State.HEAD) {
    throw new IllegalStateException();
  }
  // HEAD needs nothing beyond closing the exchange.
  ex.close();
}
/**
* Turns the values collected from the adaptor into response headers and
* starts the HTTP response with status 200.
*
* @param hasContent forwarded to {@code HttpExchanges.startResponse}; the
*     callers pass {@code false} for HEAD requests and {@code true} otherwise
*/
private void startSending(boolean hasContent) throws IOException {
// Run the transforms first so the headers below reflect their results.
if (transform != null) {
transform();
}
acl = aclTransform.transform(acl);
// The GSA-specific headers are only produced for fully trusted clients.
if (requestIsFromFullyTrustedClient(ex)) {
// Always specify metadata and ACLs, even when empty, to replace
// previous values.
ex.getResponseHeaders().add("X-Gsa-External-Metadata",
formMetadataHeader(metadata));
if (sendDocControls) {
// ACL and crawl controls go out as X-Gsa-Doc-Controls values.
ex.getResponseHeaders().add("X-Gsa-Doc-Controls", "acl="
+ percentEncode(formNamespacedAclHeader(acl, docIdEncoder)));
if (null != displayUrl) {
String link = "display_url=" + percentEncode("" + displayUrl);
ex.getResponseHeaders().add("X-Gsa-Doc-Controls", link);
}
ex.getResponseHeaders().add("X-Gsa-Doc-Controls",
"crawl_once=" + crawlOnce);
ex.getResponseHeaders().add("X-Gsa-Doc-Controls", "lock=" + lock);
ex.getResponseHeaders().add("X-Gsa-Doc-Controls",
"scoring=" + scoring);
} else {
// No doc controls: the ACL is sent as a second external-metadata value,
// after being reduced to a form that header can express.
acl = checkAndWorkaroundGsa70Acl(acl);
ex.getResponseHeaders().add("X-Gsa-External-Metadata",
formUnqualifiedAclHeader(acl, docIdEncoder));
if (displayUrl != null || crawlOnce || lock) {
// Emulate these crawl-time values by sending them in feeds
// since they aren't supported at crawl-time on GSA 7.0.
pusher.asyncPushItem(new DocIdPusher.Record.Builder(docId)
.setResultLink(displayUrl).setCrawlOnce(crawlOnce).setLock(lock)
.build());
// TODO(ejona): figure out how to notice that a true went false
}
}
if (!anchorUris.isEmpty()) {
ex.getResponseHeaders().add("X-Gsa-External-Anchor",
formAnchorHeader(anchorUris, anchorTexts));
}
// (1) Always specify the security, either secure or public, because
// the default varies. For instance, requesting the client certificate
// of the GSA can mark documents secure, but it can also leave them as
// public, depending on a GSA configuration setting.
// (2) If document has ACL, then send secure. That helps the GSA
// and prevents confusion of having ACLs and public label juxtaposed.
ex.getResponseHeaders().add("X-Gsa-Serve-Security",
(secure || (null != acl)) ? "secure" : "public");
if (noIndex) {
ex.getResponseHeaders().add("X-Robots-Tag", "noindex");
}
if (noFollow) {
ex.getResponseHeaders().add("X-Robots-Tag", "nofollow");
}
if (noArchive) {
ex.getResponseHeaders().add("X-Robots-Tag", "noarchive");
}
}
// The remaining steps apply to every client, trusted or not.
if (useCompression) {
// TODO(ejona): decide when to use compression based on mime-type
HttpExchanges.enableCompressionIfSupported(ex);
}
if (lastModified != null) {
HttpExchanges.setLastModified(ex, lastModified);
}
// There are separate timeouts for sending headers and sending content.
// Here we stop the headers timer and start the content timer.
watchdog.processingCompleted(workingThread);
watchdog.processingStarting(workingThread, contentTimeoutMillis);
HttpExchanges.startResponse(
ex, HttpURLConnection.HTTP_OK, contentType, hasContent);
// Named-resource ACLs are handed to the pusher once the response has
// been started.
for (Map.Entry<String, Acl> fragment : fragments.entrySet()) {
pusher.asyncPushItem(new DocIdSender.AclItem(docId,
fragment.getKey(), fragment.getValue()));
}
}
/**
 * Returns an ACL that can be expressed in {@code X-Gsa-External-Metadata}.
 * A {@code null} ACL, or one that is fully case sensitive and uses only the
 * default namespace, is returned unchanged. Otherwise the real ACL is pushed
 * as a named resource of this document and a placeholder ACL that inherits
 * from that named resource is returned.
 *
 * @throws RuntimeException if the workaround is needed for an ACL whose
 *     inheritance type is AND_BOTH_PERMIT
 */
private Acl checkAndWorkaroundGsa70Acl(Acl acl) {
  if (acl == null) {
    return null;
  }

  // Check to see if the ACL can be used as-is with X-Gsa-External-Metadata
  boolean usableAsIs = acl.isEverythingCaseSensitive()
      && allDefaultNamespace(acl.getPermitUsers())
      && allDefaultNamespace(acl.getPermitGroups())
      && allDefaultNamespace(acl.getDenyUsers())
      && allDefaultNamespace(acl.getDenyGroups());
  if (usableAsIs) {
    return acl;
  }

  // Workaround for GSA 7.0 support. Since GSA 7.0 supports namespaces and
  // case insensitivity in feeds, we create a named resource with all the
  // "real" ACL data and put a noop ACL on the document itself.
  // Unfortunately, to do this trick with AND_BOTH_PERMIT requires using the
  // 'everyone' group, which would require namespace support on the
  // document's ACLs.
  Acl.Builder realAcl = new Acl.Builder(acl);
  Acl.InheritanceType inheritance = acl.getInheritanceType();
  if (Acl.InheritanceType.AND_BOTH_PERMIT.equals(inheritance)) {
    throw new RuntimeException("Unable to use AND_BOTH_PERMIT with "
        + "advanced acls and GSA 7.0");
  }
  if (Acl.InheritanceType.LEAF_NODE.equals(inheritance)) {
    realAcl.setInheritanceType(Acl.InheritanceType.PARENT_OVERRIDES);
  }
  // CHILD_OVERRIDES and PARENT_OVERRIDES are fine as-is.

  final String generatedFragment = "generated";
  DocIdSender.AclItem namedResource = new DocIdSender.AclItem(
      docId, generatedFragment, realAcl.build());
  pusher.asyncPushItem(namedResource);

  Acl.Builder placeholder = new Acl.Builder();
  placeholder.setInheritanceType(acl.getInheritanceType());
  placeholder.setInheritFrom(docId, generatedFragment);
  return placeholder.build();
}
/**
 * Tells whether every principal in {@code i} is in the default namespace.
 * An empty sequence counts as {@code true}.
 */
private boolean allDefaultNamespace(Iterable<? extends Principal> i) {
  for (Principal principal : i) {
    boolean inDefaultNamespace
        = Principal.DEFAULT_NAMESPACE.equals(principal.getNamespace());
    if (!inDefaultNamespace) {
      return false;
    }
  }
  return true;
}
/**
 * Runs the transform pipeline over the collected metadata. The DocId and the
 * current content type are passed in as parameters, and the content type is
 * read back afterwards so the pipeline may replace it.
 */
private void transform() {
  final String contentTypeKey = "Content-Type";
  Map<String, String> pipelineParams = new HashMap<String, String>();
  pipelineParams.put("DocId", docId.getUniqueId());
  pipelineParams.put(contentTypeKey, contentType);
  transform.transform(metadata, pipelineParams);
  contentType = pipelineParams.get(contentTypeKey);
}
/**
* Stream wrapper that records on the enclosing response that {@code close()}
* was called on it.
*/
private class CloseNotifyOutputStream extends FastFilterOutputStream {
public CloseNotifyOutputStream(OutputStream os) {
super(os);
}
@Override
public void close() throws IOException {
// Set the flag before delegating so it is recorded even if the
// underlying close throws.
responseBodyClosed = true;
super.close();
}
}
}
/**
 * OutputStream that discards everything written to it, like /dev/null.
 */
private static class SinkOutputStream extends OutputStream {
  @Override
  public void write(int b) throws IOException {
    // Intentionally empty: the byte is dropped.
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    // Intentionally empty: the bytes are dropped.
  }
}
/**
 * Stream wrapper that keeps a running total of the bytes passed to
 * {@link #write(byte[], int, int)}.
 */
private static class CountingOutputStream extends FastFilterOutputStream {
  private long bytesWritten;

  public CountingOutputStream(OutputStream out) {
    super(out);
  }

  public long getBytesWritten() {
    return bytesWritten;
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    super.write(b, off, len);
    // Increment after write so that 'len' is known valid. If an exception is
    // thrown then this is likely the better behavior as well.
    bytesWritten = bytesWritten + len;
  }
}
/**
* {@link ByteArrayOutputStream} that allows inquiring the current number of
* bytes written.
*/
private static class CountByteArrayOutputStream
extends ByteArrayOutputStream {
/** Returns the inherited {@code count} field of the buffer. */
public int getCount() {
return count;
}
}
/**
 * Stream that holds content in memory until a write would push the buffered
 * total past a maximum size. From then on the buffered bytes are written to
 * the wrapped stream and buffering stops for good.
 */
private static class MaxBufferOutputStream extends FastFilterOutputStream {
  private static final Logger log
      = Logger.getLogger(MaxBufferOutputStream.class.getName());

  private final int maxBytes;
  /** In-memory buffer; {@code null} once buffering has been abandoned. */
  private CountByteArrayOutputStream buffer
      = new CountByteArrayOutputStream();

  public MaxBufferOutputStream(OutputStream out, int maxBytes) {
    super(out);
    this.maxBytes = maxBytes;
  }

  /**
   * Returns the buffered content, or {@code null} when too much content was
   * written and the provided {@code OutputStream} was used.
   */
  public byte[] getBufferedContent() {
    return (buffer == null) ? null : buffer.toByteArray();
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    if (buffer == null) {
      // Buffer was exhausted. Write out directly.
      super.write(b, off, len);
      return;
    }
    if (buffer.getCount() + len > maxBytes) {
      // Buffer begins overflowing. Flush buffer and stop using it.
      log.fine("Buffer was exhausted. Stopping buffering.");
      buffer.writeTo(out);
      buffer = null;
      super.write(b, off, len);
      return;
    }
    // Write to buffer.
    buffer.write(b, off, len);
  }

  /** Flushes the wrapped stream, but only once buffering has stopped. */
  @Override
  public void flush() throws IOException {
    if (buffer != null) {
      return;
    }
    super.flush();
  }

  /** Closes the wrapped stream, but only once buffering has stopped. */
  @Override
  public void close() throws IOException {
    if (buffer != null) {
      return;
    }
    super.close();
  }
}
/**
 * Sink the handler gives items to for pushing: named-resource ACLs and
 * records that emulate crawl-time values.
 */
interface AsyncPusher {
  /** Accepts {@code item} for pushing. */
  void asyncPushItem(DocIdSender.Item item);
}
}