// blob: 22d5b0d433000ec349691b89a07e1883122e498d [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package adaptorlib;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpsExchange;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.text.DateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.net.ssl.SSLPeerUnverifiedException;
class DocumentHandler extends AbstractHandler {
// Named after this class (not its base class) so that log records and
// per-logger level configuration are attributed to the document handler.
private static final Logger log
    = Logger.getLogger(DocumentHandler.class.getName());
/** Decodes the request URI into the DocId being requested. */
private DocIdDecoder docIdDecoder;
/** Receives request and timing notifications made while handling. */
private Journal journal;
/** Supplies authorization decisions and document content. */
private Adaptor adaptor;
/**
 * Addresses whose plain-HTTP requests are treated as coming from the GSA.
 * Filled by the constructor and only read afterwards.
 */
private final Set<InetAddress> gsaAddresses = new HashSet<InetAddress>();
/**
 * Handler that takes over when an anonymous request is not permitted. May be
 * {@code null}, in which case such requests are answered with 403.
 */
private final HttpHandler authnHandler;
/** Looks up the session (if any) that belongs to an exchange. */
private final SessionManager<HttpExchange> sessionManager;
/**
 * Creates a handler that serves documents provided by {@code adaptor}.
 *
 * @param defaultHostname forwarded to the superclass constructor
 * @param defaultCharset forwarded to the superclass constructor
 * @param docIdDecoder decoder used to turn request URIs into DocIds
 * @param journal journal that is notified about requests
 * @param adaptor source of authorization decisions and document content
 * @param addResolvedGsaHostnameToGsaIps whether the address that
 *     {@code gsaHostname} resolves to is added to the trusted addresses
 * @param gsaHostname hostname resolved when the flag above is set
 * @param gsaIps additional trusted addresses; entries are trimmed and blank
 *     entries are skipped
 * @param authnHandler handler used for anonymous requests that are denied
 * @param sessionManager provides the session for an exchange
 * @throws RuntimeException wrapping the {@link UnknownHostException} when
 *     {@code gsaHostname} or an entry of {@code gsaIps} cannot be resolved
 */
public DocumentHandler(String defaultHostname, Charset defaultCharset,
    DocIdDecoder docIdDecoder, Journal journal,
    Adaptor adaptor,
    boolean addResolvedGsaHostnameToGsaIps,
    String gsaHostname, String[] gsaIps,
    HttpHandler authnHandler,
    SessionManager<HttpExchange> sessionManager) {
  super(defaultHostname, defaultCharset);
  this.sessionManager = sessionManager;
  this.authnHandler = authnHandler;
  this.adaptor = adaptor;
  this.journal = journal;
  this.docIdDecoder = docIdDecoder;

  if (addResolvedGsaHostnameToGsaIps) {
    gsaAddresses.add(resolveGsaAddress(gsaHostname));
  }
  for (String rawIp : gsaIps) {
    String candidate = rawIp.trim();
    if (candidate.length() != 0) {
      gsaAddresses.add(resolveGsaAddress(candidate));
    }
  }
  log.log(Level.INFO, "IPs to believe are the GSA: {0}",
      new Object[] {gsaAddresses});
}

/** Resolves {@code host}, rethrowing a lookup failure as unchecked. */
private static InetAddress resolveGsaAddress(String host) {
  try {
    return InetAddress.getByName(host);
  } catch (UnknownHostException lookupFailure) {
    throw new RuntimeException(lookupFailure);
  }
}
/**
 * Decides whether the client of {@code ex} is treated as the GSA.
 *
 * <p>For an HTTPS exchange the client is trusted when the SSL session has a
 * verified peer; for any other exchange it is trusted when its remote address
 * is one of {@code gsaAddresses}. The outcome is logged at FINE.
 */
private boolean requestIsFromGsa(HttpExchange ex) {
  final boolean trusted = (ex instanceof HttpsExchange)
      ? peerIsVerified((HttpsExchange) ex)
      : gsaAddresses.contains(ex.getRemoteAddress().getAddress());
  log.fine(trusted ? "Client is trusted" : "Client is not trusted");
  return trusted;
}

/** Returns whether the SSL session of {@code secure} has a verified peer. */
private static boolean peerIsVerified(HttpsExchange secure) {
  try {
    // Only the absence of an exception matters; the principal is unused.
    secure.getSSLSession().getPeerPrincipal();
    return true;
  } catch (SSLPeerUnverifiedException unverified) {
    return false;
  }
}
/**
 * Serves the document identified by the request URI.
 *
 * <p>Only GET and HEAD are accepted; any other method is answered with 405.
 * Requests that {@code requestIsFromGsa} trusts are always allowed. Other
 * requests are authorized through {@code adaptor.isUserAuthorized}, using the
 * principal and groups of an authenticated session or, when there is none,
 * as anonymous. A denied anonymous request is handed to {@code authnHandler}
 * if one is configured; any other denied request receives 403. Allowed
 * requests are passed to {@code adaptor.getDocContent}; failures there are
 * converted to 404 or 500 responses.
 *
 * @param ex exchange carrying the request and used to send the response
 * @throws IOException propagated from the response helpers or from
 *     {@code authnHandler}
 */
@Override
public void meteredHandle(HttpExchange ex) throws IOException {
  String requestMethod = ex.getRequestMethod();
  if (!"GET".equals(requestMethod) && !"HEAD".equals(requestMethod)) {
    cannedRespond(ex, HttpURLConnection.HTTP_BAD_METHOD, "text/plain",
        "Unsupported request method");
    return;
  }
  /* Call into adaptor developer code to get document bytes. */
  // TODO(ejona): Need to namespace all docids to allow random support URLs
  DocId docId = docIdDecoder.decodeDocId(getRequestUri(ex));
  log.fine("id: " + docId.getUniqueId());
  // Decide trust once per request; the result is needed both for
  // authorization and for deciding whether to expose metadata.
  final boolean fromGsa = requestIsFromGsa(ex);
  boolean isAllowed;
  if (fromGsa) {
    journal.recordGsaContentRequest(docId);
    isAllowed = true;
  } else {
    journal.recordNonGsaContentRequest(docId);
    // Default to anonymous.
    String principal = null;
    Set<String> groups = Collections.emptySet();
    Session session = sessionManager.getSession(ex, false);
    if (session != null) {
      AuthnState authnState
          = (AuthnState) session.getAttribute(AuthnState.SESSION_ATTR_NAME);
      if (authnState != null && authnState.isAuthenticated()) {
        principal = authnState.getPrincipal();
        groups = authnState.getGroups();
      }
    }
    Map<DocId, AuthzStatus> authzMap = adaptor.isUserAuthorized(principal,
        groups, Collections.singletonList(docId));
    AuthzStatus status = authzMap != null ? authzMap.get(docId) : null;
    if (status == null) {
      // A missing answer is treated as INDETERMINATE, which is not PERMIT.
      status = AuthzStatus.INDETERMINATE;
      log.log(Level.WARNING, "Adaptor did not provide an authorization "
          + "result for the requested DocId ''{0}''. Instead provided: "
          + "{1}", new Object[] {docId, authzMap});
    }
    isAllowed = (status == AuthzStatus.PERMIT);
    if (!isAllowed && principal == null && authnHandler != null) {
      // User was anonymous and document is not public, so try to authn
      // user.
      authnHandler.handle(ex);
      return;
    }
  }
  if (!isAllowed) {
    cannedRespond(ex, HttpURLConnection.HTTP_FORBIDDEN, "text/plain",
        "403: Forbidden");
    return;
  }
  DocumentRequest request = new DocumentRequest(ex, docId,
      dateFormat.get());
  DocumentResponse response = new DocumentResponse(ex);
  // TODO(ejona): if text, support providing encoding
  journal.recordRequestProcessingStart();
  byte[] content;
  String contentType;
  int httpResponseCode;
  Metadata metadata;
  try {
    try {
      adaptor.getDocContent(request, response);
    } finally {
      // We want this to be recorded immediately, not after sending error
      // codes
      journal.recordRequestProcessingEnd(response.getWrittenContentSize());
    }
    content = response.getWrittenContent();
    contentType = response.contentType;
    httpResponseCode = response.httpResponseCode;
    metadata = response.metadata;
  } catch (FileNotFoundException e) {
    cannedRespond(ex, HttpURLConnection.HTTP_NOT_FOUND, "text/plain",
        "Unknown document: " + e.getMessage());
    return;
  } catch (IOException e) {
    // Log the failure so the stack trace is not lost; the client only
    // receives the exception message.
    log.log(Level.WARNING, "IOException from getDocContent", e);
    cannedRespond(ex, HttpURLConnection.HTTP_INTERNAL_ERROR, "text/plain",
        "IO Exception: " + e.getMessage());
    return;
  } catch (Exception e) {
    log.log(Level.WARNING, "Unexpected exception from getDocContent", e);
    cannedRespond(ex, HttpURLConnection.HTTP_INTERNAL_ERROR, "text/plain",
        "Exception (" + e.getClass().getName() + "): "
        + e.getMessage());
    return;
  }
  if (httpResponseCode != HttpURLConnection.HTTP_OK
      && httpResponseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
    log.log(Level.WARNING, "Unexpected response code (was any response "
        + "sent from the adaptor?): {0}", httpResponseCode);
    cannedRespond(ex, HttpURLConnection.HTTP_INTERNAL_ERROR, "text/plain",
        "Tried to return unexpected response code");
    return;
  }
  if (content == null) {
    log.finer("processed request; response is null. This is normal for HEAD"
        + " requests.");
  } else {
    log.finer("processed request; response is size=" + content.length);
  }
  // TODO(ejona): decide when to use compression based on mime-type
  enableCompressionIfSupported(ex);
  // Metadata is only exposed to clients that are trusted as the GSA.
  if (metadata != null && fromGsa) {
    ex.getResponseHeaders().set("X-Gsa-External-Metadata",
        formMetadataHeader(metadata));
  }
  if ("GET".equals(requestMethod)) {
    respond(ex, httpResponseCode, contentType, content);
  } else {
    respondToHead(ex, httpResponseCode, contentType);
  }
}
/**
 * Sends the response through the superclass while notifying the journal
 * before the send starts and, whether or not it succeeds, after it ends.
 * The size reported to the journal is the length of {@code response}, or 0
 * when {@code response} is {@code null}.
 */
@Override
protected void respond(HttpExchange ex, int code, String contentType,
    byte[] response) throws IOException {
  final int reportedSize = (response != null) ? response.length : 0;
  journal.recordRequestResponseStart();
  try {
    super.respond(ex, code, contentType, response);
  } finally {
    journal.recordRequestResponseEnd(reportedSize);
  }
}
/**
 * Builds the value of the GSA-specific crawl-time metadata header: the
 * percent-encoded {@code name=value} pairs of {@code metadata}, joined with
 * commas. An empty {@code metadata} yields the empty string.
 */
static String formMetadataHeader(Metadata metadata) {
  StringBuilder header = new StringBuilder();
  // Empty before the first pair, a comma before every later one.
  String separator = "";
  for (MetaItem entry : metadata) {
    header.append(separator)
        .append(percentEncode(entry.getName()))
        .append("=")
        .append(percentEncode(entry.getValue()));
    separator = ",";
  }
  return header.toString();
}
/**
 * Percent-encode {@code value} as described in
 * <a href="http://tools.ietf.org/html/rfc3986#section-2">RFC 3986</a> and
 * using UTF-8. This is the most common form of percent encoding. The
 * unreserved characters A-Z, a-z, 0-9, '-', '_', '.', and '~' are left
 * as-is; every other byte of the UTF-8 form is written as '%' followed by
 * two upper-case hexadecimal digits.
 *
 * @param value text to encode; must not be {@code null}
 * @return the encoded text, empty when {@code value} is empty
 */
static String percentEncode(String value) {
  final Charset encoding = Charset.forName("UTF-8");
  final char[] hexDigits = "0123456789ABCDEF".toCharArray();
  byte[] bytes = value.getBytes(encoding);
  StringBuilder sb = new StringBuilder(bytes.length);
  for (byte b : bytes) {
    if ((b >= 'a' && b <= 'z')
        || (b >= 'A' && b <= 'Z')
        || (b >= '0' && b <= '9')
        || b == '-' || b == '_' || b == '.' || b == '~') {
      sb.append((char) b);
    } else {
      // Mask to get the unsigned value; bytes above 0x7f are negative.
      int unsigned = b & 0xff;
      sb.append('%')
          .append(hexDigits[unsigned >>> 4])
          .append(hexDigits[unsigned & 0x0f]);
    }
  }
  return sb.toString();
}
/**
 * The {@link Adaptor.Request} given to the adaptor for one exchange. It
 * exposes the requested DocId and the last access time obtained from
 * {@code getIfModifiedSince}.
 */
private static class DocumentRequest implements Adaptor.Request {
  private final HttpExchange exchange;
  private final DocId requestedId;
  // Supplied per request: DateFormats are relatively expensive to create and
  // cannot be used from multiple threads. Not read by the code in this class.
  private final DateFormat format;

  private DocumentRequest(HttpExchange ex, DocId docId,
      DateFormat dateFormat) {
    this.exchange = ex;
    this.requestedId = docId;
    this.format = dateFormat;
  }

  @Override
  public DocId getDocId() {
    return requestedId;
  }

  @Override
  public Date getLastAccessTime() {
    return getIfModifiedSince(exchange);
  }

  /**
   * Reports a change when no last access time is available or when that
   * time lies before {@code lastModified}.
   */
  @Override
  public boolean hasChangedSinceLastAccess(Date lastModified) {
    Date lastAccess = getLastAccessTime();
    return lastAccess == null || lastAccess.before(lastModified);
  }
}
/**
 * The {@link Adaptor.Response} given to the adaptor for one exchange. For a
 * GET the written content is buffered in memory; for a HEAD it is discarded.
 * Content type and metadata can only be set before a stream has been chosen.
 */
private static class DocumentResponse implements Adaptor.Response {
  /** Marker stream: {@code sink} refers to it once not-modified was sent. */
  private static final OutputStream NOT_MODIFIED_MARKER
      = new SinkOutputStream();

  private HttpExchange exchange;
  /** Stream handed to the adaptor; {@code null} until one is chosen. */
  private OutputStream sink;
  private String contentType;
  private int httpResponseCode;
  private Metadata metadata;

  public DocumentResponse(HttpExchange ex) {
    this.exchange = ex;
    if (isHead()) {
      // There is no need for them to call getOutputStream
      httpResponseCode = HttpURLConnection.HTTP_OK;
    }
  }

  /** Whether the exchange's request method is HEAD. */
  private boolean isHead() {
    return "HEAD".equals(exchange.getRequestMethod());
  }

  @Override
  public void setMetadata(Metadata metadata) {
    if (sink != null) {
      throw new IllegalStateException();
    }
    this.metadata = metadata;
  }

  @Override
  public void setContentType(String contentType) {
    if (sink != null) {
      throw new IllegalStateException();
    }
    this.contentType = contentType;
  }

  @Override
  public void respondNotModified() {
    if (sink != null) {
      throw new IllegalStateException("getOutputStream already called");
    }
    sink = NOT_MODIFIED_MARKER;
    httpResponseCode = HttpURLConnection.HTTP_NOT_MODIFIED;
  }

  @Override
  public OutputStream getOutputStream() {
    if (sink == NOT_MODIFIED_MARKER) {
      throw new IllegalStateException("respondNotModified already called");
    }
    if (sink == null) {
      // First call: record success and pick a buffer or a discarding stream.
      httpResponseCode = HttpURLConnection.HTTP_OK;
      sink = isHead() ? new SinkOutputStream() : new ByteArrayOutputStream();
    }
    return sink;
  }

  /** Number of buffered bytes, or 0 when nothing is being buffered. */
  private long getWrittenContentSize() {
    return (sink instanceof ByteArrayOutputStream)
        ? ((ByteArrayOutputStream) sink).size()
        : 0;
  }

  /** Copy of the buffered bytes, or null when nothing is being buffered. */
  private byte[] getWrittenContent() {
    return (sink instanceof ByteArrayOutputStream)
        ? ((ByteArrayOutputStream) sink).toByteArray()
        : null;
  }
}
/**
 * An {@link OutputStream} that discards everything written to it, much like
 * writing to /dev/null.
 */
private static class SinkOutputStream extends OutputStream {
  /** Drops the single byte. */
  @Override
  public void write(int b) throws IOException {
    // Intentionally empty.
  }

  /** Drops the given range without inspecting the arguments. */
  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    // Intentionally empty.
  }
}
}