blob: 236b4777acb3070c6402454930447ee7caaef579 [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package adaptorlib;
import java.io.*;
import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
/**
 * Configuration values for this program like the GSA's hostname. Also several
 * knobs, or controls, for changing the behavior of the program.
 *
 * <p>Values are looked up in {@link #config}, which falls back to {@link
 * #defaultConfig} when a key was not explicitly set. Keys registered with a
 * {@code null} default are tracked in {@link #noDefaultConfig}; they have no
 * fallback and must be provided by a configuration file, the command line, or
 * {@link #setValue}.
 */
public class Config {
  /** Configuration keys whose default value is {@code null}. */
  protected final Set<String> noDefaultConfig = new HashSet<String>();
  /** Default configuration values. */
  protected final Properties defaultConfig = new Properties();
  /** Overriding configuration values loaded from file and command line. */
  protected Properties config = new Properties(defaultConfig);
  /** Name of the configuration file read by {@link #loadDefaultConfigFile}. */
  protected static final String DEFAULT_CONFIG_FILE
      = "adaptor-config.properties";

  /**
   * Creates a configuration that contains only the built-in keys and their
   * default values.
   */
  public Config() {
    String hostname = null;
    try {
      hostname = InetAddress.getLocalHost().getCanonicalHostName();
    } catch (UnknownHostException ex) {
      // Deliberately ignored: hostname stays null, which registers
      // "server.hostname" below as a key without a default, so the user is
      // required to provide it.
    }
    addKey("server.hostname", hostname);
    addKey("server.port", "5678");
    addKey("server.docIdPath", "/doc/");
    addKey("server.gsaIps", "");
    addKey("server.addResolvedGsaHostnameToGsaIps", "true");
    addKey("server.secure", "false");
    addKey("server.keyAlias", "adaptor");
    addKey("gsa.hostname", null);
    addKey("gsa.characterEncoding", "UTF-8");
    addKey("docId.isUrl", "false");
    addKey("feed.name", "testfeed");
    addKey("feed.noRecrawlBitEnabled", "false");
    addKey("feed.crawlImmediatelyBitEnabled", "false");
    //addKey("feed.noFollowBitEnabled", "false");
    addKey("feed.maxUrls", "5000");
  }

  /**
   * Returns the names of all keys that currently have a value, whether set
   * explicitly or provided by a default. Keys without a default that have not
   * been set are not included.
   */
  public Set<String> getAllKeys() {
    return config.stringPropertyNames();
  }

  /* Preferences requiring you to set them: */

  /**
   * Required to be set: GSA machine to send document ids to. This is the
   * hostname of your GSA on your network.
   *
   * @throws IllegalStateException if {@code gsa.hostname} has not been set
   */
  public String getGsaHostname() {
    return getValue("gsa.hostname");
  }

  /* Preferences suggested you set them: */

  /**
   * Suggested to be set: the {@code feed.name} configuration value. Defaults
   * to {@code "testfeed"}.
   */
  public String getFeedName() {
    return getValue("feed.name");
  }

  /**
   * Suggested to be set: Local port, on this computer, onto which requests from
   * GSA come in on.
   *
   * @throws NumberFormatException if {@code server.port} is not an integer
   */
  public int getServerPort() {
    return Integer.parseInt(getValue("server.port"));
  }

  /* More sophisticated preferences that can be left
     unmodified for simple deployment and initial POC: */

  /**
   * Optional (default false): If your DocIds are already valid URLs you can
   * have this method return true and they will be sent to GSA unmodified. If
   * your DocId is like http://procurement.corp.company.com/internal/011212.html
   * you can turn this true and that URL will be handed to the GSA.
   *
   * <p>By default DocIds are URL encoded and prefixed with http:// and this
   * host's name and port.
   */
  public boolean isDocIdUrl() {
    return Boolean.parseBoolean(getValue("docId.isUrl"));
  }

  /**
   * Without changes contains
   * {@code InetAddress.getLocalHost().getCanonicalHostName()}.
   *
   * @throws IllegalStateException if the local hostname could not be
   *     determined when this object was constructed and no value was
   *     configured since
   */
  public String getServerHostname() {
    return getValue("server.hostname");
  }

  /**
   * Whether to automatically consider "gsa.hostname" configuration value part
   * of the "server.gsaIps" list. Defaults to {@code true}.
   *
   * @see #getServerGsaIps
   */
  public boolean getServerAddResolvedGsaHostnameToGsaIps() {
    return Boolean.parseBoolean(getValue(
        "server.addResolvedGsaHostnameToGsaIps"));
  }

  /**
   * Comma-separated list of IPs or hostnames to consider the GSA and bypass
   * authentication checks.
   *
   * <p>The raw value is split on {@code ','} without trimming. Note that an
   * empty configuration value (the default) yields an array containing a
   * single empty string rather than an empty array.
   */
  public String[] getServerGsaIps() {
    return getValue("server.gsaIps").split(",");
  }

  /**
   * Optional: Returns this host's base URI which other paths will be resolved
   * against. It is used to construct URIs to provide to the GSA for it to
   * contact this server for various services. For documents (which is probably
   * what you care about), the {@link #getServerBaseUri(DocId)} version is used
   * instead.
   *
   * <p>It must contain the protocol, hostname, and port, but may optionally
   * contain a path like {@code /yourfavoritepath}. By default, the protocol,
   * hostname, and port are retrieved automatically and no path is set.
   */
  public URI getServerBaseUri() {
    String protocol = isServerSecure() ? "https" : "http";
    return URI.create(protocol + "://" + getServerHostname() + ":"
        + getServerPort());
  }

  /**
   * Optional: Path below {@link #getServerBaseUri(DocId)} where documents are
   * namespaced. Generally, should be at least {@code "/"} and end with a slash.
   */
  public String getServerDocIdPath() {
    return getValue("server.docIdPath");
  }

  /**
   * Optional: Returns the host's base URI which GSA will contact for document
   * information, including document contents. By default it returns {@link
   * #getServerBaseUri()}. However, if you would like to direct GSA's queries
   * for contents to go to other computers/binaries then you can change this
   * method.
   *
   * <p>For example, imagine that you want five binaries to serve the contents
   * of files to the GSA. In this case you could split the document ids into
   * five categories using something like:
   *
   * <pre>String urlBeginnings[] = new String[] {
   *   "http://content-server-A:5678",
   *   "http://content-server-B:5678",
   *   "http://backup-server-A:5678",
   *   "http://backup-server-B:5678",
   *   "http://new-server:7878"
   * };
   * // Mask the sign bit; hashCode() may be negative.
   * int shard = (docId.getUniqueId().hashCode() &amp; 0x7fffffff) % 5;
   * return URI.create(urlBeginnings[shard]);</pre>
   *
   * <p>Note that this URI is used in conjunction with {@link
   * #getServerDocIdPath} and the document ID to form the full URL. In addition,
   * by using {@link #getServerBaseUri()} and {@link #getServerDocIdPath()}, we
   * have to be able to parse back the original document ID when a request comes
   * to this server.
   */
  public URI getServerBaseUri(DocId docId) {
    return getServerBaseUri();
  }

  /**
   * Whether full security should be enabled. When {@code true}, the adaptor is
   * locked down using HTTPS, checks certificates, and generally behaves in a
   * fully-secure manner. When {@code false} (default), the adaptor serves
   * content over HTTP and is unable to authenticate users (all users are
   * treated as anonymous).
   *
   * <p>The need for this setting is because when enabled, security requires a
   * reasonable amount of configuration and know-how. To provide easy
   * out-of-the-box execution, this is disabled by default.
   */
  public boolean isServerSecure() {
    return Boolean.parseBoolean(getValue("server.secure"));
  }

  /**
   * The alias in the keystore that has the key to use for encryption.
   */
  public String getServerKeyAlias() {
    return getValue("server.keyAlias");
  }

  /**
   * Optional (default false): Adds no-recrawl bit with sent records in feed
   * file. If connector handles updates and deletes then GSA does not have to
   * recrawl periodically to notice that a document is changed or deleted.
   */
  public boolean isFeedNoRecrawlBitEnabled() {
    // parseBoolean interprets the configured string itself. The previously
    // used Boolean.getBoolean would instead look up a JVM system property
    // whose *name* is the configured string, ignoring the setting.
    return Boolean.parseBoolean(getValue("feed.noRecrawlBitEnabled"));
  }

  /**
   * Optional (default false): Adds crawl-immediately bit with sent records in
   * feed file. This bit makes the sent URL get crawl priority.
   */
  public boolean isCrawlImmediatelyBitEnabled() {
    return Boolean.parseBoolean(getValue("feed.crawlImmediatelyBitEnabled"));
  }

  // TODO(pjo): Implement on GSA
  // /**
  //  * Optional (default false): Adds no-follow bit with sent records in feed
  //  * file. No-follow means that if document content has links they are not
  //  * followed.
  //  */
  // public boolean isNoFollowBitEnabled() {
  //   return Boolean.parseBoolean(getValue("feed.noFollowBitEnabled"));
  // }

  /* Preferences expected to never change: */

  /** Provides the character encoding the GSA prefers. */
  public Charset getGsaCharacterEncoding() {
    return Charset.forName(getValue("gsa.characterEncoding"));
  }

  /**
   * Provides max number of URLs (equal to number of document ids) that are sent
   * to the GSA per feed file.
   *
   * @throws NumberFormatException if {@code feed.maxUrls} is not an integer
   */
  public int getFeedMaxUrls() {
    return Integer.parseInt(getValue("feed.maxUrls"));
  }

  /**
   * Load user-provided configuration file.
   *
   * @param configFile path of the properties file to read
   * @throws IOException if the file cannot be opened or read
   */
  public void load(String configFile) throws IOException {
    load(new File(configFile));
  }

  /**
   * Load user-provided configuration file. The file is read as UTF-8 and is
   * closed before this method returns.
   *
   * @param configFile properties file to read
   * @throws IOException if the file cannot be opened or read
   */
  public void load(File configFile) throws IOException {
    Reader reader = new InputStreamReader(new FileInputStream(configFile),
        Charset.forName("UTF-8"));
    try {
      load(reader);
    } finally {
      // Properties.load leaves the stream open, so release the file handle
      // here regardless of whether parsing succeeded.
      reader.close();
    }
  }

  /**
   * Load user-provided configuration file. The reader is not closed by this
   * method; that remains the caller's responsibility.
   *
   * @param configFile source of properties-formatted text
   * @throws IOException if reading fails
   */
  public void load(Reader configFile) throws IOException {
    config.load(configFile);
  }

  /**
   * Loads {@code adaptor-config.properties} in the current directory, if it
   * exists. It squelches any errors so that you are free to call it without
   * error handling, since this is typically non-fatal.
   */
  public void loadDefaultConfigFile() {
    File confFile = new File(DEFAULT_CONFIG_FILE);
    if (confFile.exists() && confFile.isFile()) {
      try {
        load(confFile);
      } catch (IOException ex) {
        System.err.println("Exception when reading " + DEFAULT_CONFIG_FILE);
        ex.printStackTrace(System.err);
      }
    }
  }

  /**
   * Load default configuration file and parse command line options.
   *
   * <p>Leading arguments of the form {@code -Dkey=value} are applied as
   * configuration values. Parsing stops at the first argument that does not
   * start with {@code -D} or that lacks an {@code =}; that argument and all
   * following ones are returned untouched.
   *
   * @param args command line arguments
   * @return unused command line arguments
   * @throws IllegalStateException when not all configuration keys have values
   */
  public String[] autoConfig(String[] args) {
    loadDefaultConfigFile();
    int i;
    for (i = 0; i < args.length; i++) {
      if (!args[i].startsWith("-D")) {
        break;
      }
      String arg = args[i].substring(2);
      // Limit of 2 keeps any further '=' characters as part of the value.
      String[] parts = arg.split("=", 2);
      if (parts.length < 2) {
        break;
      }
      setValue(parts[0], parts[1]);
    }
    // Only keys without a default can be missing a value at this point.
    Set<String> unset = new HashSet<String>();
    for (String key : noDefaultConfig) {
      if (config.getProperty(key) == null) {
        unset.add(key);
      }
    }
    if (!unset.isEmpty()) {
      throw new IllegalStateException("Missing configuration values: " + unset);
    }
    if (i == 0) {
      return args;
    } else {
      return Arrays.copyOfRange(args, i, args.length);
    }
  }

  /**
   * Get a configuration value. Never returns {@code null}.
   *
   * @param key name of the configuration key
   * @throws IllegalStateException if {@code key} has no value
   */
  public String getValue(String key) {
    String value = config.getProperty(key);
    if (value == null) {
      throw new IllegalStateException(MessageFormat.format(
          "You must set configuration key ''{0}''.", key));
    }
    return value;
  }

  /**
   * Add configuration key. If defaultValue is {@code null}, then no default
   * value is used.
   *
   * @param key name of the configuration key to register
   * @param defaultValue value used when the key is not otherwise set, or
   *     {@code null} to require that the key be set explicitly
   * @throws IllegalStateException if {@code key} was already added
   */
  public void addKey(String key, String defaultValue) {
    // containsKey, not contains: Hashtable.contains tests *values*, which
    // would miss duplicate keys and reject keys that happen to equal an
    // existing default value.
    if (defaultConfig.containsKey(key) || noDefaultConfig.contains(key)) {
      throw new IllegalStateException("Key already added: " + key);
    }
    if (defaultValue == null) {
      noDefaultConfig.add(key);
    } else {
      defaultConfig.setProperty(key, defaultValue);
    }
  }

  /**
   * Manually set a configuration value. Depending on when called, it can
   * override a user's configuration, which should be avoided.
   */
  void setValue(String key, String value) {
    config.setProperty(key, value);
  }
}