// blob: 8f02f7a9d285b4a2ec654569b95aee87fa48298c [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.enterprise.adaptor;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Tests for {@link CommandStreamParser}.
 *
 * <p>Every test feeds the parser an in-memory command stream. Unless a test is
 * deliberately exercising a malformed header, the stream starts with
 * {@link #HEADER}.
 */
public class CommandStreamParserTest {
  /** Well-formed header: data format version 1 with a newline delimiter. */
  private static final String HEADER = "GSA Adaptor Data Version 1 [\n]\n";

  @Rule
  public ExpectedException thrown = ExpectedException.none();

  /** Creates a parser that reads the UTF-8 encoding of {@code source}. */
  private static CommandStreamParser parserFor(String source) throws IOException {
    return parserFor(source.getBytes("UTF-8"));
  }

  /** Creates a parser that reads exactly the bytes in {@code source}. */
  private static CommandStreamParser parserFor(byte[] source) throws IOException {
    InputStream inputStream = new ByteArrayInputStream(source);
    return new CommandStreamParser(inputStream);
  }

  /**
   * Expects {@code readFromLister} to fail with an {@link IOException} for
   * {@code source}. Relies on the {@link #thrown} rule, so it must be the last
   * statement of the calling test.
   */
  private void expectListerFailure(String source) throws Exception {
    CommandStreamParser parser = parserFor(source);
    AccumulatingDocIdPusher pusher = new AccumulatingDocIdPusher();
    thrown.expect(IOException.class);
    parser.readFromLister(pusher, null);
  }

  /**
   * Sends a content payload containing each of the 256 possible byte values
   * through {@code readFromRetriever} and checks that the bytes written to the
   * response are identical to the payload.
   */
  private void checkContentWithEveryByteValue() throws IOException {
    byte[] payload = new byte[256];
    for (int i = 0; i < payload.length; i++) {
      // Covers -128..127, i.e. every value a byte can hold.
      payload[i] = (byte) (i - 128);
    }
    ByteArrayOutputStream sourceBytes = new ByteArrayOutputStream();
    sourceBytes.write((HEADER + "id=5\ncontent\n").getBytes("UTF-8"));
    sourceBytes.write(payload);

    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    WrapperAdaptor.GetContentsResponse response
        = new WrapperAdaptor.GetContentsResponse(outputStream);
    CommandStreamParser parser = parserFor(sourceBytes.toByteArray());
    parser.readFromRetriever(new DocId("5"), response);
    assertArrayEquals(payload, outputStream.toByteArray());
  }

  /** Individual {@code id=} commands and an {@code id-list} are read in stream order. */
  @Test
  public void testReadDocIds() throws Exception {
    String source = HEADER + "id=123\nid=456\n"
        + "id-list\n10\n20\n30\n\nid=789\n";
    CommandStreamParser parser = parserFor(source);
    AccumulatingDocIdPusher pusher = new AccumulatingDocIdPusher();
    parser.readFromLister(pusher, null);
    String[] expectedIds = {"123", "456", "10", "20", "30", "789"};
    for (int i = 0; i < expectedIds.length; i++) {
      assertEquals(expectedIds[i], pusher.getDocIds().get(i).getUniqueId());
    }
  }

  /** A header that says "Ver" instead of "Version" is rejected. */
  @Test
  public void testInvalidHeaderString() throws Exception {
    expectListerFailure(
        "GSA Adaptor Data Ver 1 [\n]\nid=123\nid=456\nid-list\n10\n20\n30\n\nid=789"
        + "\nend-message");
  }

  /** A non-numeric version ("1a") is rejected. */
  @Test
  public void testInvalidVersion() throws Exception {
    expectListerFailure(
        "GSA Adaptor Data Version 1a [\n]\nid=123\nid=456\nid-list\n10\n20\n30\n\n"
        + "id=789\n");
  }

  /** A header with nothing between the brackets is rejected. */
  @Test
  public void testEmptyDelimiter() throws Exception {
    expectListerFailure(
        "GSA Adaptor Data Version 1 []\nid=123\nid=456\nid-list\n10\n20\n30\n\nid=789"
        + "\n");
  }

  /**
   * Builds a stream that declares {@code delimiter} in its header, followed by
   * a single {@code id=123} command, and checks how the parser reacts.
   *
   * <p>For an invalid delimiter the {@link IOException} is caught here instead
   * of being delegated to the {@link #thrown} rule. With the rule, the first
   * exception ends the calling test method, so a test that checks several
   * invalid delimiters in a row would only ever exercise the first one.
   *
   * @param delimiter the delimiter to place in the header and after it
   * @param isValid whether the parser is expected to accept {@code delimiter}
   */
  void checkDelimiter(String delimiter, boolean isValid) throws Exception {
    String source = "GSA Adaptor Data Version 1 [" + delimiter + "]" + delimiter + "id=123";
    CommandStreamParser parser = parserFor(source);
    AccumulatingDocIdPusher pusher = new AccumulatingDocIdPusher();
    if (isValid) {
      parser.readFromLister(pusher, null);
      assertEquals(new DocId("123"), pusher.getDocIds().get(0));
      return;
    }
    boolean rejected = false;
    try {
      parser.readFromLister(pusher, null);
    } catch (IOException expected) {
      rejected = true;
    }
    assertTrue("Expected IOException for delimiter \"" + delimiter + "\"", rejected);
  }

  /** Each of these delimiters must be rejected. */
  @Test
  public void testUnsupportedDelimiterCharacters() throws Exception {
    checkDelimiter("A", false);
    checkDelimiter("K", false);
    checkDelimiter("Z", false);
    checkDelimiter("a", false);
    checkDelimiter("k", false);
    checkDelimiter("z", false);
    checkDelimiter("0", false);
    checkDelimiter("5", false);
    checkDelimiter("9", false);
    checkDelimiter(":", false);
    checkDelimiter("/", false);
    checkDelimiter("-", false);
    checkDelimiter("_", false);
    checkDelimiter(" ", false);
    checkDelimiter("=", false);
    checkDelimiter("+", false);
    checkDelimiter("[", false);
    checkDelimiter("]", false);
    checkDelimiter("<+>", false);
    checkDelimiter("/n /n", false);
  }

  /** NUL, ASCII punctuation and non-ASCII letters are accepted as delimiters. */
  @Test
  public void testSupportedDelimiterCharacters() throws Exception {
    checkDelimiter("\0", true);
    checkDelimiter("~!#$%^&*(){}", true);
    checkDelimiter("ĀÁÂḀⒶ", true);
    checkDelimiter("ტყაოსანი", true);
  }

  /**
   * A retriever stream carrying every supported per-document command populates
   * the response; the unrecognised command in the stream does not cause a
   * failure.
   */
  @Test
  public void testRetriever() throws IOException {
    String source = HEADER
        + "id=123\n"
        + "up-to-date\n"
        + "UNKNOWN_COMMAND=abcdefghi\n"
        + "meta-name=project\nmeta-value=plexi\n"
        + "last-modified=15\n"
        + "secure=true\n"
        + "anchor-uri=http://example.com/doc\nanchor-text=It is an example\n"
        + "no-index=true\n"
        + "no-follow=true\n"
        + "no-archive=true\n"
        + "display-url=http://example.com/thisDoc\n"
        + "crawl-once=true\n"
        + "lock=true\n"
        + "content\n2468";
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    WrapperAdaptor.GetContentsResponse response
        = new WrapperAdaptor.GetContentsResponse(outputStream);
    CommandStreamParser parser = parserFor(source);
    assertEquals(1, parser.getVersionNumber());
    parser.readFromRetriever(new DocId("123"), response);

    assertTrue(response.isNotModified());
    // Explicit charset, consistent with how the source stream is encoded.
    assertArrayEquals("2468".getBytes("UTF-8"), outputStream.toByteArray());
    Metadata metadata = response.getMetadata();
    assertEquals(1, metadata.getKeys().size());
    assertEquals("plexi", metadata.getOneValue("project"));
    // last-modified=15 in the stream is expected to surface as 15000 ms.
    assertEquals(new Date(15 * 1000L), response.getLastModified());
    assertTrue(response.isSecure());
    assertEquals(Arrays.asList(URI.create("http://example.com/doc")), response.getAnchorUris());
    assertEquals(Arrays.asList("It is an example"), response.getAnchorTexts());
    assertTrue(response.isNoIndex());
    assertTrue(response.isNoFollow());
    assertTrue(response.isNoArchive());
    assertEquals(URI.create("http://example.com/thisDoc"), response.getDisplayUrl());
    assertTrue(response.isCrawlOnce());
    assertTrue(response.isLock());
  }

  /** Two values supplied for the same metadata key are both retained under that key. */
  @Test
  public void testRetrieverMultipleMetadataValuesSameKey() throws IOException {
    String source = HEADER
        + "id=123\n"
        + "up-to-date\n"
        + "UNKNOWN_COMMAND=abcdefghi\n"
        + "meta-name=project\nmeta-value=plexi\n"
        + "meta-name=project\nmeta-value=klexa\ncontent\n2468";
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    WrapperAdaptor.GetContentsResponse response
        = new WrapperAdaptor.GetContentsResponse(outputStream);
    CommandStreamParser parser = parserFor(source);
    assertEquals(1, parser.getVersionNumber());
    parser.readFromRetriever(new DocId("123"), response);

    assertTrue(response.isNotModified());
    // Explicit charset, consistent with how the source stream is encoded.
    assertArrayEquals("2468".getBytes("UTF-8"), outputStream.toByteArray());
    Metadata metadata = response.getMetadata();
    assertEquals(1, metadata.getKeys().size());
    Set<String> projectNames = new HashSet<String>(Arrays.asList("plexi", "klexa"));
    assertEquals(projectNames, metadata.getAllValues("project"));
  }

  /** Content containing every possible byte value is passed through unchanged. */
  @Test
  public void testReadContentAllBytes() throws IOException {
    checkContentWithEveryByteValue();
  }

  /**
   * Same scenario as {@link #testReadContentAllBytes()}; retained as a
   * separate test so the existing test name keeps being reported.
   */
  @Test
  public void testReadBytes() throws IOException {
    checkContentWithEveryByteValue();
  }

  /** {@code repository-unavailable} in an authorizer stream results in an IOException. */
  @Test
  public void testRepositoryUnavailable() throws IOException {
    CommandStreamParser parser = parserFor(HEADER + "repository-unavailable");
    thrown.expect(IOException.class);
    parser.readFromAuthorizer();
  }

  /** An authorizer stream holding only the header yields an empty result. */
  @Test
  public void testAuthorizorNoData() throws IOException {
    CommandStreamParser parser = parserFor(HEADER);
    Map<DocId, AuthzStatus> result = parser.readFromAuthorizer();
    assertEquals(0, result.size());
  }

  /** An {@code authz-status} that appears before any {@code id} is rejected. */
  @Test
  public void testAuthorizorDataStartsWithDocId() throws IOException {
    String source = HEADER + "authz-status=PERMIT\n"
        + "id=001\nauthz-status=DENY\n";
    CommandStreamParser parser = parserFor(source);
    thrown.expect(IOException.class);
    parser.readFromAuthorizer();
  }

  /** Each id is mapped to the authz status that follows it in the stream. */
  @Test
  public void testAuthorizor() throws IOException {
    String source = HEADER + "id=001\nauthz-status=PERMIT\n"
        + "id=002\nauthz-status=DENY\nid=003\nauthz-status=INDETERMINATE";
    Map<DocId, AuthzStatus> expected = new HashMap<DocId, AuthzStatus>();
    expected.put(new DocId("001"), AuthzStatus.PERMIT);
    expected.put(new DocId("002"), AuthzStatus.DENY);
    expected.put(new DocId("003"), AuthzStatus.INDETERMINATE);
    CommandStreamParser parser = parserFor(source);
    assertEquals(expected, parser.readFromAuthorizer());
  }

  /** A lister stream holding only the header pushes no doc ids. */
  @Test
  public void testListerNoData() throws Exception {
    CommandStreamParser parser = parserFor(HEADER);
    AccumulatingDocIdPusher pusher = new AccumulatingDocIdPusher();
    parser.readFromLister(pusher, null);
    assertEquals(0, pusher.getDocIds().size());
  }

  /** A lister command that appears before any {@code id} is rejected. */
  @Test
  public void testListerDataStartsWithDocId() throws Exception {
    expectListerFailure(HEADER + "lock\n"
        + "id=001\n");
  }

  /**
   * All lister attributes that follow an id end up on that id's record;
   * repeating {@code lock} and {@code delete} does not change the outcome.
   */
  @Test
  public void testLister() throws Exception {
    String source = HEADER
        + "id=001\nlock\ncrawl-once\ndelete\ncrawl-immediately\n"
        + "last-modified=1292805597\n"
        + "result-link=http://docs.google.com/myfolder/mydoc.pdf\n"
        + "lock\ndelete\n";
    DocIdPusher.Record expected = new DocIdPusher.Record.Builder(new DocId("001"))
        .setCrawlImmediately(true)
        .setCrawlOnce(true)
        .setDeleteFromIndex(true)
        .setLastModified(new Date(1292805597000L))
        .setResultLink(new URI("http://docs.google.com/myfolder/mydoc.pdf"))
        .setLock(true)
        .build();
    CommandStreamParser parser = parserFor(source);
    AccumulatingDocIdPusher pusher = new AccumulatingDocIdPusher();
    parser.readFromLister(pusher, null);
    assertEquals(expected, pusher.getRecords().get(0));
  }

  /** A stream of 30001 ids is read completely and in order. */
  @Test
  public void testManyRecords() throws Exception {
    final int idsToGenerate = 30001;
    List<DocId> goldenIds = new ArrayList<DocId>(idsToGenerate);
    StringBuilder sb = new StringBuilder(HEADER);
    for (int i = 0; i < idsToGenerate; i++) {
      DocId id = new DocId("id " + i);
      goldenIds.add(id);
      sb.append("id=").append(id.getUniqueId()).append("\n");
    }
    CommandStreamParser parser = parserFor(sb.toString());
    AccumulatingDocIdPusher pusher = new AccumulatingDocIdPusher();
    parser.readFromLister(pusher, null);
    assertEquals(goldenIds, pusher.getDocIds());
  }
}