| // Copyright 2011 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package com.google.enterprise.adaptor; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.net.URI; |
| import java.net.URISyntaxException; |
| import java.nio.ByteBuffer; |
| import java.nio.charset.Charset; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.Date; |
| import java.util.HashMap; |
| import java.util.Map; |
| import java.util.logging.Level; |
| import java.util.logging.Logger; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| /** |
| * Parses the adaptor data format into individual commands with associated data. |
| * |
| * This format is used for communication between the adaptor library and various command line |
| * adaptor components (lister, retriever, transformer, authorizer, etc.). It supports responses |
| * coming back from the command line adaptor implementation. The format supports a mixture of |
| * character and binary data. All character data must be encoded in UTF-8.<p> |
| * |
| * <h3>Header Format</h3> |
| * |
| * Communications (via either file or stream) begin with the header:<p> |
| * |
| * {@code GSA Adaptor Data Version 1 [<delimiter>]}<p> |
| * |
| * The version number must be preceded by a single space and followed by a single space. The |
| * version number may increase in the future should the format be enhanced.<p> |
| * |
| * The string between the two square brackets will be used as the delimiter for the remainder of the |
| * file being read or for the duration of the communication session.<p> |
| * |
| * Care must be taken that the delimiter character string can never occur in a document ID, metadata |
| * name, metadata value, user name, or any other data that will be represented using the format with |
| * the exception of document contents, which can contain the delimiter string. The safest delimiter |
| * is likely to be the null character (the character with a value of zero). This character is |
| * unlikely to be present in existing names, paths, metadata, etc. Another possible choice is the |
| * newline character, though in many systems it is possible for this character to be present in |
| * document names and document paths, etc. If in doubt, the null character is recommended. A |
| * delimiter can be made up of more than one character so it is possible to have a delimiter that is |
| * &lt;CR&gt;&lt;LF&gt; or a highly unique string (such as a GUID) that has an exceptionally low probability of |
| * occurring in the data.<p> |
| * |
| * The following characters may not be used in the delimiter:<p> 'A'-'Z', 'a'-'z' and '0'-'9' the |
| * alphanumeric characters<br> ':' colon<br> '/' slash<br> '-' hyphen<br> '_' underscore<br> ' ' |
| * space<br> '=' equals<br> '+' plus<br> '[' left square bracket<br> ']' right square bracket<p> |
| * |
| * <h3>Body Format</h3> Elements in the file start with one of the following commands. Commands |
| * where data precedes the next delimiter include an equal sign. Commands that are immediately |
| * followed by a delimiter do not include an equal sign. The first command must specify a document |
| * ID ("id=" or "id-list"). Commands that don't specify a document ID are associated with the most |
| * recent previously specified document ID.<p> |
| * |
| * <h1>Common Commands:</h1> |
| * |
| * "id=" -- specifies a document id<p> |
| * |
| * "id-list" -- Starts a list of document ids each separated by |
| * the specified delimiter, the list is terminated by two consecutive delimiters or EOS |
| * (End-Of-Stream). ids in an id-list cannot have any of the associated commands listed below.<p> |
| * |
| * "repository-unavailable=" -- the document repository is unavailable. The string following the "=" |
| * character includes additional information that will be logged with the error. |
| * |
| * |
| * <h1>Lister Commands:</h1> |
| * |
| * "result-link=" -- specifies an alternative link to be displayed in the search results. |
| * This must be a properly formed URL. A "result link" is sometimes referred to as a "display URL". |
| * If no result-link is specified then the URL used for crawling is also used in the |
| * search results.<p> |
| * |
| * "last-modified=" -- Specifies the last time the document or its metadata has changed. |
| * The argument is a number representing the number of seconds since the standard base |
| * time known as "the epoch", namely January 1, 1970, 00:00:00 GMT. If last-modified is specified |
| * and the document has never been crawled before or has been crawled prior to the last-modified |
| * time then the document will be marked as "crawl-immediately" by the GSA.<p> |
| * |
| * "crawl-immediately" -- Increases the crawling priority of the document such |
| * that the GSA will retrieve it sooner than normally crawled documents.<p> |
| * |
| * "crawl-once" -- specifies that the document will be crawled by the |
| * GSA one time but then never re-crawled.<p> |
| * |
| * "lock" -- Causes the document to remain in the index unless explicitly removed. |
| * Failure to retrieve the document during re-crawling will not result in |
| * removal of the document. If every document in the GSA is |
| * locked then locked document may be forced out when maximum capacity is |
| * reached.<p> |
| * |
| * "delete" -- this document should be deleted from the GSA index.<p> |
| * |
| * <h1>Retriever Commands:</h1> |
| * |
| * "up-to-date" -- specifies that the document is up-to-date with respect to its last crawled |
| * time.<p> |
| * |
| * "not-found" -- the document does not exist in the repository<p> |
| * |
| * "mime-type=" -- specifies the document's mime-type. If unspecified then the GSA will |
| * automatically assign a type to the document. <p> |
| * |
| * "meta-name=" -- specifies a metadata key, to be followed by a meta-value<p> |
| * |
| * "meta-value=" -- specifies a metadata value associated with |
| * the immediately preceding meta-name<p> |
| * |
| * "content" -- signals the beginning of binary content which |
| * continues to the end of the file or stream<p> |
| * |
| * "last-modified=" -- specifies the last time the document or its metadata has changed. |
| * The argument is a number representing the number of seconds since the standard base |
| * time known as "the epoch", namely January 1, 1970, 00:00:00 GMT.<p> |
| * |
| * "secure=" -- specifies whether the document is non-public. The argument is either 'true' or |
| * 'false'.<p> |
| * |
| * "anchor-uri=" -- specifies an anchor URI, to be followed by anchor-text.<p> |
| * |
| * "anchor-text=" -- specifies the text associated with an anchor-uri.<p> |
| * |
| * "no-index=" -- specifies whether the document should be indexed by the GSA. The argument is |
| * either 'true' or 'false'.<p> |
| * |
| * "no-follow=" -- specifies whether the document's links should be followed by the GSA. The |
| * argument is either 'true' or 'false'.<p> |
| * |
| * "no-archive=" -- specifies whether GSA document will allow the user to see a cached version of |
| * the document. The argument is either 'true' or 'false'.<p> |
| * |
| * "display-url=" -- specifies an alternative link to be displayed in the search results. |
| * This must be a properly formed URL.<p> |
| * |
| * "crawl-once=" -- specifies that the document will be crawled by the |
| * GSA one time but then never re-crawled. The argument should be 'true' or 'false'.<p> |
| * |
| * "lock=" -- Causes the document to remain in the index unless explicitly removed. |
| * If every document in the GSA is locked then locked document may be forced out when maximum |
| * capacity is reached.<p> |
| * |
| * <h1>Authorizer Commands:</h1> |
| * |
| * "authz-status=" -- specifies whether a document is visible to a |
| * specified user. The argument must be PERMIT, DENY or INDETERMINATE<p> |
| * |
| * "user=" -- specifies the user for whom the authorization check will be made<p> |
| * |
| * "password=" -- specifies the password for the user. (optional)<p> |
| * |
| * "group=" -- specifies a security group to which the user belongs.<p> |
| * |
| * End-of-stream terminates the data transmission. Multiple consecutive delimiters are collapsed |
| * into a single delimiter and terminate the current id-list should one exist.<p> |
| * |
| * Unrecognized commands generate a warning but are otherwise ignored. |
| * |
| * <h3>Examples</h3> |
| * |
| * Example 1:<p> |
| * |
| * <pre> |
| * {@code |
| * GSA Adaptor Data Version 1 [<delimiter>] |
| * id-list |
| * /home/repository/docs/file1 |
| * /home/repository/docs/file2 |
| * /home/repository/docs/file3 |
| * /home/repository/docs/file4 |
| * /home/repository/docs/file5 |
| * }</pre> |
| * |
| * Example 2:<p> |
| * |
| * <pre> |
| * {@code |
| * GSA Adaptor Data Version 1 [<delimiter>] |
| * id=/home/repository/docs/file1 |
| * id=/home/repository/docs/file2 |
| * crawl-immediately |
| * last-modified=1312387643 |
| * |
| * meta-name=Department |
| * meta-value=Engineering |
| * |
| * meta-name=Creator |
| * meta-value=howardhawks |
| * |
| * id=/home/repository/docs/file3 |
| * id=/home/repository/docs/file4 |
| * id=/home/repository/docs/file5 |
| * }</pre> |
| * |
| * Data passed to command line authorizer via stdin for authz check. |
| * Entries will always occur in this order: user, password, group, id. |
| * password and group information is optional. Any number of group and |
| * id entries can exist. Each of the documents with a listed id should |
| * be checked. |
| * <pre> |
| * {@code |
| * GSA Adaptor Data Version 1 [<delimiter>] |
| * user=tim_smith |
| * password=abc123 |
| * group=managers |
| * group=research |
| * id=/home/repository/docs/file1 |
| * id=/home/repository/docs/file2 |
| * }</pre> |
| * |
| * AuthZ response passed from command line authorizer via stdout. |
| * Each doc id must include an authz-status entry. |
| * <pre> |
| * {@code |
| * GSA Adaptor Data Version 1 [<delimiter>] |
| * id=/home/repository/docs/file1 |
| * authz-status=PERMIT |
| * id=/home/repository/docs/file2 |
| * authz-status=DENY |
| * }</pre> |
| */ |
| public class CommandStreamParser { |
| |
| |
| private static enum Operation { |
| ID("id"), |
| RESULT_LINK("result-link"), |
| LAST_MODIFIED("last-modified"), |
| CRAWL_IMMEDIATELY("crawl-immediately"), |
| CRAWL_ONCE("crawl-once"), |
| LOCK("lock"), |
| DELETE("delete"), |
| UP_TO_DATE("up-to-date"), |
| NOT_FOUND("not-found"), |
| MIME_TYPE("mime-type"), |
| META_NAME("meta-name"), |
| META_VALUE("meta-value"), |
| CONTENT("content"), |
| AUTHZ_STATUS("authz-status"), |
| SECURE("secure"), |
| ANCHOR_URI("anchor-uri"), |
| ANCHOR_TEXT("anchor-text"), |
| NO_INDEX("no-index"), |
| NO_FOLLOW("no-follow"), |
| NO_ARCHIVE("no-archive"), |
| DISPLAY_URL("display-url"), |
| ; |
| |
| private final String commandName; |
| |
| private Operation(String commandName) { |
| this.commandName = commandName; |
| } |
| |
| public String getCommandName() { |
| return commandName; |
| } |
| } |
| |
| private static final Logger log = Logger.getLogger(CommandStreamParser.class.getName()); |
| private static final String HEADER_PREFIX = "GSA Adaptor Data Version"; |
| private static final String DISALLOWED_DELIMITER_CHARS_REGEX = "[a-zA-Z0-9:/\\-_ =\\+\\[\\]]"; |
| private static final Charset CHARSET = Charset.forName("UTF-8"); |
| |
| private static final Map<String, Operation> STRING_TO_OPERATION; |
| |
| static { |
| Map<String, Operation> stringToOperation = new HashMap<String, Operation>(); |
| for (Operation operation : Operation.values()) { |
| stringToOperation.put(operation.getCommandName(), operation); |
| } |
| STRING_TO_OPERATION = Collections.unmodifiableMap(stringToOperation); |
| } |
| |
| private InputStream inputStream; |
| private int versionNumber = 0; |
| private String delimiter; |
| private boolean inIdList; |
| |
| /** */ |
| private static class Command { |
| |
| private Operation operation; |
| private String argument; |
| |
| Command(Operation operation, String argument) { |
| this.operation = operation; |
| this.argument = argument; |
| } |
| |
| public Operation getOperation() { |
| return operation; |
| } |
| |
| public String getArgument() { |
| return argument; |
| } |
| |
| public boolean hasArgument() { |
| return argument != null; |
| } |
| } |
| |
| public CommandStreamParser(InputStream inputStream) { |
| this.inputStream = inputStream; |
| inIdList = false; |
| } |
| |
| public int getVersionNumber() throws IOException { |
| checkHeader(); |
| return versionNumber; |
| } |
| |
| public Map<DocId, AuthzStatus> readFromAuthorizer() throws IOException { |
| Map<DocId, AuthzStatus> result = new HashMap<DocId, AuthzStatus>(); |
| String docId = null; |
| AuthzStatus authzStatus = null; |
| Command command = readCommand(); |
| |
| // Starting out at end-of-stream so return an empty list. |
| if (command == null) { |
| return result; |
| } |
| |
| // The first operation must be a doc ID. |
| if (command.getOperation() != Operation.ID) { |
| throw new IOException("Authorizer Error: the first operator must be a document ID. " |
| + " Instead encountered '" + command.getOperation() + "'."); |
| } |
| while (command != null) { |
| switch (command.getOperation()) { |
| case ID: |
| if (docId != null) { |
| result.put(new DocId(docId), authzStatus); |
| } |
| docId = command.getArgument(); |
| authzStatus = null; |
| break; |
| case AUTHZ_STATUS: |
| String authzStatusString = command.getArgument(); |
| try { |
| authzStatus = AuthzStatus.valueOf(authzStatusString); |
| } catch (IllegalArgumentException ex) { |
| log.warning("Unrecognized authz-status of '" + authzStatusString + "' for document: '" + |
| docId + "'"); |
| } |
| break; |
| default: |
| throw new IOException("Authorizer Error: invalid operation: '" + command.getOperation() + |
| (command.hasArgument() ? "' with argument: '" + command.getArgument() + "'" : "'")); |
| } |
| command = readCommand(); |
| } |
| result.put(new DocId(docId), authzStatus); |
| |
| return Collections.unmodifiableMap(result); |
| } |
| |
| public void readFromRetriever(DocId docId, Response response) throws IOException { |
| |
| Command command = readCommand(); |
| |
| if (command == null) { |
| throw new IOException("Invalid or missing retriever data."); |
| } else if (command.getOperation() != Operation.ID) { |
| throw new IOException("Retriever Error: the first operator must be a document ID. " |
| + " Instead encountered '" + command.getOperation() + "'."); |
| } |
| |
| DocId foundDocId = new DocId(command.getArgument()); |
| if (!docId.equals(foundDocId)) { |
| throw new IOException("requested document " + docId + " does not match retrieved " |
| + "document " + foundDocId + "."); |
| } |
| command = readCommand(); |
| while (command != null) { |
| switch (command.getOperation()) { |
| case ID: |
| throw new IOException("Only one document ID can be specified in a retriever message"); |
| case CONTENT: |
| IOHelper.copyStream(inputStream, response.getOutputStream()); |
| break; |
| case META_NAME: |
| String metaName = command.getArgument(); |
| command = readCommand(); |
| if (command == null || command.getOperation() != Operation.META_VALUE) { |
| throw new IOException("meta-name must be immediately followed by meta-value"); |
| } |
| log.log(Level.FINEST, "Retriever: {0} has metadata {1}={2}", |
| new Object[] {docId.getUniqueId(), metaName, |
| command.getArgument()}); |
| response.addMetadata(metaName, command.getArgument()); |
| break; |
| case UP_TO_DATE: |
| log.log(Level.FINEST, "Retriever: {0} is up to date.", docId.getUniqueId()); |
| response.respondNotModified(); |
| break; |
| case NOT_FOUND: |
| response.respondNotFound(); |
| break; |
| case MIME_TYPE: |
| log.log(Level.FINEST, "Retriever: {0} has mime-type {1}", |
| new Object[] {docId.getUniqueId(), command.getArgument()}); |
| response.setContentType(command.getArgument()); |
| break; |
| case LAST_MODIFIED: |
| // Convert seconds to milliseconds for Date constructor. |
| response.setLastModified(new Date(Long.parseLong(command.getArgument()) * 1000)); |
| break; |
| case SECURE: |
| response.setSecure(Boolean.parseBoolean(command.getArgument())); |
| break; |
| case ANCHOR_URI: |
| URI anchorUri = URI.create(command.getArgument()); |
| command = readCommand(); |
| if (command == null || command.getOperation() != Operation.ANCHOR_TEXT) { |
| throw new IOException("anchor-uri must be immediately followed by anchor-text"); |
| } |
| response.addAnchor(anchorUri, command.getArgument()); |
| break; |
| case NO_INDEX: |
| response.setNoIndex(Boolean.parseBoolean(command.getArgument())); |
| break; |
| case NO_FOLLOW: |
| response.setNoFollow(Boolean.parseBoolean(command.getArgument())); |
| break; |
| case NO_ARCHIVE: |
| response.setNoArchive(Boolean.parseBoolean(command.getArgument())); |
| break; |
| case DISPLAY_URL: |
| response.setDisplayUrl(URI.create(command.getArgument())); |
| break; |
| case CRAWL_ONCE: |
| response.setCrawlOnce(Boolean.parseBoolean(command.getArgument())); |
| break; |
| case LOCK: |
| response.setLock(Boolean.parseBoolean(command.getArgument())); |
| break; |
| default: |
| throw new IOException("Retriever Error: invalid operation: '" + command.getOperation() + |
| (command.hasArgument() ? "' with argument: '" + command.getArgument() + "'" : "'")); |
| } |
| command = readCommand(); |
| } |
| } |
| |
| /** |
| * Parse a listing response, sending results to {@code pusher}. If {@code handler} is {@code |
| * null}, then {@code pusher}'s default handler will be used. In case of failure sending in |
| * {@code pusher}, the rest of the input stream may not be read. |
| * |
| * @return {@code null} on success, otherwise the first Record to fail |
| */ |
| public DocIdPusher.Record readFromLister(DocIdPusher pusher, ExceptionHandler handler) |
| throws IOException, InterruptedException { |
| ArrayList<DocIdPusher.Record> result = new ArrayList<DocIdPusher.Record>(); |
| DocIdPusher.Record.Builder builder = null; |
| Command command = readCommand(); |
| |
| // Starting out at end-of-stream so don't send anything. |
| if (command == null) { |
| return null; |
| } |
| |
| // The first operation must be a doc ID. |
| if (command.getOperation() != Operation.ID) { |
| throw new IOException("Lister Error: the first operator must be a document ID. " |
| + " Instead encountered '" + command.getOperation() + "'."); |
| } |
| while (command != null) { |
| switch (command.getOperation()) { |
| case ID: |
| if (builder != null) { |
| result.add(builder.build()); |
| // TODO(ejona): make threshold smarter. |
| if (result.size() >= 10000) { |
| DocIdPusher.Record errorRecord = pusher.pushRecords(result, handler); |
| if (errorRecord != null) { |
| return errorRecord; |
| } |
| result.clear(); |
| } |
| } |
| builder = new DocIdPusher.Record.Builder(new DocId(command.getArgument())); |
| break; |
| case LAST_MODIFIED: |
| // Convert seconds to milliseconds for Date constructor. |
| builder.setLastModified(new Date(Long.parseLong(command.getArgument()) * 1000)); |
| break; |
| case RESULT_LINK: |
| try { |
| builder.setResultLink(new URI(command.getArgument())); |
| } catch (URISyntaxException e) { |
| throw new IOException("Lister Error: invalid URL: '" + command.getOperation() |
| + (command.hasArgument() ? "' with argument: '" |
| + command.getArgument() + "'" : "'"), e); |
| } |
| break; |
| case CRAWL_IMMEDIATELY: |
| builder.setCrawlImmediately(true); |
| break; |
| case CRAWL_ONCE: |
| builder.setCrawlOnce(true); |
| break; |
| case LOCK: |
| builder.setLock(true); |
| break; |
| case DELETE: |
| builder.setDeleteFromIndex(true); |
| break; |
| default: |
| throw new IOException("Lister Error: invalid operation: '" + command.getOperation() + |
| (command.hasArgument() ? "' with argument: '" + command.getArgument() + "'" : "'")); |
| } |
| command = readCommand(); |
| } |
| result.add(builder.build()); |
| return pusher.pushRecords(result, handler); |
| } |
| |
| /** |
| * Read a command from the command stream |
| * |
| * @return The next command from the command stream. for end-of-steam null is returned. |
| * @throws IOException on stream read error |
| */ |
| private Command readCommand() throws IOException { |
| |
| Command result = null; |
| |
| while (result == null) { |
| String commandTokens[] = parseNextLine(); |
| if (commandTokens == null) { |
| return null; |
| } else if ((commandTokens[0].equals("repository-unavailable"))) { |
| throw new IOException("Error: repository unavailable. " |
| + (commandTokens.length > 1 ? commandTokens[1] : "")); |
| } |
| |
| Operation operation = STRING_TO_OPERATION.get(commandTokens[0]); |
| // Skip over unrecognized commands |
| if (operation == null) { |
| // TODO(johnfelton) add a warning about an unrecognized command |
| continue; |
| } |
| |
| String argument = null; |
| |
| if (commandTokens.length > 1) { |
| argument = commandTokens[1]; |
| } |
| |
| result = new Command(operation, argument); |
| } |
| return result; |
| } |
| |
| private String[] parseNextLine() throws IOException { |
| checkHeader(); |
| String line = ""; |
| while (line.length() == 0) { |
| line = readCharsUntilMarker(delimiter); |
| // On End-Of-Stream return the end-message command |
| if (line == null) { |
| return null; |
| } |
| // If nothing is between the last delimiter and this one then exit ID list mode |
| if (inIdList && line.length() == 0) { |
| inIdList = false; |
| } else if (!inIdList && line.equals("id-list")) { |
| inIdList = true; |
| line = ""; // loop again |
| } |
| } |
| if (inIdList) { |
| return new String[]{"id", line}; |
| } |
| return line.split("=", 2); |
| } |
| |
| /** |
| * Read and verify the data format header if needed. |
| */ |
| private void checkHeader() throws IOException { |
| if (this.delimiter != null) { |
| return; |
| } |
| |
| String line = readCharsUntilMarker("["); |
| if ((line == null) || (line.length() < HEADER_PREFIX.length()) || |
| !line.substring(0, HEADER_PREFIX.length()).equals(HEADER_PREFIX)) { |
| throw new IOException("Adaptor data must begin with '" + HEADER_PREFIX + "'"); |
| } |
| |
| String versionNumberString = line.substring(HEADER_PREFIX.length()); |
| if (versionNumberString.length() < 3) { |
| throw new IOException("Format version '" + versionNumberString + "' is invalid. " + |
| "The version must be at least one digit with one leading space and one trailing space."); |
| } |
| |
| delimiter = readCharsUntilMarker("]"); |
| if ((delimiter == null) || (delimiter.length() < 1)) { |
| throw new IOException("Delimiter must be at least one character long."); |
| } |
| |
| Pattern pattern = Pattern.compile(DISALLOWED_DELIMITER_CHARS_REGEX); |
| Matcher matcher = pattern.matcher(delimiter); |
| |
| if (matcher.find()) { |
| throw new IOException("Invalid character in delimiter."); |
| } |
| |
| try { |
| versionNumber = Integer.parseInt(versionNumberString.trim()); |
| } catch (NumberFormatException e) { |
| throw new IOException("Format version '" + versionNumberString + "' is invalid."); |
| } |
| } |
| |
| |
| private byte[] readBytesUntilMarker(byte[] marker) throws IOException { |
| |
| if (marker.length == 0) { |
| throw new IOException("Internal Error: Marker length must be greater than zero."); |
| } |
| ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); |
| int matchPosition = 0; |
| int nextByte = 0; |
| |
| while (matchPosition < marker.length) { |
| nextByte = inputStream.read(); |
| if (nextByte == ((int) marker[matchPosition] & 0xff)) { |
| matchPosition += 1; |
| } else { |
| if (matchPosition > 0) { |
| byteArrayOutputStream.write(marker, 0, matchPosition); |
| matchPosition = 0; |
| } |
| if (nextByte == -1) { |
| break; |
| } else { |
| byteArrayOutputStream.write(nextByte); |
| } |
| } |
| } |
| byte[] result = byteArrayOutputStream.toByteArray(); |
| if (nextByte == -1 && result.length == 0) { |
| return null; |
| } else { |
| return result; |
| } |
| } |
| |
| private String readCharsUntilMarker(String marker) throws IOException { |
| byte[] byteMarker = marker.getBytes(CHARSET); |
| byte[] bytes = readBytesUntilMarker(byteMarker); |
| if (bytes == null) { |
| return null; |
| } |
| return CHARSET.newDecoder().decode(ByteBuffer.wrap(bytes)).toString(); |
| } |
| } |