// blob: c341f12aee5ce3e2ed947396bf27917728a7cefe [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package adaptorlib;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Arrays;
/**
 * An input stream that can be read as both a byte stream and a character stream.
 *
 * <p>Byte reads and UTF-8 character reads may be interleaved: a character read requests only the
 * bytes of the character being decoded from the underlying stream.
 *
 * <p>Instances reuse one scratch buffer and one decoder across calls and are therefore not safe
 * for concurrent use.
 */
public class ByteCharInputStream {
  /** Longest byte sequence announced by a lead byte that {@code bytesInUtf8Char} accepts. */
  private static final int MAX_BYTES_IN_UTF8_CHAR = 6;

  private final InputStream inputStream;
  private final CharsetDecoder charsetDecoder;
  /** Scratch buffer holding the bytes of the character currently being decoded. */
  private final byte[] byteArray = new byte[MAX_BYTES_IN_UTF8_CHAR];

  /**
   * Wraps {@code inputStream} so it can be consumed as bytes or as UTF-8 characters.
   *
   * @param inputStream the stream to read from
   */
  public ByteCharInputStream(InputStream inputStream) {
    this.inputStream = inputStream;
    // Look the charset up directly; availableCharsets() would first build a map of every
    // installed charset just to fetch this one entry.
    this.charsetDecoder = Charset.forName("UTF-8").newDecoder();
  }

  /**
   * Reads up to {@code len} bytes into {@code bytes}, exactly as {@link InputStream#read(byte[],
   * int, int)} does on the wrapped stream.
   *
   * @param bytes destination buffer
   * @param off offset in {@code bytes} at which to start storing data
   * @param len maximum number of bytes to read
   * @return the value returned by the wrapped stream's {@code read}
   * @throws IOException if an I/O error occurs
   */
  public int read(byte[] bytes, int off, int len) throws IOException {
    return inputStream.read(bytes, off, len);
  }

  /**
   * Reads into {@code bytes} by delegating to {@code IOHelper.readFully} on the wrapped stream.
   *
   * @param bytes destination buffer
   * @param off offset in {@code bytes} at which to start storing data
   * @param len number of bytes requested
   * @return the value returned by {@code IOHelper.readFully}
   *     (NOTE(review): presumably the number of bytes read, or -1 at end of stream; confirm
   *     against IOHelper)
   * @throws IOException if an I/O error occurs
   */
  public int readFully(byte[] bytes, int off, int len) throws IOException {
    return IOHelper.readFully(inputStream, bytes, off, len);
  }

  /**
   * Reads a single character.
   *
   * @return a unicode codepoint (one char or a surrogate pair) or null if the end of the stream
   *     has been reached
   * @throws IOException if an I/O error occurs, if the lead byte is not a valid UTF-8 lead byte,
   *     if the stream ends in the middle of a character, or if the decoder rejects the sequence
   */
  public String readChar() throws IOException {
    int bytesRead = readFully(byteArray, 0, 1);
    if (bytesRead < 1) {
      // Nothing was read, so the scratch buffer still holds stale data; treat as end of stream.
      return null;
    }
    int byteCount = bytesInUtf8Char(byteArray[0]);
    if (byteCount == 1) {
      // High bit is clear, so the byte is ASCII and maps directly onto a single char.
      return String.valueOf((char) byteArray[0]);
    }
    bytesRead = readFully(byteArray, 1, byteCount - 1);
    if (bytesRead != byteCount - 1) {
      throw new IOException("Invalid UTF-8 Character");
    }
    CharBuffer charBuffer = charsetDecoder.decode(ByteBuffer.wrap(byteArray, 0, byteCount));
    // toString() honours the buffer's position, limit and array offset, unlike reading the
    // backing array from index 0.
    return charBuffer.toString();
  }

  /**
   * Reads characters from the stream until {@code delimiter} or EOS (end of stream)
   * is encountered and returns them as a {@code String}.
   * The delimiter is consumed from the stream but not included in the returned string.
   *
   * @param delimiter string to read until; must be neither null nor empty
   * @return string of characters read before reaching the delimiter.
   *     Null if EOS is encountered before any characters are read
   * @throws IllegalArgumentException if {@code delimiter} is null or empty
   * @throws IOException if reading or decoding a character fails
   */
  public String readToDelimiter(String delimiter) throws IOException {
    if (delimiter == null || delimiter.isEmpty()) {
      throw new IllegalArgumentException("Delimiter may not be null or empty.");
    }
    String nextChar = readChar();
    // If EOS then return null
    if (nextChar == null) {
      return null;
    }
    StringBuilder stringBuilder = new StringBuilder();
    while (nextChar != null) {
      stringBuilder.append(nextChar);
      if (endsWith(stringBuilder, delimiter)) {
        // Drop the delimiter that was just completed and stop reading.
        stringBuilder.setLength(stringBuilder.length() - delimiter.length());
        break;
      }
      nextChar = readChar();
    }
    return stringBuilder.toString();
  }

  /** Returns true if {@code builder} ends with {@code suffix}, without allocating a substring. */
  private static boolean endsWith(StringBuilder builder, String suffix) {
    int start = builder.length() - suffix.length();
    if (start < 0) {
      return false;
    }
    // Compare from the end: the most recently appended char is the most likely mismatch.
    for (int i = suffix.length() - 1; i >= 0; i--) {
      if (builder.charAt(start + i) != suffix.charAt(i)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Determines the number of bytes in a UTF-8 character given the first byte.
   *
   * <p>Only the bit pattern of the lead byte is examined. Patterns announcing 5- and 6-byte
   * sequences are still accepted here; checking the complete sequence is left to the decoder
   * used by {@link #readChar()}.
   *
   * @param firstByte the first byte of a UTF-8 character
   * @return the number of bytes used to represent the character, including the first byte that
   *     was passed.
   * @throws IOException thrown if firstByte is not a valid first byte in an UTF-8 character.
   */
  private static int bytesInUtf8Char(byte firstByte) throws IOException {
    if ((firstByte & 0x80) == 0) {
      return 1; // ASCII - High order bit not set
    } else if ((firstByte & 0xE0) == 0xC0) {
      return 2; // 110xxxxx
    } else if ((firstByte & 0xF0) == 0xE0) {
      return 3; // 1110xxxx
    } else if ((firstByte & 0xF8) == 0xF0) {
      return 4; // 11110xxx
    } else if ((firstByte & 0xFC) == 0xF8) {
      return 5; // 111110xx
    } else if ((firstByte & 0xFE) == 0xFC) {
      return 6; // 1111110x
    } else {
      // Continuation bytes (10xxxxxx) and 0xFE/0xFF cannot start a character.
      throw new IOException("Invalid UTF-8 Character");
    }
  }
}