jaxp/src/com/sun/org/apache/xerces/internal/impl/io/UTF8Reader.java - edge/openjdk - Git at Google

 /*
  * reserved comment block
  * DO NOT REMOVE OR ALTER!
  */
 /*
  * Copyright 2000-2004 The Apache Software Foundation.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.sun.org.apache.xerces.internal.impl.io;

 import java.io.InputStream;
 import java.io.IOException;
 import java.io.Reader;

 import java.util.Locale;
 import com.sun.org.apache.xerces.internal.util.MessageFormatter;
 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;

 import com.sun.xml.internal.stream.util.BufferAllocator;
 import com.sun.xml.internal.stream.util.ThreadLocalBufferAllocator;

 /**
  * <p>A UTF-8 reader.</p>
  *
  * @xerces.internal
  *
  * @author Andy Clark, IBM
  *
  */
 public class UTF8Reader
     extends Reader {

     //
     // Constants
     //

     /** Default byte buffer size (2048). */
     public static final int DEFAULT_BUFFER_SIZE = 2048;

     // debugging

     /** Debug read. */
     private static final boolean DEBUG_READ = false;

     //
     // Data
     //

     /** Input stream. */
     protected InputStream fInputStream;

     /** Byte buffer. */
     protected byte[] fBuffer;

     /** Offset into buffer. */
     protected int fOffset;

     /** Surrogate character. */
     private int fSurrogate = -1;

     // message formatter; used to produce localized
     // exception messages
     private MessageFormatter fFormatter = null;

     //Locale to use for messages
     private Locale fLocale = null;

     //
     // Constructors
     //

     /**
      * Constructs a UTF-8 reader from the specified input stream
      * using the default buffer size ({@link #DEFAULT_BUFFER_SIZE}), a
      * newly created {@link XMLMessageFormatter} and the default locale.
      * Primarily for testing.
      *
      * @param inputStream The input stream.
      */
     public UTF8Reader(InputStream inputStream) {
         this(inputStream, DEFAULT_BUFFER_SIZE, new XMLMessageFormatter(), Locale.getDefault());
     } // <init>(InputStream)

     /**
      * Constructs a UTF-8 reader from the specified input stream
      * using the default buffer size ({@link #DEFAULT_BUFFER_SIZE}) and
      * the given MessageFormatter and Locale.
      *
      * @param inputStream The input stream.
      * @param messageFormatter  the formatter used to build the messages of
      *                          the exceptions thrown by this reader
      * @param locale    Locale to use for messages
      */
     public UTF8Reader(InputStream inputStream, MessageFormatter messageFormatter,
             Locale locale) {
         this(inputStream, DEFAULT_BUFFER_SIZE, messageFormatter, locale);
     } // <init>(InputStream, MessageFormatter, Locale)

     /**
      * Creates a UTF-8 reader over the given input stream with an explicit
      * buffer size, message formatter and locale.
      *
      * <p>The byte buffer is first requested from the thread-local
      * {@link BufferAllocator}; when the allocator hands back
      * <code>null</code>, a fresh array of <code>size</code> bytes is
      * allocated instead.</p>
      *
      * @param inputStream The input stream.
      * @param size        The initial buffer size.
      * @param messageFormatter  the formatter for localizing/formatting errors.
      * @param locale    the Locale to use for messages
      */
     public UTF8Reader(InputStream inputStream, int size,
             MessageFormatter messageFormatter, Locale locale) {
         // try the per-thread buffer pool before allocating
         final byte[] pooled =
             ThreadLocalBufferAllocator.getBufferAllocator().getByteBuffer(size);

         fInputStream = inputStream;
         fBuffer = (pooled != null) ? pooled : new byte[size];
         fFormatter = messageFormatter;
         fLocale = locale;
     } // <init>(InputStream, int, MessageFormatter, Locale)

     //
     // Reader methods
     //

     /**
      * Read a single character.  This method will block until a character is
      * available, an I/O error occurs, or the end of the stream is reached.
      *
      * <p>A four-byte UTF-8 sequence decodes to a surrogate pair: the high
      * surrogate is returned by this call, the low surrogate is remembered
      * in <code>fSurrogate</code> and returned by the next call.</p>
      *
      * <p>Bytes that a previous block read left in <code>fBuffer</code>
      * (<code>fOffset</code> of them) are decoded before any new byte is
      * taken from the input stream.</p>
      *
      * @return     The character read, as an integer in the range 0 to 65535
      *             (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has
      *             been reached
      *
      * @exception  IOException  If an I/O error occurs; a
      *                          MalformedByteSequenceException is raised for
      *                          a truncated or invalid byte sequence
      */
     public int read() throws IOException {

         // decode character
         int c = fSurrogate;
         if (fSurrogate == -1) {
             // NOTE: We use the index into the buffer if there are remaining
             //       bytes from the last block read. -Ac
             int index = 0;

             // get first byte
             int b0 = index == fOffset
                    ? fInputStream.read() : fBuffer[index++] & 0x00FF;
             if (b0 == -1) {
                 return -1;
             }

             // UTF-8:   [0xxx xxxx]
             // Unicode: [0000 0000] [0xxx xxxx]
             if (b0 < 0x80) {
                 c = (char)b0;
             }

             // UTF-8:   [110y yyyy] [10xx xxxx]
             // Unicode: [0000 0yyy] [yyxx xxxx]
             // (the 0x1E test rejects the overlong lead bytes 0xC0 and 0xC1)
             else if ((b0 & 0xE0) == 0xC0 && (b0 & 0x1E) != 0) {
                 int b1 = index == fOffset
                        ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                 if (b1 == -1) {
                     expectedByte(2, 2);
                 }
                 if ((b1 & 0xC0) != 0x80) {
                     invalidByte(2, 2, b1);
                 }
                 c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
             }

             // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
             // Unicode: [zzzz yyyy] [yyxx xxxx]
             else if ((b0 & 0xF0) == 0xE0) {
                 int b1 = index == fOffset
                        ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                 if (b1 == -1) {
                     expectedByte(2, 3);
                 }
                 // second byte must be a continuation byte, must not encode
                 // a surrogate (0xED 0xA0..) and must not be overlong
                 if ((b1 & 0xC0) != 0x80
                     || (b0 == 0xED && b1 >= 0xA0)
                     || ((b0 & 0x0F) == 0 && (b1 & 0x20) == 0)) {
                     invalidByte(2, 3, b1);
                 }
                 int b2 = index == fOffset
                        ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                 if (b2 == -1) {
                     expectedByte(3, 3);
                 }
                 if ((b2 & 0xC0) != 0x80) {
                     invalidByte(3, 3, b2);
                 }
                 c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                     (b2 & 0x003F);
             }

             // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
             // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
             //          [1101 11yy] [yyxx xxxx] (low surrogate)
             //          * uuuuu = wwww + 1
             else if ((b0 & 0xF8) == 0xF0) {
                 int b1 = index == fOffset
                        ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                 if (b1 == -1) {
                     expectedByte(2, 4);
                 }
                 if ((b1 & 0xC0) != 0x80
                     || ((b1 & 0x30) == 0 && (b0 & 0x07) == 0)) {
                     // report a 4-byte sequence, as the block read does
                     invalidByte(2, 4, b1);
                 }
                 int b2 = index == fOffset
                        ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                 if (b2 == -1) {
                     expectedByte(3, 4);
                 }
                 if ((b2 & 0xC0) != 0x80) {
                     // report a 4-byte sequence, as the block read does
                     invalidByte(3, 4, b2);
                 }
                 int b3 = index == fOffset
                        ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                 if (b3 == -1) {
                     expectedByte(4, 4);
                 }
                 if ((b3 & 0xC0) != 0x80) {
                     invalidByte(4, 4, b3);
                 }
                 int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                 if (uuuuu > 0x10) {
                     invalidSurrogate(uuuuu);
                 }
                 int wwww = uuuuu - 1;
                 int hs = 0xD800 |
                          ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) |
                          ((b2 >> 4) & 0x0003);
                 int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
                 c = hs;
                 fSurrogate = ls;
             }

             // error
             else {
                 invalidByte(1, 1, b0);
             }

             // The bytes taken from fBuffer have been decoded successfully;
             // drop them so that the next call does not decode them again.
             if (index > 0) {
                 final int remaining = fOffset - index;
                 if (remaining > 0) {
                     System.arraycopy(fBuffer, index, fBuffer, 0, remaining);
                 }
                 fOffset = remaining;
             }
         }

         // use surrogate
         else {
             fSurrogate = -1;
         }

         // return character
         if (DEBUG_READ) {
             System.out.println("read(): 0x"+Integer.toHexString(c));
         }
         return c;

     } // read():int

     /**
      * Read characters into a portion of an array.  This method will block
      * until some input is available, an I/O error occurs, or the end of the
      * stream is reached.
      *
      * <p>A low surrogate left pending by {@link #read()} is delivered as
      * the first character of the block.  When a malformed or truncated
      * sequence is met after at least one character has been produced, the
      * offending bytes are saved at the start of <code>fBuffer</code>
      * (<code>fOffset</code> records how many) and the characters decoded so
      * far are returned; the error is then raised by the next call.</p>
      *
      * @param      ch     Destination buffer
      * @param      offset Offset at which to start storing characters
      * @param      length Maximum number of characters to read
      *
      * @return     The number of characters read, or -1 if the end of the
      *             stream has been reached
      *
      * @exception  IOException  If an I/O error occurs; a
      *                          MalformedByteSequenceException is raised for
      *                          a truncated or invalid byte sequence
      */
     public int read(char ch[], int offset, int length) throws IOException {

         // handle surrogate
         // NOTE: The pending low surrogate is the first character of this
         //       block, so it is stored at ch[offset] and decoding continues
         //       after it.
         int out = offset;
         if (fSurrogate != -1 && length > 0) {
             ch[out++] = (char)fSurrogate;
             fSurrogate = -1;
             length--;
         }

         // read bytes
         final int total; // number of bytes available in fBuffer
         if (fOffset == 0) {
             // adjust length to read
             if (length > fBuffer.length) {
                 length = fBuffer.length;
             }

             // perform read operation
             final int bytesRead = fInputStream.read(fBuffer, 0, length);
             if (bytesRead == -1) {
                 // end of stream; a surrogate stored above must still be
                 // reported to the caller
                 return out > offset ? out - offset : -1;
             }
             total = bytesRead;
         }

         // skip read; last character was in error
         // NOTE: Having an offset value other than zero means that there was
         //       an error in the last character read. In this case, we have
         //       skipped the read so we don't consume any bytes past the
         //       error. By signalling the error on the next block read we
         //       allow the method to return the most valid characters that
         //       it can on the previous block read. -Ac
         else {
             total = fOffset;
             fOffset = 0;
         }

         // Running number of characters produced.  It starts as one character
         // per available byte (plus the surrogate stored above) and is
         // corrected below for every multi-byte sequence.
         int count = total + (out - offset);

         // convert bytes to characters
         int in;
         byte byte1;
         final byte byte0 = 0;

         // fast path: leading run of single-byte (ASCII) characters
         for (in = 0; in < total; in++) {
             byte1 = fBuffer[in];
             if (byte1 >= byte0) {
                 ch[out++] = (char)byte1;
             }
             else   {
                 break;
             }
         }
         for ( ; in < total; in++) {
             byte1 = fBuffer[in];

             // UTF-8:   [0xxx xxxx]
             // Unicode: [0000 0000] [0xxx xxxx]
             if (byte1 >= byte0) {
                 ch[out++] = (char)byte1;
                 continue;
             }

             // UTF-8:   [110y yyyy] [10xx xxxx]
             // Unicode: [0000 0yyy] [yyxx xxxx]
             int b0 = byte1 & 0x0FF;
             if ((b0 & 0xE0) == 0xC0 && (b0 & 0x1E) != 0) {
                 int b1 = -1;
                 if (++in < total) {
                     b1 = fBuffer[in] & 0x00FF;
                 }
                 else {
                     // sequence straddles the end of the buffer
                     b1 = fInputStream.read();
                     if (b1 == -1) {
                         if (out > offset) {
                             fBuffer[0] = (byte)b0;
                             fOffset = 1;
                             return out - offset;
                         }
                         expectedByte(2, 2);
                     }
                     count++;
                 }
                 if ((b1 & 0xC0) != 0x80) {
                     if (out > offset) {
                         fBuffer[0] = (byte)b0;
                         fBuffer[1] = (byte)b1;
                         fOffset = 2;
                         return out - offset;
                     }
                     invalidByte(2, 2, b1);
                 }
                 int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
                 ch[out++] = (char)c;
                 count -= 1;
                 continue;
             }

             // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
             // Unicode: [zzzz yyyy] [yyxx xxxx]
             if ((b0 & 0xF0) == 0xE0) {
                 int b1 = -1;
                 if (++in < total) {
                     b1 = fBuffer[in] & 0x00FF;
                 }
                 else {
                     b1 = fInputStream.read();
                     if (b1 == -1) {
                         if (out > offset) {
                             fBuffer[0] = (byte)b0;
                             fOffset = 1;
                             return out - offset;
                         }
                         expectedByte(2, 3);
                     }
                     count++;
                 }
                 if ((b1 & 0xC0) != 0x80
                     || (b0 == 0xED && b1 >= 0xA0)
                     || ((b0 & 0x0F) == 0 && (b1 & 0x20) == 0)) {
                     if (out > offset) {
                         fBuffer[0] = (byte)b0;
                         fBuffer[1] = (byte)b1;
                         fOffset = 2;
                         return out - offset;
                     }
                     invalidByte(2, 3, b1);
                 }
                 int b2 = -1;
                 if (++in < total) {
                     b2 = fBuffer[in] & 0x00FF;
                 }
                 else {
                     b2 = fInputStream.read();
                     if (b2 == -1) {
                         if (out > offset) {
                             fBuffer[0] = (byte)b0;
                             fBuffer[1] = (byte)b1;
                             fOffset = 2;
                             return out - offset;
                         }
                         expectedByte(3, 3);
                     }
                     count++;
                 }
                 if ((b2 & 0xC0) != 0x80) {
                     if (out > offset) {
                         fBuffer[0] = (byte)b0;
                         fBuffer[1] = (byte)b1;
                         fBuffer[2] = (byte)b2;
                         fOffset = 3;
                         return out - offset;
                     }
                     invalidByte(3, 3, b2);
                 }
                 int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                         (b2 & 0x003F);
                 ch[out++] = (char)c;
                 count -= 2;
                 continue;
             }

             // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
             // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
             //          [1101 11yy] [yyxx xxxx] (low surrogate)
             //          * uuuuu = wwww + 1
             if ((b0 & 0xF8) == 0xF0) {
                 int b1 = -1;
                 if (++in < total) {
                     b1 = fBuffer[in] & 0x00FF;
                 }
                 else {
                     b1 = fInputStream.read();
                     if (b1 == -1) {
                         if (out > offset) {
                             fBuffer[0] = (byte)b0;
                             fOffset = 1;
                             return out - offset;
                         }
                         expectedByte(2, 4);
                     }
                     count++;
                 }
                 if ((b1 & 0xC0) != 0x80
                     || ((b1 & 0x30) == 0 && (b0 & 0x07) == 0)) {
                     if (out > offset) {
                         fBuffer[0] = (byte)b0;
                         fBuffer[1] = (byte)b1;
                         fOffset = 2;
                         return out - offset;
                     }
                     invalidByte(2, 4, b1);
                 }
                 int b2 = -1;
                 if (++in < total) {
                     b2 = fBuffer[in] & 0x00FF;
                 }
                 else {
                     b2 = fInputStream.read();
                     if (b2 == -1) {
                         if (out > offset) {
                             fBuffer[0] = (byte)b0;
                             fBuffer[1] = (byte)b1;
                             fOffset = 2;
                             return out - offset;
                         }
                         expectedByte(3, 4);
                     }
                     count++;
                 }
                 if ((b2 & 0xC0) != 0x80) {
                     if (out > offset) {
                         fBuffer[0] = (byte)b0;
                         fBuffer[1] = (byte)b1;
                         fBuffer[2] = (byte)b2;
                         fOffset = 3;
                         return out - offset;
                     }
                     invalidByte(3, 4, b2);
                 }
                 int b3 = -1;
                 if (++in < total) {
                     b3 = fBuffer[in] & 0x00FF;
                 }
                 else {
                     b3 = fInputStream.read();
                     if (b3 == -1) {
                         if (out > offset) {
                             fBuffer[0] = (byte)b0;
                             fBuffer[1] = (byte)b1;
                             fBuffer[2] = (byte)b2;
                             fOffset = 3;
                             return out - offset;
                         }
                         expectedByte(4, 4);
                     }
                     count++;
                 }
                 if ((b3 & 0xC0) != 0x80) {
                     if (out > offset) {
                         fBuffer[0] = (byte)b0;
                         fBuffer[1] = (byte)b1;
                         fBuffer[2] = (byte)b2;
                         fBuffer[3] = (byte)b3;
                         fOffset = 4;
                         return out - offset;
                     }
                     // the offending byte is the fourth one
                     invalidByte(4, 4, b3);
                 }

                 // check if output buffer is large enough to hold 2 surrogate chars
                 if (out + 1 >= ch.length) {
                     fBuffer[0] = (byte)b0;
                     fBuffer[1] = (byte)b1;
                     fBuffer[2] = (byte)b2;
                     fBuffer[3] = (byte)b3;
                     fOffset = 4;
                     return out - offset;
                 }

                 // decode bytes into surrogate characters
                 int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                 if (uuuuu > 0x10) {
                     invalidSurrogate(uuuuu);
                 }
                 int wwww = uuuuu - 1;
                 int zzzz = b1 & 0x000F;
                 int yyyyyy = b2 & 0x003F;
                 int xxxxxx = b3 & 0x003F;
                 int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
                 int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;

                 // set characters
                 ch[out++] = (char)hs;
                 ch[out++] = (char)ls;
                 // four bytes became two characters
                 count -= 2;
                 continue;
             }

             // error
             if (out > offset) {
                 fBuffer[0] = (byte)b0;
                 fOffset = 1;
                 return out - offset;
             }
             invalidByte(1, 1, b0);
         }

         // return number of characters converted
         if (DEBUG_READ) {
             System.out.println("read(char[],"+offset+','+length+"): count="+count);
         }
         return count;

     } // read(char[],int,int)

     /**
      * Skips characters by decoding them into a scratch array and discarding
      * the result.  This method will block until some characters are
      * available, an I/O error occurs, or the end of the stream is reached.
      *
      * @param  n  The number of characters to skip
      *
      * @return    The number of characters actually skipped
      *
      * @exception  IOException  If an I/O error occurs
      */
     public long skip(long n) throws IOException {

         // scratch space sized like the byte buffer
         final char[] scratch = new char[fBuffer.length];
         long left = n;

         for (;;) {
             // never request more than the scratch array can hold
             final int want = (int)Math.min((long)scratch.length, left);
             final int got = read(scratch, 0, want);
             if (got <= 0) {
                 // end of stream, or nothing could be decoded
                 break;
             }
             left -= got;
             if (left <= 0) {
                 break;
             }
         }

         return n - left;

     } // skip(long):long

     /**
      * Tell whether this stream is ready to be read.
      *
      * <p>This implementation always answers <code>false</code>; it does not
      * query the underlying input stream.</p>
      *
      * @return True if the next read() is guaranteed not to block for input,
      * false otherwise.  Note that returning false does not guarantee that the
      * next read will block.
      *
      * @exception  IOException  If an I/O error occurs
      */
     public boolean ready() throws IOException {
         return false;
     } // ready()

     /**
      * Tell whether this stream supports the mark() operation.
      *
      * @return always <code>false</code>; {@link #mark(int)} throws an
      *         IOException
      */
     public boolean markSupported() {
         return false;
     } // markSupported()

     /**
      * Marking is not supported by this reader: every call fails with an
      * IOException whose message is produced by the message formatter for
      * the key <code>OperationNotSupported</code>.
      *
      * @param  readAheadLimit  Limit on the number of characters that may be
      *                         read while still preserving the mark.  Not
      *                         used by this implementation.
      *
      * @exception  IOException  Always, because the stream does not support
      *                          mark()
      */
     public void mark(int readAheadLimit) throws IOException {
         // message arguments: the unsupported operation and the encoding
         final Object[] args = { "mark()", "UTF-8" };
         final String message =
             fFormatter.formatMessage(fLocale, "OperationNotSupported", args);
         throw new IOException(message);
     } // mark(int)

     /**
      * Reset the stream.  If the stream has been marked, then attempt to
      * reposition it at the mark.  If the stream has not been marked, then
      * attempt to reset it in some way appropriate to the particular stream,
      * for example by repositioning it to its starting point.  Not all
      * character-input streams support the reset() operation, and some support
      * reset() without supporting mark().
      *
      * <p>This implementation only clears the decoder state: it sets
      * <code>fOffset</code> to zero, which discards any bytes held back in
      * the byte buffer, and forgets a pending low surrogate.  It does not
      * touch the underlying input stream, and the body contains no statement
      * that throws.</p>
      *
      * @exception  IOException  If the stream has not been marked,
      *                          or if the mark has been invalidated,
      *                          or if the stream does not support reset(),
      *                          or if some other I/O error occurs
      */
     public void reset() throws IOException {
         // drop bytes held back by a previous block read
         fOffset = 0;
         // forget a low surrogate waiting to be returned
         fSurrogate = -1;
     } // reset()

     /**
      * Close the stream.  The byte buffer is handed back to the thread-local
      * {@link BufferAllocator} and the underlying input stream is closed.
      * After this call <code>fBuffer</code> is <code>null</code>.
      *
      * <p>Calling this method again does not hand a buffer back a second
      * time; the underlying input stream's <code>close()</code> is still
      * invoked.</p>
      *
      * @exception  IOException  If an I/O error occurs
      */
     public void close() throws IOException {
         // detach the buffer first so that it can be returned only once
         final byte[] buffer = fBuffer;
         fBuffer = null;
         try {
             if (buffer != null) {
                 ThreadLocalBufferAllocator.getBufferAllocator().returnByteBuffer(buffer);
             }
         }
         finally {
             // always release the stream, even if returning the buffer failed
             fInputStream.close();
         }
     } // close()

     //
     // Private methods
     //

     /**
      * Reports a sequence that ended early by throwing a
      * MalformedByteSequenceException with the message key
      * <code>ExpectedByte</code>.
      *
      * @param position  position of the missing byte within the sequence
      * @param count     number of bytes in the sequence
      */
     private void expectedByte(int position, int count)
         throws MalformedByteSequenceException {

         final Object[] args = {
             Integer.toString(position),
             Integer.toString(count)
         };
         throw new MalformedByteSequenceException(
             fFormatter, fLocale, XMLMessageFormatter.XML_DOMAIN, "ExpectedByte", args);

     } // expectedByte(int,int)

     /**
      * Reports an invalid byte by throwing a
      * MalformedByteSequenceException with the message key
      * <code>InvalidByte</code>.
      *
      * @param position  position of the invalid byte within the sequence
      * @param count     number of bytes in the sequence
      * @param c         the invalid byte; not included in the message
      *                  arguments
      */
     private void invalidByte(int position, int count, int c)
         throws MalformedByteSequenceException {

         final Object[] args = {
             Integer.toString(position),
             Integer.toString(count)
         };
         throw new MalformedByteSequenceException(
             fFormatter, fLocale, XMLMessageFormatter.XML_DOMAIN, "InvalidByte", args);

     } // invalidByte(int,int,int)

     /**
      * Reports out-of-range high surrogate bits by throwing a
      * MalformedByteSequenceException with the message key
      * <code>InvalidHighSurrogate</code>.
      *
      * @param uuuuu  the five <code>uuuuu</code> bits of the four-byte
      *               sequence; passed to the message in hexadecimal
      */
     private void invalidSurrogate(int uuuuu) throws MalformedByteSequenceException {

         final Object[] args = { Integer.toHexString(uuuuu) };
         throw new MalformedByteSequenceException(
             fFormatter, fLocale, XMLMessageFormatter.XML_DOMAIN,
             "InvalidHighSurrogate", args);

     } // invalidSurrogate(int)

 } // class UTF8Reader
	/*
	* reserved comment block
	* DO NOT REMOVE OR ALTER!
	*/
	/*
	* Copyright 2000-2004 The Apache Software Foundation.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.sun.org.apache.xerces.internal.impl.io;

	import java.io.InputStream;
	import java.io.IOException;
	import java.io.Reader;

	import java.util.Locale;
	import com.sun.org.apache.xerces.internal.util.MessageFormatter;
	import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;

	import com.sun.xml.internal.stream.util.BufferAllocator;
	import com.sun.xml.internal.stream.util.ThreadLocalBufferAllocator;

	/**
	* <p>A UTF-8 reader.</p>
	*
	* @xerces.internal
	*
	* @author Andy Clark, IBM
	*
	*/
	public class UTF8Reader
	extends Reader {

	//
	// Constants
	//

	/** Default byte buffer size (2048). */
	public static final int DEFAULT_BUFFER_SIZE = 2048;

	// debugging

	/** Debug read. */
	private static final boolean DEBUG_READ = false;

	//
	// Data
	//

	/** Input stream. */
	protected InputStream fInputStream;

	/** Byte buffer. */
	protected byte[] fBuffer;

	/** Offset into buffer. */
	protected int fOffset;

	/** Surrogate character. */
	private int fSurrogate = -1;

	// message formatter; used to produce localized
	// exception messages
	private MessageFormatter fFormatter = null;

	//Locale to use for messages
	private Locale fLocale = null;

	//
	// Constructors
	//

	/**
	* Constructs a UTF-8 reader from the specified input stream
	* using the default buffer size ({@link #DEFAULT_BUFFER_SIZE}), a
	* newly created {@link XMLMessageFormatter} and the default locale.
	* Primarily for testing.
	*
	* @param inputStream The input stream.
	*/
	public UTF8Reader(InputStream inputStream) {
	this(inputStream, DEFAULT_BUFFER_SIZE, new XMLMessageFormatter(), Locale.getDefault());
	} // <init>(InputStream)

	/**
	* Constructs a UTF-8 reader from the specified input stream
	* using the default buffer size ({@link #DEFAULT_BUFFER_SIZE}) and
	* the given MessageFormatter and Locale.
	*
	* @param inputStream The input stream.
	* @param messageFormatter the formatter used to build the messages of
	* the exceptions thrown by this reader
	* @param locale Locale to use for messages
	*/
	public UTF8Reader(InputStream inputStream, MessageFormatter messageFormatter,
	Locale locale) {
	this(inputStream, DEFAULT_BUFFER_SIZE, messageFormatter, locale);
	} // <init>(InputStream, MessageFormatter, Locale)

	/**
	 * Creates a UTF-8 reader over the given input stream with an explicit
	 * buffer size, message formatter and locale.
	 *
	 * <p>The byte buffer is first requested from the thread-local
	 * {@link BufferAllocator}; when the allocator hands back
	 * <code>null</code>, a fresh array of <code>size</code> bytes is
	 * allocated instead.</p>
	 *
	 * @param inputStream The input stream.
	 * @param size The initial buffer size.
	 * @param messageFormatter the formatter for localizing/formatting errors.
	 * @param locale the Locale to use for messages
	 */
	public UTF8Reader(InputStream inputStream, int size,
			MessageFormatter messageFormatter, Locale locale) {
		// try the per-thread buffer pool before allocating
		final byte[] pooled =
			ThreadLocalBufferAllocator.getBufferAllocator().getByteBuffer(size);

		fInputStream = inputStream;
		fBuffer = (pooled != null) ? pooled : new byte[size];
		fFormatter = messageFormatter;
		fLocale = locale;
	} // <init>(InputStream, int, MessageFormatter, Locale)

	//
	// Reader methods
	//

	/**
	* Read a single character. This method will block until a character is
	* available, an I/O error occurs, or the end of the stream is reached.
	*
	* <p> Subclasses that intend to support efficient single-character input
	* should override this method.
	*
	* @return The character read, as an integer in the range 0 to 16383
	* (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has
	* been reached
	*
	* @exception IOException If an I/O error occurs
	*/
	public int read() throws IOException {

	// decode character
	int c = fSurrogate;
	if (fSurrogate == -1) {
	// NOTE: We use the index into the buffer if there are remaining
	// bytes from the last block read. -Ac
	int index = 0;

	// get first byte
	int b0 = index == fOffset
	? fInputStream.read() : fBuffer[index++] & 0x00FF;
	if (b0 == -1) {
	return -1;
	}

	// UTF-8: [0xxx xxxx]
	// Unicode: [0000 0000] [0xxx xxxx]
	if (b0 < 0x80) {
	c = (char)b0;
	}

	// UTF-8: [110y yyyy] [10xx xxxx]
	// Unicode: [0000 0yyy] [yyxx xxxx]
	else if ((b0 & 0xE0) == 0xC0 && (b0 & 0x1E) != 0) {
	int b1 = index == fOffset
	? fInputStream.read() : fBuffer[index++] & 0x00FF;
	if (b1 == -1) {
	expectedByte(2, 2);
	}
	if ((b1 & 0xC0) != 0x80) {
	invalidByte(2, 2, b1);
	}
	c = ((b0 << 6) & 0x07C0) \| (b1 & 0x003F);
	}

	// UTF-8: [1110 zzzz] [10yy yyyy] [10xx xxxx]
	// Unicode: [zzzz yyyy] [yyxx xxxx]
	else if ((b0 & 0xF0) == 0xE0) {
	int b1 = index == fOffset
	? fInputStream.read() : fBuffer[index++] & 0x00FF;
	if (b1 == -1) {
	expectedByte(2, 3);
	}
	if ((b1 & 0xC0) != 0x80
	\|\| (b0 == 0xED && b1 >= 0xA0)
	\|\| ((b0 & 0x0F) == 0 && (b1 & 0x20) == 0)) {
	invalidByte(2, 3, b1);
	}
	int b2 = index == fOffset
	? fInputStream.read() : fBuffer[index++] & 0x00FF;
	if (b2 == -1) {
	expectedByte(3, 3);
	}
	if ((b2 & 0xC0) != 0x80) {
	invalidByte(3, 3, b2);
	}
	c = ((b0 << 12) & 0xF000) \| ((b1 << 6) & 0x0FC0) \|
	(b2 & 0x003F);
	}

	// UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
	// Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
	// [1101 11yy] [yyxx xxxx] (low surrogate)
	// * uuuuu = wwww + 1
	else if ((b0 & 0xF8) == 0xF0) {
	int b1 = index == fOffset
	? fInputStream.read() : fBuffer[index++] & 0x00FF;
	if (b1 == -1) {
	expectedByte(2, 4);
	}
	if ((b1 & 0xC0) != 0x80
	\|\| ((b1 & 0x30) == 0 && (b0 & 0x07) == 0)) {
	invalidByte(2, 3, b1);
	}
	int b2 = index == fOffset
	? fInputStream.read() : fBuffer[index++] & 0x00FF;
	if (b2 == -1) {
	expectedByte(3, 4);
	}
	if ((b2 & 0xC0) != 0x80) {
	invalidByte(3, 3, b2);
	}
	int b3 = index == fOffset
	? fInputStream.read() : fBuffer[index++] & 0x00FF;
	if (b3 == -1) {
	expectedByte(4, 4);
	}
	if ((b3 & 0xC0) != 0x80) {
	invalidByte(4, 4, b3);
	}
	int uuuuu = ((b0 << 2) & 0x001C) \| ((b1 >> 4) & 0x0003);
	if (uuuuu > 0x10) {
	invalidSurrogate(uuuuu);
	}
	int wwww = uuuuu - 1;
	int hs = 0xD800 \|
	((wwww << 6) & 0x03C0) \| ((b1 << 2) & 0x003C) \|
	((b2 >> 4) & 0x0003);
	int ls = 0xDC00 \| ((b2 << 6) & 0x03C0) \| (b3 & 0x003F);
	c = hs;
	fSurrogate = ls;
	}

	// error
	else {
	invalidByte(1, 1, b0);
	}
	}

	// use surrogate
	else {
	fSurrogate = -1;
	}

	// return character
	if (DEBUG_READ) {
	System.out.println("read(): 0x"+Integer.toHexString(c));
	}
	return c;

	} // read():int

	/**
	* Read characters into a portion of an array. This method will block
	* until some input is available, an I/O error occurs, or the end of the
	* stream is reached.
	*
	* @param ch Destination buffer
	* @param offset Offset at which to start storing characters
	* @param length Maximum number of characters to read
	*
	* @return The number of characters read, or -1 if the end of the
	* stream has been reached
	*
	* @exception IOException If an I/O error occurs
	*/
	public int read(char ch[], int offset, int length) throws IOException {

	// handle surrogate
	int out = offset;
	if (fSurrogate != -1) {
	ch[offset + 1] = (char)fSurrogate;
	fSurrogate = -1;
	length--;
	out++;
	}

	// read bytes
	int count = 0;
	if (fOffset == 0) {
	// adjust length to read
	if (length > fBuffer.length) {
	length = fBuffer.length;
	}

	// perform read operation
	count = fInputStream.read(fBuffer, 0, length);
	if (count == -1) {
	return -1;
	}
	count += out - offset;
	}

	// skip read; last character was in error
	// NOTE: Having an offset value other than zero means that there was
	// an error in the last character read. In this case, we have
	// skipped the read so we don't consume any bytes past the
	// error. By signalling the error on the next block read we
	// allow the method to return the most valid characters that
	// it can on the previous block read. -Ac
	else {
	count = fOffset;
	fOffset = 0;
	}

	// convert bytes to characters
	final int total = count;
	int in;
	byte byte1;
	final byte byte0 = 0;
	for (in = 0; in < total; in++) {
	byte1 = fBuffer[in];
	if (byte1 >= byte0) {
	ch[out++] = (char)byte1;
	}
	else {
	break;
	}
	}
	for ( ; in < total; in++) {
	byte1 = fBuffer[in];

	// UTF-8: [0xxx xxxx]
	// Unicode: [0000 0000] [0xxx xxxx]
	if (byte1 >= byte0) {
	ch[out++] = (char)byte1;
	continue;
	}

	// UTF-8: [110y yyyy] [10xx xxxx]
	// Unicode: [0000 0yyy] [yyxx xxxx]
	int b0 = byte1 & 0x0FF;
	if ((b0 & 0xE0) == 0xC0 && (b0 & 0x1E) != 0) {
	int b1 = -1;
	if (++in < total) {
	b1 = fBuffer[in] & 0x00FF;
	}
	else {
	b1 = fInputStream.read();
	if (b1 == -1) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fOffset = 1;
	return out - offset;
	}
	expectedByte(2, 2);
	}
	count++;
	}
	if ((b1 & 0xC0) != 0x80) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fOffset = 2;
	return out - offset;
	}
	invalidByte(2, 2, b1);
	}
	int c = ((b0 << 6) & 0x07C0) \| (b1 & 0x003F);
	ch[out++] = (char)c;
	count -= 1;
	continue;
	}

	// UTF-8: [1110 zzzz] [10yy yyyy] [10xx xxxx]
	// Unicode: [zzzz yyyy] [yyxx xxxx]
	if ((b0 & 0xF0) == 0xE0) {
	int b1 = -1;
	if (++in < total) {
	b1 = fBuffer[in] & 0x00FF;
	}
	else {
	b1 = fInputStream.read();
	if (b1 == -1) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fOffset = 1;
	return out - offset;
	}
	expectedByte(2, 3);
	}
	count++;
	}
	if ((b1 & 0xC0) != 0x80
	\|\| (b0 == 0xED && b1 >= 0xA0)
	\|\| ((b0 & 0x0F) == 0 && (b1 & 0x20) == 0)) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fOffset = 2;
	return out - offset;
	}
	invalidByte(2, 3, b1);
	}
	int b2 = -1;
	if (++in < total) {
	b2 = fBuffer[in] & 0x00FF;
	}
	else {
	b2 = fInputStream.read();
	if (b2 == -1) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fOffset = 2;
	return out - offset;
	}
	expectedByte(3, 3);
	}
	count++;
	}
	if ((b2 & 0xC0) != 0x80) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fBuffer[2] = (byte)b2;
	fOffset = 3;
	return out - offset;
	}
	invalidByte(3, 3, b2);
	}
	int c = ((b0 << 12) & 0xF000) \| ((b1 << 6) & 0x0FC0) \|
	(b2 & 0x003F);
	ch[out++] = (char)c;
	count -= 2;
	continue;
	}

	// UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
	// Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
	// [1101 11yy] [yyxx xxxx] (low surrogate)
	// * uuuuu = wwww + 1
	if ((b0 & 0xF8) == 0xF0) {
	int b1 = -1;
	if (++in < total) {
	b1 = fBuffer[in] & 0x00FF;
	}
	else {
	b1 = fInputStream.read();
	if (b1 == -1) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fOffset = 1;
	return out - offset;
	}
	expectedByte(2, 4);
	}
	count++;
	}
	if ((b1 & 0xC0) != 0x80
	\|\| ((b1 & 0x30) == 0 && (b0 & 0x07) == 0)) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fOffset = 2;
	return out - offset;
	}
	invalidByte(2, 4, b1);
	}
	int b2 = -1;
	if (++in < total) {
	b2 = fBuffer[in] & 0x00FF;
	}
	else {
	b2 = fInputStream.read();
	if (b2 == -1) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fOffset = 2;
	return out - offset;
	}
	expectedByte(3, 4);
	}
	count++;
	}
	if ((b2 & 0xC0) != 0x80) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fBuffer[2] = (byte)b2;
	fOffset = 3;
	return out - offset;
	}
	invalidByte(3, 4, b2);
	}
	int b3 = -1;
	if (++in < total) {
	b3 = fBuffer[in] & 0x00FF;
	}
	else {
	b3 = fInputStream.read();
	if (b3 == -1) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fBuffer[2] = (byte)b2;
	fOffset = 3;
	return out - offset;
	}
	expectedByte(4, 4);
	}
	count++;
	}
	if ((b3 & 0xC0) != 0x80) {
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fBuffer[2] = (byte)b2;
	fBuffer[3] = (byte)b3;
	fOffset = 4;
	return out - offset;
	}
	invalidByte(4, 4, b2);
	}

	// check if output buffer is large enough to hold 2 surrogate chars
	if (out + 1 >= ch.length) {
	fBuffer[0] = (byte)b0;
	fBuffer[1] = (byte)b1;
	fBuffer[2] = (byte)b2;
	fBuffer[3] = (byte)b3;
	fOffset = 4;
	return out - offset;
	}

	// decode bytes into surrogate characters
	int uuuuu = ((b0 << 2) & 0x001C) \| ((b1 >> 4) & 0x0003);
	if (uuuuu > 0x10) {
	invalidSurrogate(uuuuu);
	}
	int wwww = uuuuu - 1;
	int zzzz = b1 & 0x000F;
	int yyyyyy = b2 & 0x003F;
	int xxxxxx = b3 & 0x003F;
	int hs = 0xD800 \| ((wwww << 6) & 0x03C0) \| (zzzz << 2) \| (yyyyyy >> 4);
	int ls = 0xDC00 \| ((yyyyyy << 6) & 0x03C0) \| xxxxxx;

	// set characters
	ch[out++] = (char)hs;
	ch[out++] = (char)ls;
	count -= 2;
	continue;
	}

	// error
	if (out > offset) {
	fBuffer[0] = (byte)b0;
	fOffset = 1;
	return out - offset;
	}
	invalidByte(1, 1, b0);
	}

	// return number of characters converted
	if (DEBUG_READ) {
	System.out.println("read(char[],"+offset+','+length+"): count="+count);
	}
	return count;

	} // read(char[],int,int)

	/**
	* Skip characters. This method will block until some characters are
	* available, an I/O error occurs, or the end of the stream is reached.
	*
	* @param n The number of characters to skip
	*
	* @return The number of characters actually skipped
	*
	* @exception IOException If an I/O error occurs
	*/
	public long skip(long n) throws IOException {

	    // java.io.Reader.skip specifies IllegalArgumentException for a
	    // negative count; without this check the negative value would be
	    // cast to a negative read length and passed down to read().
	    if (n < 0) {
	        throw new IllegalArgumentException("skip value is negative");
	    }

	    // nothing to skip: do not allocate a scratch buffer or touch the stream
	    if (n == 0) {
	        return 0;
	    }

	    // decode into a scratch buffer and discard the characters
	    long remaining = n;
	    final char[] ch = new char[fBuffer.length];
	    do {
	        // never request more than the scratch buffer can hold
	        int length = ch.length < remaining ? ch.length : (int)remaining;
	        int count = read(ch, 0, length);
	        if (count > 0) {
	            remaining -= count;
	        }
	        else {
	            // end of stream (-1) or no progress (0): stop skipping
	            break;
	        }
	    } while (remaining > 0);

	    long skipped = n - remaining;
	    return skipped;

	} // skip(long):long

	/**
	* Tell whether this stream is ready to be read.
	*
	* @return True if the next read() is guaranteed not to block for input,
	* false otherwise. Note that returning false does not guarantee that the
	* next read will block.
	*
	* @exception IOException If an I/O error occurs
	*/
	public boolean ready() throws IOException {
	    // This reader never promises that the next read() will not block,
	    // so the answer is unconditionally "not ready".
	    final boolean guaranteedNotToBlock = false;
	    return guaranteedNotToBlock;
	} // ready()

	/**
	* Tell whether this stream supports the mark() operation.
	*/
	public boolean markSupported() {
	    // mark() always throws in this reader, so marking is reported as
	    // unsupported.
	    final boolean supported = false;
	    return supported;
	} // markSupported()

	/**
	* Mark the present position in the stream. Subsequent calls to reset()
	* will attempt to reposition the stream to this point. Not all
	* character-input streams support the mark() operation.
	*
	* @param readAheadLimit Limit on the number of characters that may be
	* read while still preserving the mark. After
	* reading this many characters, attempting to
	* reset the stream may fail.
	*
	* @exception IOException If the stream does not support mark(),
	* or if some other I/O error occurs
	*/
	public void mark(int readAheadLimit) throws IOException {
	    // Marking is not supported: build the localized
	    // "OperationNotSupported" message and always fail with it.
	    final Object[] messageArguments = new Object[]{"mark()", "UTF-8"};
	    final String message =
	        fFormatter.formatMessage(fLocale, "OperationNotSupported", messageArguments);
	    throw new IOException(message);
	} // mark(int)

	/**
	* Reset the stream. If the stream has been marked, then attempt to
	* reposition it at the mark. If the stream has not been marked, then
	* attempt to reset it in some way appropriate to the particular stream,
	* for example by repositioning it to its starting point. Not all
	* character-input streams support the reset() operation, and some support
	* reset() without supporting mark().
	*
	* @exception IOException If the stream has not been marked,
	* or if the mark has been invalidated,
	* or if the stream does not support reset(),
	* or if some other I/O error occurs
	*/
	public void reset() throws IOException {
	    // Clear the decoder state only: forget any pending low surrogate
	    // and any bytes held over in the buffer from a previous read.
	    fSurrogate = -1;
	    fOffset = 0;
	} // reset()

	/**
	* Close the stream. Once a stream has been closed, further read(),
	* ready(), mark(), or reset() invocations will throw an IOException.
	* Closing a previously-closed stream, however, has no effect.
	*
	* @exception IOException If an I/O error occurs
	*/
	public void close() throws IOException {
	    // Hand the byte buffer back to the allocator obtained from
	    // ThreadLocalBufferAllocator, then drop our reference to it.
	    ThreadLocalBufferAllocator.getBufferAllocator().returnByteBuffer(fBuffer);
	    fBuffer = null;

	    // Finally close the wrapped input stream.
	    fInputStream.close();
	} // close()

	//
	// Private methods
	//

	/** Throws an exception for expected byte. */
	private void expectedByte(int position, int count)
	    throws MalformedByteSequenceException {

	    // Message arguments: which byte of the sequence was missing, and the
	    // length of the sequence being decoded.
	    final String missingPosition = Integer.toString(position);
	    final String sequenceLength = Integer.toString(count);
	    final Object[] arguments = new Object[] {missingPosition, sequenceLength};

	    throw new MalformedByteSequenceException(fFormatter,
	                                             fLocale,
	                                             XMLMessageFormatter.XML_DOMAIN,
	                                             "ExpectedByte",
	                                             arguments);

	} // expectedByte(int,int)

	/** Throws an exception for invalid byte. */
	private void invalidByte(int position, int count, int c)
	    throws MalformedByteSequenceException {

	    // Message arguments: position of the offending byte and the length
	    // of the sequence. The byte value "c" itself is not part of the
	    // message arguments.
	    final String badPosition = Integer.toString(position);
	    final String sequenceLength = Integer.toString(count);
	    final Object[] arguments = new Object[] {badPosition, sequenceLength};

	    throw new MalformedByteSequenceException(fFormatter,
	                                             fLocale,
	                                             XMLMessageFormatter.XML_DOMAIN,
	                                             "InvalidByte",
	                                             arguments);

	} // invalidByte(int,int,int)

	/** Throws an exception for invalid surrogate bits. */
	private void invalidSurrogate(int uuuuu) throws MalformedByteSequenceException {

	    // The single message argument is the rejected "uuuuu" bit field,
	    // rendered in hexadecimal.
	    final String hexBits = Integer.toHexString(uuuuu);
	    final Object[] arguments = new Object[] {hexBits};

	    throw new MalformedByteSequenceException(fFormatter,
	                                             fLocale,
	                                             XMLMessageFormatter.XML_DOMAIN,
	                                             "InvalidHighSurrogate",
	                                             arguments);

	} // invalidSurrogate(int)

	} // class UTF8Reader