blob: 8b02b2e5740c7fcda770e860b386c1765db25942 [file] [log] [blame]
/*
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sun.org.apache.xerces.internal.impl;
import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
import com.sun.org.apache.xerces.internal.util.EncodingMap;
import com.sun.org.apache.xerces.internal.util.SymbolTable;
import com.sun.org.apache.xerces.internal.util.XMLChar;
import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer;
import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit;
import com.sun.org.apache.xerces.internal.xni.*;
import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
import com.sun.xml.internal.stream.Entity;
import com.sun.xml.internal.stream.Entity.ScannedEntity;
import com.sun.xml.internal.stream.XMLBufferListener;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Locale;
import java.util.Vector;
/**
* Implements the entity scanner methods.
*
* @author Neeraj Bajaj, Sun Microsystems
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
* @author K.Venugopal Sun Microsystems
*
*/
public class XMLEntityScanner implements XMLLocator {
/** The entity currently being scanned; null until one is set via setCurrentEntity. */
protected Entity.ScannedEntity fCurrentEntity = null ;
/** Buffer size most recently passed to setBufferSize; starts at the entity manager's default. */
protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;
/** Entity manager this scanner is bound to; the reset methods copy its limit analyzer and security manager. */
protected XMLEntityManager fEntityManager ;
/** Security manager, copied from the entity manager on reset. */
protected XMLSecurityManager fSecurityManager = null;
/** Limit analyzer, copied from the entity manager on reset. */
protected XMLLimitAnalyzer fLimitAnalyzer = null;
/** Debug switching readers for encodings. */
private static final boolean DEBUG_ENCODINGS = false;
/** Listeners which should know when load is being called; cleared by resetCommon. */
private Vector listeners = new Vector();
/**
* Lookup table, indexed by character code, used as a fast path by the name
* scanning methods for characters below 127. The static initializer marks
* ASCII letters, digits, '-', '.', ':' and '_' as valid.
*/
private static final boolean [] VALID_NAMES = new boolean[127];
/**
* Debug printing of buffer. This debugging flag works best when you
* resize the DEFAULT_BUFFER_SIZE down to something reasonable like
* 64 characters.
*/
private static final boolean DEBUG_BUFFER = false;
/** Debug flag; presumably for the skip-string scanning code, which is outside this part of the file. */
private static final boolean DEBUG_SKIP_STRING = false;
/**
* To signal the end of the document entity, this exception will be thrown.
* It is a shared instance whose fillInStackTrace() is overridden to return
* the instance unchanged instead of recording a stack trace.
*/
private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
private static final long serialVersionUID = 980337771224675268L;
public Throwable fillInStackTrace() {
return this;
}
};
/** Symbol table used to intern scanned names; set by the reset methods. */
protected SymbolTable fSymbolTable = null;
/** Error reporter used to report limit violations; set by the reset methods. */
protected XMLErrorReporter fErrorReporter = null;
// Whitespace bookkeeping. resetCommon() restores whiteSpaceLen to 0 and
// whiteSpaceInfoNeeded to true; the code that fills whiteSpaceLookup is
// outside this part of the file.
int [] whiteSpaceLookup = new int[100];
int whiteSpaceLen = 0;
boolean whiteSpaceInfoNeeded = true;
/**
* Allow Java encoding names. This feature identifier is:
* http://apache.org/xml/features/allow-java-encodings
*/
protected boolean fAllowJavaEncodings;
// NOTE(review): the two comment lines below do not describe any field that
// follows them; they look like a leftover from a removed declaration.
//Will be used only during internal subsets.
//for appending data.
/** Property identifier: symbol table. */
protected static final String SYMBOL_TABLE =
Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
/** Property identifier: error reporter. */
protected static final String ERROR_REPORTER =
Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
/** Feature identifier: allow Java encodings. */
protected static final String ALLOW_JAVA_ENCODINGS =
Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;
/** Property manager reference; not assigned anywhere in the visible part of the file. */
protected PropertyManager fPropertyManager = null ;
/** Cached result of fCurrentEntity.isExternal(), refreshed by setCurrentEntity. */
boolean isExternal = false;
// Fill the ASCII fast-path table with the characters accepted inside names.
static {
    for (char upper = 'A'; upper <= 'Z'; upper++) {
        VALID_NAMES[upper] = true;
    }
    for (char lower = 'a'; lower <= 'z'; lower++) {
        VALID_NAMES[lower] = true;
    }
    for (char digit = '0'; digit <= '9'; digit++) {
        VALID_NAMES[digit] = true;
    }
    // punctuation accepted in names: code points 45, 46, 58 and 95
    VALID_NAMES['-'] = true;
    VALID_NAMES['.'] = true;
    VALID_NAMES[':'] = true;
    VALID_NAMES['_'] = true;
}
// SAPJVM: Remembers that the XML version has been set explicitly
// (setXMLVersion sets this to true), so that XMLStreamReader.getVersion()
// can find that out.
boolean xmlVersionSetExplicitly = false;
// Indicates that the current operation is detecting the XML version;
// while true, scanChar skips its entity size-limit check.
boolean detectingVersion = false;
//
// Constructors
//
/**
* Default constructor. Performs no initialization of its own: every field
* keeps the initial value given at its declaration.
*/
public XMLEntityScanner() {
} // <init>()
/**
* Creates a scanner bound to the given entity manager and initializes it
* from the given property manager by calling {@link #reset(PropertyManager)}.
* <p>
* The entity manager is stored before the reset because the reset copies
* the limit analyzer and security manager from it.
*
* @param propertyManager supplies the symbol table and error reporter properties
* @param entityManager the entity manager this scanner works for
*/
public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
fEntityManager = entityManager ;
reset(propertyManager);
} // <init>()
/**
* Records the buffer size requested for this scanner. Only the
* {@code fBufferSize} field is updated; no existing buffer is resized here.
*
* @param size the buffer size to remember
*/
public final void setBufferSize(int size) {
// REVISIT: Buffer size passed to entity scanner
// was not being kept in synch with the actual size
// of the buffers in each scanned entity. If any
// of the buffers were actually resized, it was possible
// that the parser would throw an ArrayIndexOutOfBoundsException
// for documents which contained names which are longer than
// the current buffer size. Conceivably the buffer size passed
// to entity scanner could be used to determine a minimum size
// for resizing, if doubling its size is smaller than this
// minimum. -- mrglavas
fBufferSize = size;
}
/**
* Resets this scanner from a property manager: reads the symbol table and
* error reporter properties, then performs the common reset (see
* resetCommon). The allow-Java-encodings feature is not touched by this
* overload.
*
* @param propertyManager supplies the SYMBOL_TABLE and ERROR_REPORTER properties
*/
public void reset(PropertyManager propertyManager){
fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
resetCommon();
}
/**
* Resets the component. The component can query the component manager
* about any features and properties that affect the operation of the
* component.
* <p>
* Reads the allow-Java-encodings feature (defaulting to false), the symbol
* table and the error reporter, then performs the common reset.
*
* @param componentManager The component manager.
*
* @throws XMLConfigurationException Thrown by component on initialization
* error, for example if a feature or property
* required for the operation of the component
* is not recognized or not supported by the
* component manager.
*/
public void reset(XMLComponentManager componentManager)
throws XMLConfigurationException {
// xerces features
fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
//xerces properties
fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
resetCommon();
} // reset(XMLComponentManager)
/**
* Resets this scanner with explicitly supplied collaborators. The current
* entity is dropped and the limit analyzer and security manager are taken
* from the given entity manager.
* <p>
* Unlike the other reset overloads this one does not go through
* resetCommon, so the listener list and the whitespace bookkeeping fields
* are left as they are.
*
* @param symbolTable the symbol table to use
* @param entityManager the entity manager to bind to; must not be null
* because its fields are read here
* @param reporter the error reporter to use
*/
public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
XMLErrorReporter reporter) {
fCurrentEntity = null;
fSymbolTable = symbolTable;
fEntityManager = entityManager;
fErrorReporter = reporter;
fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
fSecurityManager = fEntityManager.fSecurityManager;
}
/**
* State reset shared by the PropertyManager and XMLComponentManager reset
* overloads: drops the current entity, restores the whitespace bookkeeping
* to its initial values, removes all listeners and re-reads the limit
* analyzer and security manager from the entity manager.
* <p>
* Dereferences {@code fEntityManager}, so an entity manager must already
* have been assigned when this runs.
*/
private void resetCommon() {
fCurrentEntity = null;
whiteSpaceLen = 0;
whiteSpaceInfoNeeded = true;
listeners.clear();
fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
fSecurityManager = fEntityManager.fSecurityManager;
}
/**
* Reports the XML version recorded for the entity being scanned. The value
* normally comes from the XML or text declaration, or is defaulted by the
* parser. It may differ from the version whose processing rules are being
* applied: an XML 1.1 document may reference XML 1.0 entities, in which
* case the XML 1.1 rules apply to the whole document. For a given entity
* the value is only final once its XML or text declaration has been read,
* or once it is known that there is none.
*
* @return the current entity's XML version, or null when there is no
* current entity
*/
public final String getXMLVersion() {
    return (fCurrentEntity == null) ? null : fCurrentEntity.xmlVersion;
} // getXMLVersion():String
/**
* Sets the XML version. This method is used by the
* scanners to report the value of the version pseudo-attribute
* in an XML or text declaration.
* <p>
* Also flags that the version was set explicitly. The current entity is
* dereferenced without a null check, so a current entity must be set
* before this is called.
*
* @param xmlVersion the XML version of the current entity
*/
public final void setXMLVersion(String xmlVersion) {
xmlVersionSetExplicitly = true; // SAPJVM
fCurrentEntity.xmlVersion = xmlVersion;
} // setXMLVersion(String)
/**
* Makes the given entity the one this scanner reads from and, when it is
* not null, refreshes the cached {@code isExternal} flag from it.
*
* @param scannedEntity the entity to scan next; may be null, in which case
* the cached flag keeps its previous value
*/
public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){
    fCurrentEntity = scannedEntity;
    if (scannedEntity == null) {
        return;
    }
    isExternal = scannedEntity.isExternal();
    if (DEBUG_BUFFER) {
        System.out.println("Current Entity is "+scannedEntity.name);
    }
}
/**
* Returns the entity currently being scanned.
*
* @return the current entity, or null if none is set
*/
public Entity.ScannedEntity getCurrentEntity(){
return fCurrentEntity ;
}
//
// XMLEntityReader methods
//
/**
* Reports the base system identifier of the entity being scanned. The
* value is the expanded system identifier of the entity's location.
*
* @return the identifier, or null when there is no current entity or the
* entity has no location
*/
public final String getBaseSystemId() {
    if (fCurrentEntity == null || fCurrentEntity.entityLocation == null) {
        return null;
    }
    return fCurrentEntity.entityLocation.getExpandedSystemId();
} // getBaseSystemId():String
/**
* Does nothing: the argument is ignored and no state is changed.
*
* @param systemId ignored
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
*/
public void setBaseSystemId(String systemId) {
//no-op
}
///////////// Locator methods start.
/**
* Reports the line number recorded on the entity being scanned.
*
* @return the current entity's line number, or -1 when there is no
* current entity (for example because it has been closed)
*/
public final int getLineNumber(){
    if (fCurrentEntity == null) {
        return -1;
    }
    return fCurrentEntity.lineNumber;
}
/**
* Does nothing: the argument is ignored and no state is changed.
*
* @param line ignored
* @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
*/
public void setLineNumber(int line) {
//no-op
}
/**
* Reports the column number recorded on the entity being scanned.
*
* @return the current entity's column number, or -1 when there is no
* current entity (for example because it has been closed)
*/
public final int getColumnNumber(){
    if (fCurrentEntity == null) {
        return -1;
    }
    return fCurrentEntity.columnNumber;
}
/**
* Does nothing: the argument is ignored and no state is changed.
*
* @param col ignored
* @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
*/
public void setColumnNumber(int col) {
// no-op
}
/**
* Reports the character offset within the entity being scanned, computed
* as the entity's {@code fTotalCountTillLastLoad} plus its current buffer
* position.
*
* @return the character offset, or -1 when there is no current entity
*/
public final int getCharacterOffset(){
    if (fCurrentEntity == null) {
        return -1;
    }
    return fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position;
}
/**
* Reports the expanded system identifier of the current entity's location.
*
* @return the identifier, or null when there is no current entity or the
* entity has no location
*/
public final String getExpandedSystemId() {
    if (fCurrentEntity == null || fCurrentEntity.entityLocation == null) {
        return null;
    }
    return fCurrentEntity.entityLocation.getExpandedSystemId();
}
/**
* Does nothing: the argument is ignored and no state is changed.
*
* @param systemId ignored
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
*/
public void setExpandedSystemId(String systemId) {
//no-op
}
/**
* Reports the literal system identifier of the current entity's location.
*
* @return the identifier, or null when there is no current entity or the
* entity has no location
*/
public final String getLiteralSystemId() {
    if (fCurrentEntity == null || fCurrentEntity.entityLocation == null) {
        return null;
    }
    return fCurrentEntity.entityLocation.getLiteralSystemId();
}
/**
* Does nothing: the argument is ignored and no state is changed.
*
* @param systemId ignored
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
*/
public void setLiteralSystemId(String systemId) {
//no-op
}
/**
* Reports the public identifier of the current entity's location.
*
* @return the identifier, or null when there is no current entity or the
* entity has no location
*/
public final String getPublicId() {
    if (fCurrentEntity == null || fCurrentEntity.entityLocation == null) {
        return null;
    }
    return fCurrentEntity.entityLocation.getPublicId();
}
/**
* Does nothing: the argument is ignored and no state is changed.
*
* @param publicId ignored
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
*/
public void setPublicId(String publicId) {
//no-op
}
///////////////// Locator methods finished.
/**
* Stores the version of the current entity being scanned in the entity's
* {@code version} field. The current entity is dereferenced without a null
* check, so a current entity must be set before this is called.
*
* @param version the version to record on the current entity
*/
public void setVersion(String version){
fCurrentEntity.version = version;
}
/**
* Reports the value of the current entity's {@code version} field.
*
* @return the recorded version, or null when there is no current entity
*/
public String getVersion(){
    return (fCurrentEntity == null) ? null : fCurrentEntity.version;
}
/**
* Reports the encoding recorded for the entity being scanned.
* <p>
* For a given entity the value is only final once the encoding declaration
* has been read, or once it is known that there is none: when no encoding
* was specified on the XMLInputSource the parser starts from an initial
* guess, which may turn out to be wrong.
*
* @return the current entity's encoding, or null when there is no current
* entity
*/
public final String getEncoding() {
    return (fCurrentEntity == null) ? null : fCurrentEntity.encoding;
} // getEncoding():String
/**
* Sets the encoding of the scanner. This method is used by the
* scanners if the XMLDecl or TextDecl line contains an encoding
* pseudo-attribute.
* <p>
* <strong>Note:</strong> The underlying character reader on the
* current entity will be changed to accommodate the new encoding.
* However, the new encoding is ignored if the current reader was
* not constructed from an input stream (e.g. an external entity
* that is resolved directly to the appropriate java.io.Reader
* object).
* <p>
* When the entity's current encoding starts with "UTF-16", a request for
* "UTF-16" is ignored, and a request for ISO-10646-UCS-2 or
* ISO-10646-UCS-4 installs a UCSReader whose byte order follows the
* current encoding (big-endian for "UTF-16BE", little-endian otherwise).
* In those cases the entity's recorded encoding is left unchanged.
* Otherwise a new reader is created via createReader and the recorded
* encoding is updated.
*
* @param encoding The IANA encoding name of the new encoding.
*
* @throws IOException Thrown if the new encoding is not supported.
*
* @see com.sun.org.apache.xerces.internal.util.EncodingMap
*/
public final void setEncoding(String encoding) throws IOException {
if (DEBUG_ENCODINGS) {
System.out.println("$$$ setEncoding: "+encoding);
}
// only entities backed by a byte stream can have their reader replaced
if (fCurrentEntity.stream != null) {
// if the encoding is the same, don't change the reader and
// re-use the original reader used by the OneCharReader
// NOTE: Besides saving an object, this overcomes deficiencies
// in the UTF-16 reader supplied with the standard Java
// distribution (up to and including 1.3). The UTF-16
// decoder buffers 8K blocks even when only asked to read
// a single char! -Ac
if (fCurrentEntity.encoding == null ||
!fCurrentEntity.encoding.equals(encoding)) {
// UTF-16 is a bit of a special case. If the encoding is UTF-16,
// and we know the endian-ness, we shouldn't change readers.
// If it's ISO-10646-UCS-(2|4), then we'll have to deduce
// the endian-ness from the encoding we presently have.
if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
// compare case-insensitively, using a fixed locale for the upper-casing
String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
if(ENCODING.equals("UTF-16")) return;
if(ENCODING.equals("ISO-10646-UCS-4")) {
if(fCurrentEntity.encoding.equals("UTF-16BE")) {
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
} else {
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
}
return;
}
if(ENCODING.equals("ISO-10646-UCS-2")) {
if(fCurrentEntity.encoding.equals("UTF-16BE")) {
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
} else {
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
}
return;
}
}
// wrap a new reader around the input stream, changing
// the encoding
if (DEBUG_ENCODINGS) {
System.out.println("$$$ creating new reader from stream: "+
fCurrentEntity.stream);
}
//fCurrentEntity.stream.reset();
fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
fCurrentEntity.encoding = encoding;
} else {
if (DEBUG_ENCODINGS)
System.out.println("$$$ reusing old reader on stream");
}
}
} // setEncoding(String)
/**
* Returns true if the current entity being scanned is external.
* <p>
* The answer is obtained from the current entity on every call rather than
* from the cached {@code isExternal} field. The current entity is
* dereferenced without a null check, so a current entity must be set
* before this is called.
*
* @return the result of the current entity's isExternal()
*/
public final boolean isExternal() {
return fCurrentEntity.isExternal();
} // isExternal():boolean
/**
* Looks at the character located {@code relative} positions after the
* current position without moving the position.
*
* @param relative distance from the current position of the character to
* look at
* @return the character, or -1 when {@code arrangeCapacity(relative + 1,
* false)} returns false (presumably because that many characters
* could not be made available; confirm against arrangeCapacity)
* @throws IOException Thrown if i/o error occurs.
*/
public int getChar(int relative) throws IOException{
    if (!arrangeCapacity(relative + 1, false)) {
        return -1;
    }
    return fCurrentEntity.ch[fCurrentEntity.position + relative];
}//getChar()
/**
* Looks at the next character on the input without consuming it. When the
* cached {@code isExternal} flag is set, a carriage return is reported as
* a line feed.
*
* @return the next character on the input
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
public int peekChar() throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(peekChar: ");
        print();
        System.out.println();
    }

    // refill the buffer once everything in it has been consumed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true, true);
    }

    final int raw = fCurrentEntity.ch[fCurrentEntity.position];
    final int peeked = (isExternal && raw == '\r') ? '\n' : raw;

    if (DEBUG_BUFFER) {
        System.out.print(")peekChar: ");
        print();
        System.out.println(" -> '"+(char)peeked+"'");
    }
    return peeked;
} // peekChar():int
/**
* Returns the next character on the input.
* <p>
* <strong>Note:</strong> The character is consumed.
* <p>
* A line feed, or a carriage return in an external entity, advances the
* line number and resets the column. In an external entity a carriage
* return is returned as a line feed, and a line feed immediately following
* it is consumed as part of the same line break.
*
* @param nt The type of the name (element or attribute); passed on to the
* entity limit check
*
* @return the character that was consumed
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
protected int scanChar(NameType nt) throws IOException {
if (DEBUG_BUFFER) {
System.out.print("(scanChar: ");
print();
System.out.println();
}
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true, true);
}
// scan character
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\n' ||
(c == '\r' && isExternal)) {
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// the line break was the last buffered character: keep it at index 0
// and load what follows behind it, so a '\n' after '\r' can be seen
if (fCurrentEntity.position == fCurrentEntity.count) {
invokeListeners(1);
fCurrentEntity.ch[0] = (char)c;
load(1, false, false);
offset = 0;
}
if (c == '\r' && isExternal) {
// swallow the '\n' of a "\r\n" pair; otherwise step back
if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
fCurrentEntity.position--;
}
c = '\n';
}
}
// return character that was scanned
if (DEBUG_BUFFER) {
System.out.print(")scanChar: ");
print();
System.out.println(" -> '"+(char)c+"'");
}
fCurrentEntity.columnNumber++;
// the limit check is skipped while the XML version is being detected
if (!detectingVersion) {
checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
}
return c;
} // scanChar():int
/**
* Returns a string matching the NMTOKEN production appearing immediately
* on the input as a symbol, or null if no NMTOKEN string is present.
* <p>
* <strong>Note:</strong> The NMTOKEN characters are consumed.
* <p>
* <strong>Note:</strong> The string returned must be a symbol. The
* SymbolTable can be used for this purpose.
*
* @return the scanned NMTOKEN as a symbol, or null if no name character
* was found at the current position
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see com.sun.org.apache.xerces.internal.util.SymbolTable
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
*/
protected String scanNmtoken() throws IOException {
if (DEBUG_BUFFER) {
System.out.print("(scanNmtoken: ");
print();
System.out.println();
}
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true, true);
}
// scan nmtoken
int offset = fCurrentEntity.position;
boolean vc = false;
char c;
while (true){
//while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
c = fCurrentEntity.ch[fCurrentEntity.position];
// table lookup for characters below 127, XMLChar for everything else
if(c < 127){
vc = VALID_NAMES[c];
}else{
vc = XMLChar.isName(c);
}
if(!vc)break;
// end of buffered data reached in the middle of the token: move what
// has been scanned so far to the front of the buffer and load more
if (++fCurrentEntity.position == fCurrentEntity.count) {
int length = fCurrentEntity.position - offset;
invokeListeners(length);
if (length == fCurrentEntity.fBufferSize) {
// bad luck we have to resize our buffer
char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
System.arraycopy(fCurrentEntity.ch, offset,
tmp, 0, length);
fCurrentEntity.ch = tmp;
fCurrentEntity.fBufferSize *= 2;
} else {
System.arraycopy(fCurrentEntity.ch, offset,
fCurrentEntity.ch, 0, length);
}
offset = 0;
if (load(length, false, false)) {
break;
}
}
}
int length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length;
// return nmtoken
String symbol = null;
if (length > 0) {
symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
}
if (DEBUG_BUFFER) {
System.out.print(")scanNmtoken: ");
print();
System.out.println(" -> "+String.valueOf(symbol));
}
return symbol;
} // scanNmtoken():String
/**
* Returns a string matching the Name production appearing immediately
* on the input as a symbol, or null if no Name string is present.
* <p>
* <strong>Note:</strong> The Name characters are consumed.
* <p>
* <strong>Note:</strong> The string returned must be a symbol. The
* SymbolTable can be used for this purpose.
* <p>
* A non-empty name is checked against the maximum name length limit and
* the entity size limits before it is added to the symbol table. The
* single-character early return below does not perform those checks.
*
* @param nt The type of the name (element or attribute)
*
* @return the scanned name as a symbol, or null if the character at the
* current position is not a name start character
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see com.sun.org.apache.xerces.internal.util.SymbolTable
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
*/
protected String scanName(NameType nt) throws IOException {
if (DEBUG_BUFFER) {
System.out.print("(scanName: ");
print();
System.out.println();
}
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true, true);
}
// scan name
int offset = fCurrentEntity.position;
int length;
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
// the name start was the last buffered character: keep it at index 0
// and load more behind it
if (++fCurrentEntity.position == fCurrentEntity.count) {
invokeListeners(1);
fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
offset = 0;
// a true result from load ends the scan: the name is that one character
if (load(1, false, false)) {
fCurrentEntity.columnNumber++;
String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
if (DEBUG_BUFFER) {
System.out.print(")scanName: ");
print();
System.out.println(" -> "+String.valueOf(symbol));
}
return symbol;
}
}
boolean vc =false;
while (true ){
//XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
char c = fCurrentEntity.ch[fCurrentEntity.position];
// table lookup for characters below 127, XMLChar for everything else
if(c < 127){
vc = VALID_NAMES[c];
}else{
vc = XMLChar.isName(c);
}
if(!vc)break;
// checkBeforeLoad advances the position; a positive result means the
// buffer end was reached and the scanned part now starts at index 0
if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) {
offset = 0;
if (load(length, false, false)) {
break;
}
}
}
}
length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length;
// return name
String symbol;
if (length > 0) {
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
checkEntityLimit(nt, fCurrentEntity, offset, length);
symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
} else
symbol = null;
if (DEBUG_BUFFER) {
System.out.print(")scanName: ");
print();
System.out.println(" -> "+String.valueOf(symbol));
}
return symbol;
} // scanName():String
/**
* Scans a qualified name from the input, setting the fields of the
* QName structure appropriately.
* <p>
* <strong>Note:</strong> The qualified name characters are consumed.
* <p>
* <strong>Note:</strong> The strings used to set the values of the
* QName structure must be symbols. The SymbolTable can be used for
* this purpose.
* <p>
* The first ':' splits the name into prefix and local part; scanning stops
* at a second ':'. Prefix and local part are each checked against the
* maximum name length limit.
*
* @param qname The qualified name structure to fill.
* @param nt The type of the name (element or attribute)
*
* @return Returns true if a qualified name appeared immediately on
* the input and was scanned, false otherwise.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see com.sun.org.apache.xerces.internal.util.SymbolTable
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
*/
protected boolean scanQName(QName qname, NameType nt) throws IOException {
if (DEBUG_BUFFER) {
System.out.print("(scanQName, "+qname+": ");
print();
System.out.println();
}
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true, true);
}
// scan qualified name
int offset = fCurrentEntity.position;
//making a check if if the specified character is a valid name start character
//as defined by production [5] in the XML 1.0 specification.
// Name ::= (Letter | '_' | ':') (NameChar)*
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
// the name start was the last buffered character: keep it at index 0
// and load more behind it
if (++fCurrentEntity.position == fCurrentEntity.count) {
invokeListeners(1);
fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
offset = 0;
// a true result from load ends the scan: the name is that one character
if (load(1, false, false)) {
fCurrentEntity.columnNumber++;
//adding into symbol table.
//XXX We are trying to add single character in SymbolTable??????
String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
qname.setValues(null, name, name, null);
if (DEBUG_BUFFER) {
System.out.print(")scanQName, "+qname+": ");
print();
System.out.println(" -> true");
}
checkEntityLimit(nt, fCurrentEntity, 0, 1);
return true;
}
}
// buffer index of the first ':' seen, or -1 while none has been seen
int index = -1;
boolean vc = false;
int length;
while ( true){
//XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
char c = fCurrentEntity.ch[fCurrentEntity.position];
// table lookup for characters below 127, XMLChar for everything else
if(c < 127){
vc = VALID_NAMES[c];
}else{
vc = XMLChar.isName(c);
}
if(!vc)break;
if (c == ':') {
// a second colon ends the qualified name
if (index != -1) {
break;
}
index = fCurrentEntity.position;
//check prefix before further read
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset);
}
// checkBeforeLoad advances the position; a positive result means the
// buffer end was reached and the scanned part now starts at index 0,
// so the colon index is rebased to match
if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) {
if (index != -1) {
index = index - offset;
}
offset = 0;
if (load(length, false, false)) {
break;
}
}
}
length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length;
if (length > 0) {
String prefix = null;
String localpart = null;
String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
offset, length);
if (index != -1) {
int prefixLength = index - offset;
//check the result: prefix
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, prefixLength);
prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
offset, prefixLength);
// local part is everything after the colon
int len = length - prefixLength - 1;
//check the result: localpart
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len);
localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
index + 1, len);
} else {
localpart = rawname;
//check the result: localpart
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
}
qname.setValues(prefix, localpart, rawname, null);
if (DEBUG_BUFFER) {
System.out.print(")scanQName, "+qname+": ");
print();
System.out.println(" -> true");
}
checkEntityLimit(nt, fCurrentEntity, offset, length);
return true;
}
}
// no qualified name found
if (DEBUG_BUFFER) {
System.out.print(")scanQName, "+qname+": ");
print();
System.out.println(" -> false");
}
return false;
} // scanQName(QName):boolean
/**
* Advances the entity position by one character and tests whether that
* reaches the end of the buffered data. If it does, the name scanned so
* far is checked against the maximum name length limit, the listeners are
* notified, and the characters scanned so far are moved to the start of
* the buffer (which is doubled when they already fill it completely) so
* that the caller can load more data behind them.
*
* @param entity the current entity
* @param offset the offset from which the current read was started
* @param nameOffset the offset from which the current name starts, or -1
* @return the number of characters scanned before the end of the buffer,
* zero if there is more to be read in the buffer
* @throws IOException Thrown if i/o error occurs.
*/
protected int checkBeforeLoad(Entity.ScannedEntity entity, int offset,
        int nameOffset) throws IOException {
    // still inside the buffered data: nothing to prepare
    if (++entity.position != entity.count) {
        return 0;
    }

    final int scanned = entity.position - offset;
    final int nameStart;
    final int nameLength;
    if (nameOffset == -1) {
        nameStart = offset;
        nameLength = scanned;
    } else {
        nameStart = nameOffset - offset;
        nameLength = scanned - nameStart;
    }

    // check limit before loading more data
    checkLimit(Limit.MAX_NAME_LIMIT, entity, nameStart, nameLength);
    invokeListeners(scanned);

    if (scanned == entity.ch.length) {
        // the scanned characters fill the whole buffer: grow it
        final char[] grown = new char[entity.fBufferSize * 2];
        System.arraycopy(entity.ch, offset, grown, 0, scanned);
        entity.ch = grown;
        entity.fBufferSize *= 2;
    } else {
        System.arraycopy(entity.ch, offset, entity.ch, 0, scanned);
    }
    return scanned;
}
/**
* Applies the entity-related limits to a scanned span. Nothing happens
* unless the entity is non-null and flagged as a general entity
* ({@code isGE}). For such an entity the span length is counted towards
* the general entity size limit unless the name type is REFERENCE, and an
* element start or attribute name additionally counts as one node.
*
* @param nt type of name (element, attribute or entity); may be null
* @param entity The current entity
* @param offset The index of the first character of the span
* @param length The length of the span scanned
*/
protected void checkEntityLimit(NameType nt, ScannedEntity entity, int offset, int length) {
    if (entity != null && entity.isGE) {
        final boolean startsNode =
                (nt == NameType.ELEMENTSTART) || (nt == NameType.ATTRIBUTENAME);
        if (nt != NameType.REFERENCE) {
            checkLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT, entity, offset, length);
        }
        if (startsNode) {
            checkNodeCount(entity);
        }
    }
}
/**
* Counts one node towards the entity replacement limit when the given
* entity is non-null and flagged as a general entity ({@code isGE});
* otherwise does nothing.
*
* @param entity The current entity
*/
protected void checkNodeCount(ScannedEntity entity) {
    if (entity == null || !entity.isGE) {
        return;
    }
    checkLimit(Limit.ENTITY_REPLACEMENT_LIMIT, entity, 0, 1);
}
/**
* Adds {@code length} to the running value of the given limit for the
* entity and reports a fatal error if the security manager says that limit
* is exceeded. Afterwards the total entity size limit is checked in the
* same way, whatever limit was passed in.
*
* @param limit The Limit to be checked
* @param entity The current entity; its name is used as the accounting key
* @param offset The index of the first character of the span (not used here)
* @param length The amount to add to the limit's running value
*/
protected void checkLimit(Limit limit, ScannedEntity entity, int offset, int length) {
    fLimitAnalyzer.addValue(limit, entity.name, length);

    if (fSecurityManager.isOverLimit(limit, fLimitAnalyzer)) {
        fSecurityManager.debugPrint(fLimitAnalyzer);
        final Object[] args;
        if (limit == Limit.ENTITY_REPLACEMENT_LIMIT) {
            // the replacement-limit message takes no entity name
            args = new Object[]{fLimitAnalyzer.getValue(limit),
                    fSecurityManager.getLimit(limit),
                    fSecurityManager.getStateLiteral(limit)};
        } else {
            args = new Object[]{entity.name, fLimitAnalyzer.getValue(limit),
                    fSecurityManager.getLimit(limit),
                    fSecurityManager.getStateLiteral(limit)};
        }
        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, limit.key(),
                args, XMLErrorReporter.SEVERITY_FATAL_ERROR);
    }

    if (fSecurityManager.isOverLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
        fSecurityManager.debugPrint(fLimitAnalyzer);
        final Object[] totalArgs = new Object[]{
                fLimitAnalyzer.getTotalValue(Limit.TOTAL_ENTITY_SIZE_LIMIT),
                fSecurityManager.getLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT),
                fSecurityManager.getStateLiteral(Limit.TOTAL_ENTITY_SIZE_LIMIT)};
        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "TotalEntitySizeLimit",
                totalArgs, XMLErrorReporter.SEVERITY_FATAL_ERROR);
    }
}
/**
* Scans a range of parsed character data and hands the scanned span to
* the supplied XMLString through {@code content.setValues(...)}, using the
* entity's character buffer together with an offset and a length.
* <p>
* <strong>Note:</strong> The characters are consumed.
* <p>
* <strong>Note:</strong> This method does not guarantee to return
* the longest run of parsed character data. This method may return
* before markup due to reaching the end of the input buffer or any
* other reason.
* <p>
* Leading line breaks are normalized in place: in an external entity a
* carriage return, optionally followed by a line feed, becomes a single
* line feed in the buffer.
*
* @param content The content structure to fill.
*
* @return Returns the next character on the input, if known. This
* value may be -1 but this does <em>not</em> designate
* end of file.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
protected int scanContent(XMLString content) throws IOException {
if (DEBUG_BUFFER) {
System.out.print("(scanContent: ");
print();
System.out.println();
}
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true, true);
} else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
// only one buffered character is left: move it to index 0 and load
// more behind it
invokeListeners(0);
fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
load(1, false, false);
fCurrentEntity.position = 0;
}
// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
// set when the entity limit check has already been done for this span
boolean counted = false;
if (c == '\n' || (c == '\r' && isExternal)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && isExternal) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// buffer exhausted inside the run of line breaks: restart at the
// front of the buffer, keeping room for the line breaks seen so far
if (fCurrentEntity.position == fCurrentEntity.count) {
checkEntityLimit(null, fCurrentEntity, offset, newlines);
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
counted = true;
break;
}
}
// "\r\n" counts as one line break: skip the '\n' and drop one
// character from the front of the reported span
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
} else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// same buffer-exhausted handling as in the '\r' branch above
if (fCurrentEntity.position == fCurrentEntity.count) {
checkEntityLimit(null, fCurrentEntity, offset, newlines);
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
counted = true;
break;
}
}
} else {
// not a line break: un-consume it and leave the loop
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
// overwrite the scanned line-break span with line feeds
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
int length = fCurrentEntity.position - offset;
// only one buffered character remains: return the line breaks alone
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
checkEntityLimit(null, fCurrentEntity, offset, length);
// NOTE(review): an older comment here said the data is appended to the
// buffer, but the append call is commented out and setValues is what
// runs; confirm which behavior callers expect.
content.setValues(fCurrentEntity.ch, offset, length);
//content.append(fCurrentEntity.ch, offset, length);
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
return -1;
}
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
}
// consume characters for as long as XMLChar.isContent accepts them
while (fCurrentEntity.position < fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (!XMLChar.isContent(c)) {
fCurrentEntity.position--;
break;
}
}
int length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length - newlines;
if (!counted) {
checkEntityLimit(null, fCurrentEntity, offset, length);
}
// NOTE(review): an older comment here said the data is appended to the
// buffer, but the append call is commented out and setValues is what
// runs; confirm which behavior callers expect.
content.setValues(fCurrentEntity.ch, offset, length);
//content.append(fCurrentEntity.ch, offset, length);
// return next character
if (fCurrentEntity.position != fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position];
// REVISIT: Does this need to be updated to fix the
// #x0D ^#x0A newline normalization problem? -Ac
if (c == '\r' && isExternal) {
c = '\n';
}
} else {
c = -1;
}
if (DEBUG_BUFFER) {
System.out.print(")scanContent: ");
print();
System.out.println(" -> '"+(char)c+"'");
}
return c;
} // scanContent(XMLString):int
/**
* Scans a range of attribute value data, setting the fields of the
* XMLString structure, appropriately.
* <p>
* <strong>Note:</strong> The characters are consumed.
* <p>
* <strong>Note:</strong> This method does not guarantee to return
* the longest run of attribute value data. This method may return
* before the quote character due to reaching the end of the input
* buffer or any other reason.
* <p>
* <strong>Note:</strong> The fields contained in the XMLString
* structure are not guaranteed to remain valid upon subsequent calls
* to the entity scanner. Therefore, the caller is responsible for
* immediately using the returned character data or making a copy of
* the character data.
* <p>
* <strong>Note:</strong> Scanning also stops at a <code>'%'</code> and at
* any character rejected by <code>XMLChar.isContent</code>.
*
* @param quote The quote character that signifies the end of the
* attribute value data.
* @param content The content structure to fill.
* @param isNSURI a flag indicating whether the content is a Namespace URI;
* when set, the scanned range is additionally checked against
* <code>Limit.MAX_NAME_LIMIT</code>.
*
* @return Returns the next character on the input, if known. This
* value may be -1 but this does <em>not</em> designate
* end of file.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
protected int scanLiteral(int quote, XMLString content, boolean isNSURI)
throws IOException {
if (DEBUG_BUFFER) {
System.out.print("(scanLiteral, '"+(char)quote+"': ");
print();
System.out.println();
}
// load more characters, if needed:
// - buffer exhausted: refill starting at index 0;
// - exactly one character left: notify the listeners, move that character
//   to index 0, refill behind it and restart scanning at index 0.
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true, true);
} else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
invokeListeners(0);
fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
load(1, false, false);
fCurrentEntity.position = 0;
}
// normalize newlines
// offset marks the start of the range handed back through 'content';
// newlines counts the line breaks seen so the column number can be
// corrected later.
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
// start a fresh list of recorded whitespace positions for this call
if(whiteSpaceInfoNeeded)
whiteSpaceLen=0;
if (c == '\n' || (c == '\r' && isExternal)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
// consume the leading run of line breaks ('\r' only counts for external
// entities), updating line/column numbers as we go
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && isExternal) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// buffer ran out in the middle of the run: keep 'newlines' slots at the
// front of the buffer and refill behind them; stop if the load reports
// an entity change
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
break;
}
}
// "\r\n" pair: skip the '\n' and advance offset as well, so the pair
// contributes a single character to the returned range
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
// NOTE(review): for a '\r' that is not followed by '\n' the counter is
// incremented a second time here -- confirm this is intended.
else {
newlines++;
}
/***/
} else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// same refill handling as in the '\r' branch above
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
break;
}
}
/*** NEWLINE NORMALIZATION ***
* if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
* && external) {
* fCurrentEntity.position++;
* offset++;
* }
* /***/
} else {
// not a line break: un-read the character and leave the run
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
// overwrite the consumed run with '\n' and record each of those positions
// (note: recorded here regardless of whiteSpaceInfoNeeded)
int i=0;
for ( i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
storeWhiteSpace(i);
}
int length = fCurrentEntity.position - offset;
// only one character is left in the buffer: hand back just the newline
// run and return -1 (next character not known)
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
content.setValues(fCurrentEntity.ch, offset, length);
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
return -1;
}
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
}
// scan literal value
// Stops (without consuming) at the closing quote, at '%', or at a character
// rejected by XMLChar.isContent(). The quote only terminates the scan when
// the current entity is not flagged as a literal, or when isExternal is set.
for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
c = fCurrentEntity.ch[fCurrentEntity.position];
if ((c == quote &&
(!fCurrentEntity.literal || isExternal)) ||
c == '%' || !XMLChar.isContent(c)) {
break;
}
// remember where tabs occur when whitespace information was requested
if (whiteSpaceInfoNeeded && c == '\t') {
storeWhiteSpace(fCurrentEntity.position);
}
}
int length = fCurrentEntity.position - offset;
// the line breaks already reset the column, so they are not added again
fCurrentEntity.columnNumber += length - newlines;
checkEntityLimit(null, fCurrentEntity, offset, length);
if (isNSURI) {
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
}
content.setValues(fCurrentEntity.ch, offset, length);
// return next character (-1 when the buffer is exhausted)
if (fCurrentEntity.position != fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position];
// NOTE: We don't want to accidentally signal the
// end of the literal if we're expanding an
// entity appearing in the literal. -Ac
if (c == quote && fCurrentEntity.literal) {
c = -1;
}
} else {
c = -1;
}
if (DEBUG_BUFFER) {
System.out.print(")scanLiteral, '"+(char)quote+"': ");
print();
System.out.println(" -> '"+(char)c+"'");
}
return c;
} // scanLiteral(int,XMLString):int
/**
 * Records the buffer position of a whitespace character in the
 * whitespace lookup table. When the table is full it is replaced by a
 * copy that has room for 100 more entries.
 *
 * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
 *
 * @param whiteSpacePos position of a whitespace in the scanner entity buffer
 */
private void storeWhiteSpace(int whiteSpacePos) {
    final int capacity = whiteSpaceLookup.length;
    if (whiteSpaceLen >= capacity) {
        // table is full: move the recorded positions into a larger array
        final int[] grown = new int[capacity + 100];
        System.arraycopy(whiteSpaceLookup, 0, grown, 0, capacity);
        whiteSpaceLookup = grown;
    }
    whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
}
//CHANGED:
/**
* Scans a range of character data up to the specified delimiter,
* appending the scanned characters to the given XMLStringBuffer.
* <p>
* <strong>Note:</strong> The characters are consumed.
* <p>
* <strong>Note:</strong> This assumes that the delimiter contains at
* least one character (its first character is read unconditionally).
* Presumably the internal buffer must also be at least as large as the
* delimiter -- TODO confirm.
* <p>
* <strong>Note:</strong> This method does not guarantee to return
* the longest run of character data. This method may return before
* the delimiter due to reaching the end of the input buffer or any
* other reason.
* <p>
* <strong>Note:</strong> The delimiter itself is consumed but is not
* appended to the buffer.
* <p>
* @param delimiter The string that signifies the end of the character
* data to be scanned.
* @param buffer The XMLStringBuffer to fill.
*
* @return Returns true if there is more data to scan, false otherwise
* (false is returned once the delimiter has been found, or when
* fewer characters than the delimiter length remain).
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
protected boolean scanData(String delimiter, XMLStringBuffer buffer)
throws IOException {
boolean done = false;
int delimLen = delimiter.length();
char charAt0 = delimiter.charAt(0);
// each pass handles one newline run plus the data that follows it; the
// loop ends once the delimiter was found (early returns aside)
do {
if (DEBUG_BUFFER) {
System.out.print("(scanData: ");
print();
System.out.println();
}
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true, false);
}
// while fewer than delimLen characters remain, shift them to the start of
// the buffer and load more behind them; stop once a load reports an
// entity change
boolean bNextEntity = false;
while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
&& (!bNextEntity))
{
System.arraycopy(fCurrentEntity.ch,
fCurrentEntity.position,
fCurrentEntity.ch,
0,
fCurrentEntity.count - fCurrentEntity.position);
bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
fCurrentEntity.position = 0;
fCurrentEntity.startPosition = 0;
}
// still fewer than delimLen characters: the delimiter cannot be present;
// append whatever is left, mark the buffer as consumed and give up
if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
// something must be wrong with the input: e.g., file ends in an unterminated comment
int length = fCurrentEntity.count - fCurrentEntity.position;
checkEntityLimit(NameType.COMMENT, fCurrentEntity, fCurrentEntity.position, length);
buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
// NOTE(review): the column is advanced by 'count' rather than by
// 'length' -- confirm this is intended.
fCurrentEntity.columnNumber += fCurrentEntity.count;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = fCurrentEntity.count;
fCurrentEntity.startPosition = fCurrentEntity.count;
load(0, true, false);
return false;
}
// normalize newlines
// offset marks the start of the range appended to 'buffer'; newlines
// counts the line breaks seen so the column number can be corrected later
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
if (c == '\n' || (c == '\r' && isExternal)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
// consume the leading run of line breaks ('\r' only counts for external
// entities), updating line/column numbers as we go
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && isExternal) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// buffer ran out in the middle of the run: keep 'newlines' slots at the
// front of the buffer and refill behind them
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
break;
}
}
// "\r\n" pair: skip the '\n' and advance offset as well, so the pair
// contributes a single character to the appended range
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
// NOTE(review): for a '\r' that is not followed by '\n' the counter is
// incremented a second time here -- confirm this is intended.
else {
newlines++;
}
} else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
// NOTE(review): only this branch also sets count before loading; the
// '\r' branch above does not -- confirm which is intended.
fCurrentEntity.count = newlines;
if (load(newlines, false, true)) {
break;
}
}
} else {
// not a line break: un-read the character and leave the run
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
// overwrite the consumed run with '\n' so only line feeds are appended
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
int length = fCurrentEntity.position - offset;
// only one character is left in the buffer: append the newline run and
// let the caller call again
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
buffer.append(fCurrentEntity.ch, offset, length);
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
return true;
}
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
}
// iterate over buffer looking for delimiter
OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == charAt0) {
// looks like we just hit the delimiter
int delimOffset = fCurrentEntity.position - 1;
for (int i = 1; i < delimLen; i++) {
// buffer ended inside a possible delimiter: rewind to its first
// character and stop scanning this buffer
if (fCurrentEntity.position == fCurrentEntity.count) {
fCurrentEntity.position -= i;
break OUTER;
}
c = fCurrentEntity.ch[fCurrentEntity.position++];
// mismatch: rewind to just after the first delimiter character and
// carry on with the outer scan
if (delimiter.charAt(i) != c) {
fCurrentEntity.position -= i;
break;
}
}
// the whole delimiter was consumed
if (fCurrentEntity.position == delimOffset + delimLen) {
done = true;
break;
}
} else if (c == '\n' || (isExternal && c == '\r')) {
// leave the line break unread; it is normalized on the next pass of the
// enclosing do-loop
fCurrentEntity.position--;
break;
} else if (XMLChar.isInvalid(c)) {
// invalid character: leave it unread, append what was scanned so far
// and return to the caller
fCurrentEntity.position--;
int length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length - newlines;
checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
buffer.append(fCurrentEntity.ch, offset, length);
return true;
}
}
int length = fCurrentEntity.position - offset;
// the line breaks already reset the column, so they are not added again
fCurrentEntity.columnNumber += length - newlines;
checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
// the delimiter was consumed but must not be appended to the buffer
if (done) {
length -= delimLen;
}
buffer.append(fCurrentEntity.ch, offset, length);
// return true if string was skipped
if (DEBUG_BUFFER) {
System.out.print(")scanData: ");
print();
System.out.println(" -> " + done);
}
} while (!done);
// the loop above only exits with done == true, so this returns false
return !done;
} // scanData(String,XMLString)
/**
 * Consumes the next input character when it matches the requested one.
 * <p>
 * <strong>Note:</strong> Nothing is consumed unless the character
 * matches. When a line feed is requested and the current entity is
 * external, a carriage return (optionally followed by a line feed) is
 * accepted as the match.
 *
 * @param c The character to skip.
 * @param nt The name type that is passed on to the entity limit check.
 *
 * @return Returns true if the character was skipped.
 *
 * @throws IOException Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
protected boolean skipChar(int c, NameType nt) throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(skipChar, '"+(char)c+"': ");
        print();
        System.out.println();
    }

    // refill the buffer when it has been used up
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true, true);
    }

    final int start = fCurrentEntity.position;
    final int next = fCurrentEntity.ch[start];
    final boolean exactMatch = (next == c);
    final boolean carriageReturnForLineFeed =
            !exactMatch && c == '\n' && next == '\r' && isExternal;

    // guard: nothing to skip
    if (!exactMatch && !carriageReturnForLineFeed) {
        if (DEBUG_BUFFER) {
            System.out.print(")skipChar, '"+(char)c+"': ");
            print();
            System.out.println(" -> false");
        }
        return false;
    }

    if (exactMatch) {
        fCurrentEntity.position++;
        if (c == '\n') {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
        } else {
            fCurrentEntity.columnNumber++;
        }
    } else {
        // a '\r' stands in for the requested '\n'
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(1);
            fCurrentEntity.ch[0] = (char) next;
            load(1, false, false);
        }
        fCurrentEntity.position++;
        // swallow the '\n' of a "\r\n" pair as well
        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
            fCurrentEntity.position++;
        }
        fCurrentEntity.lineNumber++;
        fCurrentEntity.columnNumber = 1;
    }

    if (DEBUG_BUFFER) {
        System.out.print(")skipChar, '"+(char)c+"': ");
        print();
        System.out.println(" -> true");
    }
    checkEntityLimit(nt, fCurrentEntity, start, fCurrentEntity.position - start);
    return true;
} // skipChar(int):boolean
/**
 * Tells whether the given character is a space, a line feed, a tab or
 * a carriage return.
 *
 * @param ch the character to test
 * @return true if {@code ch} is one of the four characters above
 */
public boolean isSpace(char ch) {
    switch (ch) {
        case ' ':
        case '\n':
        case '\t':
        case '\r':
            return true;
        default:
            return false;
    }
}
/**
* Skips space characters appearing immediately on the input.
* <p>
* <strong>Note:</strong> The characters are consumed only if they are
* space characters.
* <p>
* Line and column numbers of the current entity are updated while
* skipping, and every skipped character is reported to
* <code>checkEntityLimit</code>.
*
* @return Returns true if at least one space character was skipped;
* false if the next character is not a space or if there is no
* current entity left after loading.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
*/
protected boolean skipSpaces() throws IOException {
if (DEBUG_BUFFER) {
System.out.print("(skipSpaces: ");
print();
System.out.println();
}
//boolean entityChanged = false;
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true, true);
}
//we are doing this check only in skipSpace() because it is called by
//fMiscDispatcher and we want the parser to exit gracefully when document
//is well-formed.
//it is possible that end of document is reached and
//fCurrentEntity becomes null
//nothing was read so entity changed 'false' should be returned.
if(fCurrentEntity == null){
return false ;
}
// skip spaces
int c = fCurrentEntity.ch[fCurrentEntity.position];
// NOTE(review): offset starts one before the current position, so the
// first length passed to checkEntityLimit below is position - offset
// counted from there -- confirm this is intended.
int offset = fCurrentEntity.position - 1;
if (XMLChar.isSpace(c)) {
// one iteration per space character; c always holds the character at
// the current position
do {
boolean entityChanged = false;
// handle newlines
if (c == '\n' || (isExternal && c == '\r')) {
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// the line break is the last character in the buffer: notify the
// listeners, keep it at index 0 and load more input behind it
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
invokeListeners(0);
fCurrentEntity.ch[0] = (char)c;
entityChanged = load(1, true, false);
if (!entityChanged){
// the load change the position to be 1,
// need to restore it when entity not changed
fCurrentEntity.position = 0;
}else if(fCurrentEntity == null){
return true ;
}
}
// for "\r\n" in an external entity step onto the '\n' so that the
// pair is skipped as one line break
if (c == '\r' && isExternal) {
// REVISIT: Does this need to be updated to fix the
// #x0D ^#x0A newline normalization problem? -Ac
if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
fCurrentEntity.position--;
}
}
} else {
fCurrentEntity.columnNumber++;
}
//If this is a general entity, spaces within a start element should be counted
checkEntityLimit(null, fCurrentEntity, offset, fCurrentEntity.position - offset);
offset = fCurrentEntity.position;
// load more characters, if needed
// (the position is only advanced past the space when the entity did
// not change during the load above)
if (!entityChanged){
fCurrentEntity.position++;
}
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true, true);
//we are doing this check only in skipSpace() because it is called by
//fMiscDispatcher and we want the parser to exit gracefully when document
//is well-formed.
//it is possible that end of document is reached and
//fCurrentEntity becomes null
//at least one space was already skipped at this point, so true is returned.
if(fCurrentEntity == null){
return true ;
}
}
} while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
if (DEBUG_BUFFER) {
System.out.print(")skipSpaces: ");
print();
System.out.println(" -> true");
}
return true;
}
// no spaces were found
if (DEBUG_BUFFER) {
System.out.print(")skipSpaces: ");
print();
System.out.println(" -> false");
}
return false;
} // skipSpaces():boolean
/**
* Checks that the given number of characters is available in the
* underlying buffer. This is a convenience overload that delegates to
* {@link #arrangeCapacity(int, boolean)} with <code>changeEntity</code>
* set to <code>false</code>.
*
* @param length the number of characters that must be available
* @return true if the requested number of characters is available
* @throws IOException Thrown if i/o error occurs.
*/
public boolean arrangeCapacity(int length) throws IOException{
return arrangeCapacity(length, false);
}
/**
 * Checks that the given number of unread characters is available in the
 * underlying buffer, loading more input when it is not.
 * <p>
 * When the tail of the buffer array is too short for the request, the
 * unread characters are first moved to the start of the array (after
 * notifying the listeners). The read position is restored after every
 * load.
 *
 * @param length the number of characters that must be available
 * @param changeEntity a flag to indicate that the underlying function should change the entity
 * @return true if the requested number of characters is available
 * @throws IOException Thrown if i/o error occurs.
 */
public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException {
    // fast path: enough unread characters are already buffered
    // (count is the number of characters in the buffer, position is 0 based)
    if (fCurrentEntity.count - fCurrentEntity.position >= length) {
        return true;
    }
    if (DEBUG_SKIP_STRING) {
        System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
        System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
        System.out.println("length = " + length);
    }

    while (fCurrentEntity.count - fCurrentEntity.position < length) {
        // not enough room behind the read position: compact the buffer
        if (fCurrentEntity.ch.length - fCurrentEntity.position < length) {
            invokeListeners(0);
            final int pending = fCurrentEntity.count - fCurrentEntity.position;
            System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch, 0, pending);
            fCurrentEntity.count = pending;
            fCurrentEntity.position = 0;
        }
        if (fCurrentEntity.count - fCurrentEntity.position < length) {
            final int resumeAt = fCurrentEntity.position;
            invokeListeners(resumeAt);
            final boolean entityChanged = load(fCurrentEntity.count, changeEntity, false);
            // load moves the position; put it back where scanning left off
            fCurrentEntity.position = resumeAt;
            if (entityChanged) {
                break;
            }
        }
        if (DEBUG_SKIP_STRING) {
            System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
            System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
            System.out.println("length = " + length);
        }
    }

    // re-check after loading
    return fCurrentEntity.count - fCurrentEntity.position >= length;
}
/**
 * Skips the specified string appearing immediately on the input.
 * <p>
 * <strong>Note:</strong> The characters are consumed only if all
 * the characters are skipped.
 * <p>
 * <strong>Note:</strong> An empty string is treated as trivially
 * skipped: nothing is consumed and true is returned.
 *
 * @param s The string to skip.
 *
 * @return Returns true if the string was skipped.
 *
 * @throws IOException Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
protected boolean skipString(String s) throws IOException {
    final int length = s.length();
    // Guard against the empty string: there is nothing to compare, and the
    // backwards comparison below would otherwise start at index -1.
    if (length == 0) {
        return true;
    }
    // first make sure that the required capacity is available
    if (!arrangeCapacity(length, false)) {
        return false;
    }
    final int beforeSkip = fCurrentEntity.position;
    if (DEBUG_SKIP_STRING) {
        System.out.println("skipString,length = " + s + "," + length);
        System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length));
    }
    // compare back to front; s.charAt() indexes are 0 to length - 1
    for (int i = length - 1; i >= 0; i--) {
        if (s.charAt(i) != fCurrentEntity.ch[beforeSkip + i]) {
            return false;
        }
    }
    // full match: consume the string
    fCurrentEntity.position = beforeSkip + length;
    fCurrentEntity.columnNumber += length;
    if (!detectingVersion) {
        checkEntityLimit(null, fCurrentEntity, beforeSkip, length);
    }
    return true;
} // skipString(String):boolean
/**
 * Skips the specified characters appearing immediately on the input.
 * <p>
 * <strong>Note:</strong> The characters are consumed only if all
 * the characters are skipped.
 *
 * @param s The characters to skip.
 *
 * @return Returns true if the characters were skipped.
 *
 * @throws IOException Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
protected boolean skipString(char [] s) throws IOException {
    final int length = s.length;
    // first make sure that the required capacity is available
    if (!arrangeCapacity(length, false)) {
        return false;
    }
    // start of the candidate match; kept unchanged so that the limit check
    // below receives the start offset, as in skipString(String)
    final int beforeSkip = fCurrentEntity.position;
    if (DEBUG_SKIP_STRING) {
        System.out.println("skipString,length = " + new String(s) + "," + length);
        System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length));
    }
    for (int i = 0; i < length; i++) {
        if (fCurrentEntity.ch[beforeSkip + i] != s[i]) {
            return false;
        }
    }
    // full match: consume the characters
    fCurrentEntity.position = beforeSkip + length;
    fCurrentEntity.columnNumber += length;
    if (!detectingVersion) {
        checkEntityLimit(null, fCurrentEntity, beforeSkip, length);
    }
    return true;
}
//
// Locator methods
//
//
// Private methods
//
/**
 * Reads the next chunk of characters from the current entity's reader
 * into its character buffer.
 *
 * @param offset The offset into the character buffer to
 * read the next batch of characters.
 * @param changeEntity True if the load should change entities
 * at the end of the entity, otherwise leave
 * the current entity in place and the entity
 * boundary will be signaled by the return
 * value.
 * @param notify Determine whether to notify listeners of
 * the event
 *
 * @return Returns true if the end of the entity was reached by this
 * load operation.
 *
 * @throws IOException Thrown if i/o error occurs.
 */
final boolean load(int offset, boolean changeEntity, boolean notify)
        throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(load, "+offset+": ");
        print();
        System.out.println();
    }
    if (notify) {
        invokeListeners(offset);
    }

    // keep a running total of the characters read by the previous loads
    fCurrentEntity.fTotalCountTillLastLoad =
            fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount;

    // how much to ask for: the rest of the buffer, capped while the entity
    // may not yet be read in chunks
    int wanted = fCurrentEntity.ch.length - offset;
    if (!fCurrentEntity.mayReadChunks && wanted > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) {
        wanted = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE;
    }
    if (DEBUG_BUFFER) System.out.println(" length to try to read: "+wanted);
    final int read = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, wanted);
    if (DEBUG_BUFFER) System.out.println(" length actually read: "+read);

    final boolean entityChanged = (read == -1);
    if (entityChanged) {
        // end of this entity
        fCurrentEntity.count = offset;
        fCurrentEntity.position = offset;
        if (changeEntity) {
            // notify the entity manager about the end of entity
            fEntityManager.endEntity();
            // stop if the current entity becomes null
            if (fCurrentEntity == null) {
                throw END_OF_DOCUMENT_ENTITY;
            }
            // handle the trailing edges
            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true, false);
            }
        }
    } else if (read != 0) {
        // record the last count, then reset count and position
        fCurrentEntity.fLastCount = read;
        fCurrentEntity.count = read + offset;
        fCurrentEntity.position = offset;
    }

    if (DEBUG_BUFFER) {
        System.out.print(")load, "+offset+": ");
        print();
        System.out.println();
    }
    return entityChanged;
} // load(int, boolean):boolean
/**
 * Creates a reader capable of reading the given input stream in
 * the specified encoding.
 *
 * @param inputStream The input stream.
 * @param encoding The encoding name that the input stream is
 * encoded using. If the user has specified that
 * Java encoding names are allowed, then the
 * encoding name may be a Java encoding name;
 * otherwise, it is an ianaEncoding name.
 * A null value is treated as "UTF-8".
 * @param isBigEndian For encodings (like UCS-4), whose names cannot
 * specify a byte order, this tells whether the order is
 * big-endian. null means unknown or not relevant.
 *
 * @return Returns a reader.
 *
 * @throws IOException Thrown if i/o error occurs.
 */
protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
        throws IOException {
    // normalize encoding name
    if (encoding == null) {
        encoding = "UTF-8";
    }
    final String upperName = encoding.toUpperCase(Locale.ENGLISH);

    // try to use an optimized reader
    if (upperName.equals("UTF-8")) {
        if (DEBUG_ENCODINGS) {
            System.out.println("$$$ creating UTF8Reader");
        }
        return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
    }
    if (upperName.equals("US-ASCII")) {
        if (DEBUG_ENCODINGS) {
            System.out.println("$$$ creating ASCIIReader");
        }
        return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
    }
    if (upperName.equals("ISO-10646-UCS-4")) {
        if (isBigEndian == null) {
            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                    "EncodingByteOrderUnsupported",
                    new Object[] { encoding },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
        } else if (isBigEndian.booleanValue()) {
            return new UCSReader(inputStream, UCSReader.UCS4BE);
        } else {
            return new UCSReader(inputStream, UCSReader.UCS4LE);
        }
    }
    if (upperName.equals("ISO-10646-UCS-2")) {
        if (isBigEndian == null) {
            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                    "EncodingByteOrderUnsupported",
                    new Object[] { encoding },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
        } else if (isBigEndian.booleanValue()) { // should never happen with this encoding...
            return new UCSReader(inputStream, UCSReader.UCS2BE);
        } else {
            return new UCSReader(inputStream, UCSReader.UCS2LE);
        }
    }

    // check for valid name
    final boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
    final boolean validJava = XMLChar.isValidJavaEncoding(encoding);
    final boolean nameAccepted = validIANA && (!fAllowJavaEncodings || validJava);
    if (!nameAccepted) {
        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                "EncodingDeclInvalid",
                new Object[] { encoding },
                XMLErrorReporter.SEVERITY_FATAL_ERROR);
        // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
        //       because every byte is a valid ISO Latin 1 character.
        //       It may not translate correctly but if we failed on
        //       the encoding anyway, then we're expecting the content
        //       of the document to be bad. This will just prevent an
        //       invalid UTF-8 sequence to be detected. This is only
        //       important when continue-after-fatal-error is turned
        //       on. -Ac
        encoding = "ISO-8859-1";
    }

    // try to use a Java reader; the lookup uses the upper-cased name that
    // was computed before any fallback above
    String javaEncoding = EncodingMap.getIANA2JavaMapping(upperName);
    if (javaEncoding != null) {
        if (javaEncoding.equals("ASCII")) {
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ creating ASCIIReader");
            }
            return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
        }
    } else if (fAllowJavaEncodings) {
        javaEncoding = encoding;
    } else {
        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                "EncodingDeclInvalid",
                new Object[] { encoding },
                XMLErrorReporter.SEVERITY_FATAL_ERROR);
        // see comment above.
        javaEncoding = "ISO8859_1";
    }

    if (DEBUG_ENCODINGS) {
        System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
        // identity comparison on purpose: true only when the name was taken
        // over from 'encoding' above
        if (javaEncoding == encoding) {
            System.out.print(" (IANA encoding)");
        }
        System.out.println();
    }
    return new InputStreamReader(inputStream, javaEncoding);
} // createReader(InputStream,String, Boolean): Reader
/**
 * Returns the IANA encoding name that is auto-detected from
 * the bytes specified, with the endian-ness of that encoding where appropriate.
 * <p>
 * Detection looks at byte order marks first and then at the byte
 * patterns of a leading <code>&lt;</code> / <code>&lt;?</code> in the
 * supported encodings. "UTF-8" is returned whenever too few bytes are
 * available or nothing matches.
 *
 * @param b4 The first four bytes of the input.
 * @param count The number of bytes actually read.
 * @return a 2-element array: the first element, an IANA-encoding string,
 * the second element a Boolean which is true iff the document is big endian, false
 * if it's little-endian, and null if the distinction isn't relevant.
 */
protected Object[] getEncodingName(byte[] b4, int count) {
    if (count < 2) {
        return new Object[]{"UTF-8", null};
    }

    // UTF-16, with BOM
    int b0 = b4[0] & 0xFF;
    int b1 = b4[1] & 0xFF;
    if (b0 == 0xFE && b1 == 0xFF) {
        // UTF-16, big-endian
        return new Object [] {"UTF-16BE", Boolean.TRUE};
    }
    if (b0 == 0xFF && b1 == 0xFE) {
        // UTF-16, little-endian
        return new Object [] {"UTF-16LE", Boolean.FALSE};
    }

    // default to UTF-8 if we don't have enough bytes to make a
    // good determination of the encoding
    if (count < 3) {
        return new Object [] {"UTF-8", null};
    }

    // UTF-8 with a BOM
    int b2 = b4[2] & 0xFF;
    if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
        return new Object [] {"UTF-8", null};
    }

    // default to UTF-8 if we don't have enough bytes to make a
    // good determination of the encoding
    if (count < 4) {
        return new Object [] {"UTF-8", null};
    }

    // other encodings
    int b3 = b4[3] & 0xFF;
    if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
        // UCS-4, big endian (1234)
        return new Object [] {"ISO-10646-UCS-4", Boolean.TRUE};
    }
    if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
        // UCS-4, little endian (4321)
        return new Object [] {"ISO-10646-UCS-4", Boolean.FALSE};
    }
    if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
        // UCS-4, unusual octet order (2143)
        // REVISIT: What should this be?
        return new Object [] {"ISO-10646-UCS-4", null};
    }
    if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
        // UCS-4, unusual octet order (3412)
        // REVISIT: What should this be?
        return new Object [] {"ISO-10646-UCS-4", null};
    }
    if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
        // UTF-16, big-endian, no BOM
        // (or could turn out to be UCS-2...
        // REVISIT: What should this be?
        return new Object [] {"UTF-16BE", Boolean.TRUE};
    }
    if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
        // UTF-16, little-endian, no BOM
        // (or could turn out to be UCS-2...
        return new Object [] {"UTF-16LE", Boolean.FALSE};
    }
    if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
        // EBCDIC
        // a la xerces1, return CP037 instead of EBCDIC here
        return new Object [] {"CP037", null};
    }

    // default encoding
    return new Object [] {"UTF-8", null};
} // getEncodingName(byte[],int):Object[]
/**
* xxx not removing endEntity() so that i remember that we need to implement it.
* Ends an entity.
*
* @throws XNIException Thrown by entity handler to signal an error.
*/
//
/**
 * Debugging aid: when DEBUG_BUFFER is set, writes the state of the
 * current entity's buffer to standard output -- character count, read
 * position, the buffered characters (with a caret marking the read
 * position and control characters escaped) and the line/column numbers.
 */
final void print() {
    if (!DEBUG_BUFFER) {
        return;
    }
    if (fCurrentEntity == null) {
        System.out.print("*NO CURRENT ENTITY*");
        return;
    }

    System.out.print('[');
    System.out.print(fCurrentEntity.count);
    System.out.print(' ');
    System.out.print(fCurrentEntity.position);
    if (fCurrentEntity.count > 0) {
        System.out.print(" \"");
        for (int index = 0; index < fCurrentEntity.count; index++) {
            // mark the read position
            if (index == fCurrentEntity.position) {
                System.out.print('^');
            }
            final char current = fCurrentEntity.ch[index];
            if (current == '\n') {
                System.out.print("\\n");
            } else if (current == '\r') {
                System.out.print("\\r");
            } else if (current == '\t') {
                System.out.print("\\t");
            } else if (current == '\\') {
                System.out.print("\\\\");
            } else {
                System.out.print(current);
            }
        }
        // read position is at the very end of the buffered characters
        if (fCurrentEntity.position == fCurrentEntity.count) {
            System.out.print('^');
        }
        System.out.print('"');
    }
    System.out.print(']');
    System.out.print(" @ ");
    System.out.print(fCurrentEntity.lineNumber);
    System.out.print(',');
    System.out.print(fCurrentEntity.columnNumber);
}
/**
 * Adds a buffer listener unless it is already registered.
 *
 * @param listener listener to which call back should be provided when scanner buffer
 * is being changed.
 */
public void registerListener(XMLBufferListener listener) {
    if (listeners.contains(listener)) {
        // already registered; keep a single entry per listener
        return;
    }
    listeners.add(listener);
}
/**
 * Calls {@code refresh(loadPos)} on every registered buffer listener, in
 * the order in which the listeners are stored.
 *
 * @param loadPos Starting position from which new data is being loaded
 *                into scanner buffer.
 */
public void invokeListeners(int loadPos) {
    int index = 0;
    // the size is re-read on every pass, exactly like a classic indexed loop
    while (index < listeners.size()) {
        ((XMLBufferListener) listeners.get(index)).refresh(loadPos);
        index++;
    }
}
/**
 * Consumes the space characters found directly at the current read
 * position, i.e. characters that match non-terminal S (0x09, 0x0A, 0x0D,
 * 0x20) before end of line normalization is performed. This is useful
 * when scanning structures such as the XMLDecl and TextDecl that can only
 * contain US-ASCII characters.
 * <p>
 * While skipping, the line and column numbers of the current entity are
 * kept up to date, and more characters are loaded whenever the buffered
 * ones are used up.
 * <p>
 * <strong>Note:</strong> The characters are consumed only if they would
 * match non-terminal S before end of line normalization is performed.
 *
 * @return Returns true if at least one space character was skipped,
 *         false if the character at the read position is not a space.
 *
 * @throws IOException Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 *
 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
 */
protected final boolean skipDeclSpaces() throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(skipDeclSpaces: ");
        //XMLEntityManager.print(fCurrentEntity);
        System.out.println();
    }

    // refill first when every buffered character has been consumed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true, false);
    }

    int ch = fCurrentEntity.ch[fCurrentEntity.position];

    // nothing to skip: report and leave right away
    if (!XMLChar.isSpace(ch)) {
        if (DEBUG_BUFFER) {
            System.out.print(")skipDeclSpaces: ");
            //XMLEntityManager.print(fCurrentEntity);
            System.out.println(" -> false");
        }
        return false;
    }

    // sampled once up front, before any load() call inside the loop
    final boolean isExternal = fCurrentEntity.isExternal();
    do {
        boolean switchedEntity = false;
        final boolean lineBreak = ch == '\n' || (isExternal && ch == '\r');
        if (!lineBreak) {
            fCurrentEntity.columnNumber++;
        } else {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;

            final boolean atLastBufferedChar =
                    fCurrentEntity.position == fCurrentEntity.count - 1;
            if (atLastBufferedChar) {
                // keep the line break in slot 0 and load what follows it
                fCurrentEntity.ch[0] = (char) ch;
                switchedEntity = load(1, true, false);
                if (!switchedEntity) {
                    // load() moved the position to 1; when the entity did
                    // not change it has to point at the saved char again
                    fCurrentEntity.position = 0;
                }
            }

            if (isExternal && ch == '\r') {
                // REVISIT: Does this need to be updated to fix the
                //          #x0D ^#x0A newline normalization problem? -Ac
                // step over a '\n' that directly follows the '\r'
                if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                    fCurrentEntity.position--;
                }
            }
            // NOTE: a disabled "NEWLINE NORMALIZATION" variant existed here
            // for the non-'\r' case; in external entities it additionally
            // skipped a '\r' found right after the current character.
        }

        // move on to the next character unless the entity was switched
        if (!switchedEntity) {
            fCurrentEntity.position++;
        }
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, false);
        }
        ch = fCurrentEntity.ch[fCurrentEntity.position];
    } while (XMLChar.isSpace(ch));

    if (DEBUG_BUFFER) {
        System.out.print(")skipDeclSpaces: ");
        // XMLEntityManager.print(fCurrentEntity);
        System.out.println(" -> true");
    }
    return true;
} // skipDeclSpaces():boolean
} // class XMLEntityScanner