blob: e6290d93c918a6a7e912c476617b25d7254b8204 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 1998, 2008 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Goh KONDOH - initial API and implementation
*******************************************************************************/
package org.eclipse.actf.model.dom.sgml;
import java.io.IOException;
import java.io.Reader;
import org.xml.sax.DocumentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
/**
* Tokenizer for document instance.
*/
class InsTokenizer implements ISGMLConstants, Locator {
private boolean extractChar = true;
private boolean extractNum = true;
private int state;
private boolean pushBacked;
public String sval;
public int ttype;
private Reader reader;
void extractCharEntity(boolean b) {
extractChar = b;
}
void extractNumEntity(boolean b) {
extractNum = b;
}
private SGMLParser parser;
/**
* Constructs InsTokenizer instance.
*
* @param reader
* reader that read SGML Documents.
*/
public InsTokenizer(Reader reader, SGMLParser parser) {
this(reader, DEFAULT, parser);
}
InsTokenizer(Reader reader, int state, SGMLParser parser) {
this.reader = reader;
this.state = state;
this.parser = parser;
pushBacked = false;
}
private boolean forward(String str, boolean ignoreCase) throws IOException {
int buf[] = new int[str.length()];
boolean ret = true;
int i;
if (!ignoreCase) {
for (i = 0; i < buf.length; i++) {
buf[i] = read();
if (buf[i] != str.charAt(i)) {
ret = false;
i++;
break;
}
}
} else {
for (i = 0; i < buf.length; i++) {
buf[i] = read();
if (Character.toUpperCase((char) buf[i]) != Character
.toUpperCase(str.charAt(i))
&& Character.toLowerCase((char) buf[i]) != Character
.toLowerCase(str.charAt(i))) {
ret = false;
i++;
break;
}
}
}
while (i > 0)
unread(buf[--i]);
return ret;
}
private int eatComment() throws IOException, ParseException {
int ch;
miscIndex = 0;
if ((ch = read()) == '-') { // <!-
if ((ch = read()) == '-') { // <!-- enter comment
while (true) {
while ((ch = read()) != '-' && ch != EOF) {
setCharToMiscBuffer(miscIndex, (char) ch);
miscIndex++;
}
if ((ch = read()) == '-') { // --
if ((ch = read()) == '-') {
// This is a very nasty case. <!-- ... ---...>
// eat comment until '>' appears.
if (miscBuffer.length <= miscIndex + 2) {
expandMiscBuffer();
}
miscBuffer[miscIndex] = miscBuffer[miscIndex + 1] = miscBuffer[miscIndex + 2] = '-';
miscIndex += 3;
storeUntil('>');
if (Character
.isWhitespace(miscBuffer[miscIndex - 1])
|| miscBuffer[miscIndex - 1] == '-'
|| miscBuffer[miscIndex - 1] == '!') {
if (miscBuffer[miscIndex - 2] == '-'
&& miscBuffer[miscIndex - 1] == '-') {
miscIndex -= 2;
}
state = DEFAULT;
sval = new String(miscBuffer, 0, miscIndex);
parser.setCharacter(miscBuffer, 0, miscIndex);
return (ttype = COMMENT);
} else {
setCharToMiscBuffer(miscIndex, '>');
miscIndex++;
continue;
}
} else if (ch == '!') { // doesn't meet specification,
// but...
if ((ch = read()) == '>') {
state = DEFAULT;
sval = new String(miscBuffer, 0, miscIndex);
parser.setCharacter(miscBuffer, 0, miscIndex);
return (ttype = COMMENT);
} else {
if (miscBuffer.length <= miscIndex + 2) {
expandMiscBuffer();
}
miscBuffer[miscIndex] = miscBuffer[miscIndex + 1] = '-';
miscBuffer[miscIndex + 2] = '!';
miscIndex += 3;
unread(ch);
continue;
}
}
int wsIndex = 0;
char ws[] = null;
if (Character.isWhitespace((char) ch)) {
// Compressed Representation.
ws = new char[256];
ws[0] = (char) ch;
ws[1] = 1;
while (Character.isWhitespace((char) ch)) {
if (ch == ws[wsIndex]) {
ws[wsIndex + 1]++;
} else {
wsIndex += 2;
ws[wsIndex] = (char) ch;
ws[wsIndex + 1] = 1;
}
ch = read();
}
}
if (ch == '>') {
state = DEFAULT;
sval = new String(miscBuffer, 0, miscIndex);
parser.setCharacter(miscBuffer, 0, miscIndex);
return (ttype = COMMENT);
} else {
if (miscBuffer.length <= miscIndex + 2) {
expandMiscBuffer();
}
miscBuffer[miscIndex] = miscBuffer[miscIndex + 1] = '-';
miscIndex += 2;
for (int i = 0; i < wsIndex; i += 2) {
for (int j = 0; j < ws[i + 1]; j++)
setCharToMiscBuffer(miscIndex, ws[i]);
miscIndex++;
}
unread(ch);
}
} else if (ch == EOF) {
parser.error(MISC_ERR, "Unexpected EOF in comment.");
for (int i = 0; i < miscBuffer.length; i++) {
if (miscBuffer[i] == '>') {
unread(miscBuffer, i + 1, miscIndex - i - 1);
state = DEFAULT;
sval = new String(miscBuffer, 0, i);
parser.setCharacter(miscBuffer, 0, miscIndex);
return (ttype = COMMENT);
}
}
throw new ParseException("Unexpected EOF in comment.");
} else {
setCharToMiscBuffer(miscIndex, '-');
miscIndex++;
setCharToMiscBuffer(miscIndex, (char) ch);
miscIndex++;
}
} // while (true) {
} else { // <!-
// if finished reading dtd, deals with <!- ... > as a comment
sval = "-" + (char) ch + eatUntil('>');
char svalChar[] = sval.toCharArray();
parser.setCharacter(svalChar, 0, svalChar.length);
return (ttype = COMMENT);
}
} else if ((ch == 'D' || ch == 'd') && forward("OCTYPE", true)) { // chech
// if
// <!DOCTYPE
unread(ch);
state = TAG;
return (ttype = MDO);
} else {
// if finished reading dtd, deals with <! ... > as a comment
sval = (char) ch + eatUntil('>');
char svalChar[] = sval.toCharArray();
parser.setCharacter(svalChar, 0, svalChar.length);
return (ttype = COMMENT);
}
}
private int etagStartIndex;
private int defaultState() throws IOException, ParseException, SAXException {
miscIndex = 0;
int ch = read();
while (Character.isWhitespace((char) ch)) {
addCharToMiscBuffer((char) ch);
ch = read();
}
if (miscIndex > 0) {
if (prsvWS) {
unread(ch);
sval = new String(miscBuffer, 0, miscIndex);
parser.setCharacter(miscBuffer, 0, miscIndex);
return (ttype = WHITESPACE);
}
}
switch (ch) {
case '<':
if (miscIndex > 0 && !prsvWS) {
DocumentHandler docHandler = parser.getDocumentHandler();
if (docHandler != null) {
unread(ch);
docHandler.ignorableWhitespace(miscBuffer, 0, miscIndex);
read();
}
}
miscIndex = 0;
if ((ch = read()) == '/') {
if (Character.isLowerCase((char) (ch = read()))
|| Character.isUpperCase((char) ch)) {
unread(ch);
state = ETAG;
etagStartIndex = miscIndex;
addCharToMiscBuffer('<');
addCharToMiscBuffer('/');
return (ttype = ETAGO);
} else {
addCharToMiscBuffer('<');
addCharToMiscBuffer('/');
return readPCDATA(ch);
}
} else if (ch == '?') {
miscIndex = 0;
while ((ch = read()) != '>' && ch != EOF) {
addCharToMiscBuffer((char) ch);
}
sval = new String(miscBuffer, 0, miscIndex);
return (ttype = PI);
} else if (ch == '!') { // declaration.
return eatComment();
} else if (Character.isLowerCase((char) ch)
|| Character.isUpperCase((char) ch)) {
unread(ch);
state = TAG;
return (ttype = STAGO);
} else {
addCharToMiscBuffer('<');
return readPCDATA(ch);
}
case EOF:
return (ttype = EOF);
default:
return readPCDATA(ch);
}
}
private int entityBuf[] = new int[64];
private int entityBufIndex;
private int rslvCharEnt() throws IOException, ParseException, SAXException {
entityBufIndex = 0;
int ch = entityBuf[entityBufIndex++] = read();
if (ch == '#') {
ch = entityBuf[entityBufIndex++] = read();
int radix = 10;
if (ch == 'x' || ch == 'X') {
radix = 16;
ch = entityBuf[entityBufIndex++] = read();
}
int k = Character.digit((char) ch, radix);
if (k >= 0) {
int num = k;
ch = entityBuf[entityBufIndex++] = read();
while ((k = Character.digit((char) ch, radix)) >= 0) {
num = num * radix + k;
ch = entityBuf[entityBufIndex++] = read();
}
if (ch == ';') {
char orig[] = new char[entityBufIndex - 1];
for (int i = 0; i < entityBufIndex - 1; i++) {
orig[i] = (char) entityBuf[i];
}
parser.putCharNumEntity(new Character((char) num),
new String(orig));
return num;
}
}
} else if (Character.isLowerCase((char) ch)
|| Character.isUpperCase((char) ch)) {
do {
ch = entityBuf[entityBufIndex++] = read();
} while (Character.isLowerCase((char) ch)
|| Character.isUpperCase((char) ch));
if (ch == ';') {
char orig[] = new char[entityBufIndex - 1];
for (int i = 0; i < entityBufIndex - 1; i++)
orig[i] = (char) entityBuf[i];
String orgStr = new String(orig);
int charEntity = parser.getCharEntity(orgStr);
if (charEntity != -1) {
parser.putCharNumEntity(new Character((char) charEntity),
orgStr);
return charEntity;
}
}
}
for (int i = entityBufIndex - 1; i >= 0; i--)
unread(entityBuf[i]);
return -1;
}
private void setCharToMiscBuffer(int miscIndex, char c) {
if (miscBuffer.length <= miscIndex) {
expandMiscBuffer();
}
miscBuffer[miscIndex] = c;
}
private void addCharToMiscBuffer(char c) {
if (miscBuffer.length <= miscIndex) {
expandMiscBuffer();
}
miscBuffer[miscIndex++] = c;
}
private int readPCDATA(int cur) throws IOException, ParseException,
SAXException {
int succeeding;
int firstLFindex = this.lastLFindex;
if (!extractChar && !extractNum) {
setCharToMiscBuffer(miscIndex, (char) cur);
miscIndex++;
while (((cur = read()) != '<' || !Character
.isLowerCase((char) (succeeding = sence()))
&& !Character.isUpperCase((char) succeeding)
&& succeeding != '!'
&& succeeding != '?'
&& (succeeding != '/' || !Character
.isLowerCase((char) (succeeding = sence(1)))
&& !Character.isUpperCase((char) succeeding)))
&& cur != EOF) {
setCharToMiscBuffer(miscIndex, (char) cur);
if (miscBuffer[miscIndex - 1] == '\n' && cur != '\n') {
firstLFindex = this.index - 1;
}
miscIndex++;
}
} else if (cur != EOF) {
do {
if (cur == '&') {
if (!extractNum) {
if ((cur = read()) == '#') {
addCharToMiscBuffer('&');
addCharToMiscBuffer('#');
continue;
} else {
unread(cur);
}
}
int charEntity = rslvCharEnt();
if (charEntity == -1) {
addCharToMiscBuffer('&');
} else {
addCharToMiscBuffer((char) charEntity);
}
} else {
setCharToMiscBuffer(miscIndex, (char) cur);
if (miscIndex > 0 && miscBuffer[miscIndex - 1] == '\n'
&& cur != '\n') {
firstLFindex = this.index - 2;
}
miscIndex++;
}
} while (((cur = read()) != '<' || !Character
.isLowerCase((char) (succeeding = sence()))
&& !Character.isUpperCase((char) succeeding)
&& succeeding != '!'
&& succeeding != '?'
&& (succeeding != '/' || !Character
.isLowerCase((char) (succeeding = sence(1)))
&& !Character.isUpperCase((char) succeeding)))
&& cur != EOF);
}
if (cur != EOF)
unread(cur);
if (!prsvWS) {
int begin = 0;
while ((cur = miscBuffer[begin]) == '\r' || cur == '\n')
begin++;
int end = miscIndex - 1;
try {
while ((cur = miscBuffer[end]) == '\r' || cur == '\n')
end--;
} catch (ArrayIndexOutOfBoundsException e) {
sval = new String(miscBuffer, 0, miscIndex);
parser.setCharacter(miscBuffer, 0, miscIndex);
return (ttype = PCDATA);
}
int len = end + 1 - begin;
sval = new String(miscBuffer, begin, len);
parser.setCharacter(miscBuffer, begin, len);
if (end != miscIndex - 1) {
unread(miscBuffer, end + 1, miscIndex - end - 1);
lastLFindex = firstLFindex;
}
} else {
sval = new String(miscBuffer, 0, miscIndex);
parser.setCharacter(miscBuffer, 0, miscIndex);
return (ttype = WHITESPACE);
}
return (ttype = PCDATA);
}
private char endTagBuffer[] = new char[64];
/**
* Gets raw text up to end tag specified(&lt;<code>end</code>&gt;)
*
* @param end
* specifed end tags name
* @return raw text up to specified end tag.
*/
final String rawText(String end) throws IOException, ParseException {
char endTagArray[] = end.toUpperCase().toCharArray();
int ch;
miscIndex = 0;
int limitCandidate = 0;
boolean inComment = false;
outer: while (true) {
ch = read();
if (!inComment && ch == '<') {
ch = read();
if (ch == '/') {
if (limitCandidate == 0) {
limitCandidate = miscIndex;
}
endTagBufferIndex = 0;
for (int i = 0; i < end.length(); i++) {
endTagBuffer[endTagBufferIndex] = (char) read();
if ((endTagBuffer[endTagBufferIndex] & 0xffdf) == endTagArray[endTagBufferIndex]) {
endTagBufferIndex++;
} else {
endTagBufferIndex++;
addCharToMiscBuffer('<');
addCharToMiscBuffer('/');
for (int j = 0; j < endTagBufferIndex; j++) {
addCharToMiscBuffer(endTagBuffer[j]);
}
continue outer;
}
}
while (Character.isWhitespace((char) (ch = read()))) {
endTagBuffer[endTagBufferIndex++] = (char) ch;
}
if (ch == '>') {
endTagBufferIndex += 3;
unread(endTagBuffer, 0, endTagBufferIndex);
return new String(miscBuffer, 0, miscIndex);
} else {
endTagBuffer[endTagBufferIndex++] = (char) ch;
addCharToMiscBuffer('<');
addCharToMiscBuffer('/');
for (int i = 0; i < endTagBufferIndex; i++) {
addCharToMiscBuffer(endTagBuffer[i]);
}
}
} else if (ch == '!') {
addCharToMiscBuffer('<');
addCharToMiscBuffer((char) ch);
ch = read();
addCharToMiscBuffer((char) ch);
if (ch == '-') {
ch = read();
addCharToMiscBuffer((char) ch);
if (ch == '-') {
inComment = true;
}
}
} else {
addCharToMiscBuffer('<');
addCharToMiscBuffer((char) ch);
}
} else if (ch == EOF) {
parser.error(MISC_ERR, "Unexpected EOF in CDATA after " + end);
endTagBufferIndex = 0;
if (limitCandidate != 0) {
unread(miscBuffer, limitCandidate, miscIndex
- limitCandidate);
return new String(miscBuffer, 0, limitCandidate);
} else {
return new String(miscBuffer, 0, miscIndex);
}
} else if (inComment && ch == '-') {
addCharToMiscBuffer((char) ch);
ch = read();
addCharToMiscBuffer((char) ch);
if (ch == '-') {
ch = read();
addCharToMiscBuffer((char) ch);
if (ch == '>') {
inComment = false;
}
}
} else {
// setCharToMiscBuffer(miscIndex, (char)ch);
// miscIndex++;
addCharToMiscBuffer((char) ch);
}
}
}
private int endTagBufferIndex = 0;
void eatCDATAEndTag() throws IOException {
for (int i = endTagBufferIndex; i > 0; i--)
read();
}
private char attrValBuffer[] = new char[8192];
private static boolean includeAngleBracket(char chars[], int begin, int end) {
while (begin < end) {
char ch = chars[begin++];
if (ch == '>' || ch == '<')
return true;
}
return false;
}
private void checkAttrValBuffer(int index) {
int curSize = attrValBuffer.length;
if (curSize < index + 128) {
char tmpBuffer[] = new char[curSize + 8192];
System.arraycopy(attrValBuffer, 0, tmpBuffer, 0, curSize);
attrValBuffer = tmpBuffer;
}
}
/**
* Reads attribute values. Nasty attribute value examples:
* <ul>
* <li>width=<em>&quot;100&quot;%</em>
* <li>width=<em>&quot;100%</em> height=...
* </ul>
*/
final String readAttributeValue(AttributeDefinition ad, ElementDefinition ed)
throws IOException, ParseException, SAXException {
int ch;
while (Character.isWhitespace((char) (ch = read())))
;
miscIndex = 0;
int attrValBufferIndex = 0;
addCharToMiscBuffer((char) ch);
if (ch == '\'' || ch == '"') {
int endChar = ch;
while ((ch = read()) != EOF && ch != endChar) {
checkAttrValBuffer(attrValBufferIndex);
setCharToMiscBuffer(miscIndex, (char) ch);
miscIndex++;
if (ch == '&' && (extractChar || extractNum)) {
if (!extractNum) {
if ((ch = read()) == '#') {
attrValBuffer[attrValBufferIndex++] = '&';
attrValBuffer[attrValBufferIndex++] = '#';
continue;
} else {
unread(ch);
}
}
int charEntity = rslvCharEnt();
if (charEntity == -1) {
attrValBuffer[attrValBufferIndex++] = '&';
} else {
attrValBuffer[attrValBufferIndex++] = (char) charEntity;
for (int i = 0; i < entityBufIndex; i++) {
addCharToMiscBuffer((char) entityBuf[i]);
}
}
} else {
try {
attrValBuffer[attrValBufferIndex] = (char) ch;
} catch (ArrayIndexOutOfBoundsException e) {
char newAttrValBuffer[] = new char[attrValBuffer.length * 2];
System.arraycopy(attrValBuffer, 0, newAttrValBuffer, 0,
attrValBufferIndex);
attrValBuffer = newAttrValBuffer;
}
attrValBufferIndex++;
}
}
if (ch != EOF) {
addCharToMiscBuffer((char) ch);
} else {
parser.error(ATTR_VALUE, "EOF in attribute value.");
}
if (miscIndex > 1
&& (includeAngleBracket(miscBuffer, 1, miscIndex - 1) || miscBuffer[miscIndex - 2] == '=')
&& parser.handleError(ATTR_VALUE, new String(miscBuffer, 0,
miscIndex))) {
return readAttributeValue(ad, ed);
}
} else if (ch == '>') {
unread(ch);
return "";
} else {
int ch1 = -1;
if (!isUnquotedChar((char) ch) && (ch1 = sence()) == '"'
|| ch1 == '\'' || Character.isWhitespace((char) ch1)) {
return readAttributeValue(ad, ed);
}
attrValBuffer[attrValBufferIndex++] = (char) ch;
while ((ch = read()) != EOF && ch != '>'
&& !Character.isWhitespace((char) ch)) {
checkAttrValBuffer(attrValBufferIndex);
if (ch == '&' && (extractChar || extractNum)) {
if (!extractNum) {
if ((ch = read()) == '#') {
attrValBuffer[attrValBufferIndex++] = '&';
attrValBuffer[attrValBufferIndex++] = '#';
continue;
} else {
unread(ch);
}
}
int charEntity = rslvCharEnt();
if (charEntity == -1) {
attrValBuffer[attrValBufferIndex++] = '&';
} else {
attrValBuffer[attrValBufferIndex++] = (char) charEntity;
}
} else {
attrValBuffer[attrValBufferIndex++] = (char) ch;
}
}
if (ch == '>')
unread(ch);
}
ch = read();
String ret = new String(attrValBuffer, 0, attrValBufferIndex);
if (ch == '"' || ch == '\'') {
if (!parser.handleError(BEFORE_ATTRNAME, "\"")) {
parser.error(STARTTAG_SYNTAX_ERR,
"requires attribute name after \"" + ret + '"');
}
} else {
unread(ch);
}
return ret;
}
private int sence(int forward) throws IOException {
switch (forward) {
case 0:
int ch = read();
unread(ch);
return ch;
case 1:
int ch0 = read();
ch = read();
unread(ch);
unread(ch0);
return ch;
default:
int buf[] = new int[forward];
for (int i = 0; i < forward; i++) {
buf[i] = read();
}
for (int i = forward - 1; i >= 0; i--) {
unread(buf[i]);
}
return buf[forward - 1];
}
}
private int sence() throws IOException {
int ch = read();
unread(ch);
return ch;
}
private boolean isUnquotedChar(char ch) {
return Character.isLowerCase(ch) || Character.isUpperCase(ch)
|| Character.isDigit(ch) || ch == '-' || ch == '.';
}
private int etag() throws ParseException, IOException {
int ch;
// while (Character
// .isWhitespace(miscBuffer[miscIndex++] = (char) (ch = read())));
while (Character.isWhitespace((char) (ch = read()))) {
addCharToMiscBuffer((char) ch);
}
;
addCharToMiscBuffer((char) ch);
if (ch == '>') {
state = DEFAULT;
sval = ">";
parser.setCharacter(miscBuffer, etagStartIndex, miscIndex
- etagStartIndex);
return (ttype = '>');
} else if (ch == EOF) {
state = DEFAULT;
sval = "";
return (ttype = EOF);
} else if (Character.isLowerCase((char) ch)
|| Character.isUpperCase((char) ch)) {
int startIndex = miscIndex - 1;
do {
setCharToMiscBuffer(miscIndex, (char) (ch = read()));
miscIndex++;
} while (Character.isLowerCase((char) ch)
|| Character.isUpperCase((char) ch)
|| Character.isDigit((char) ch) || ch == '.' || ch == '-'
|| ch == ':' || ch == '#');
unread(ch);
miscIndex--;
sval = new String(miscBuffer, startIndex, miscIndex - startIndex);
return (ttype = NAME_CHAR);
} else {
return etag();
}
}
private int tag() throws ParseException, IOException {
int ch;
miscIndex = 0;
while (Character.isWhitespace((char) (ch = read())))
;
if (ch == '!') {
System.out.println("comment between DOCTYPE and HTML !!!");
return eatComment();
}
if (ch == '>') {
state = DEFAULT;
sval = ">";
return (ttype = '>');
} else if (ch == EOF) {
state = DEFAULT;
sval = "";
return (ttype = EOF);
} else if (Character.isLowerCase((char) ch)
|| Character.isUpperCase((char) ch)) {
do {
setCharToMiscBuffer(miscIndex, (char) ch);
miscIndex++;
ch = read();
} while (Character.isLowerCase((char) ch)
|| Character.isUpperCase((char) ch)
|| Character.isDigit((char) ch) || ch == '.' || ch == '-'
|| ch == ':' || ch == '#');
unread(ch);
sval = new String(miscBuffer, 0, miscIndex);
return (ttype = NAME_CHAR);
} else if (Character.isDigit((char) ch)) {
do {
addCharToMiscBuffer((char) ch);
ch = read();
} while (Character.isDigit((char) ch));
if (ch == '%') {
addCharToMiscBuffer((char) ch);
} else {
unread(ch);
}
sval = new String(miscBuffer, 0, miscIndex);
return (ttype = NUM);
} else if (ch == '<') {
ch = read();
if (ch == '/') {
sval = "</";
miscBuffer[0] = '<';
miscBuffer[1] = '/';
etagStartIndex = 0;
state = ETAGO;
return (ttype = ETAGO);
} else {
unread(ch);
sval = "<";
return (ttype = '<');
}
} else {
return (ttype = ch);
}
}
final void consumeUntil(char limit) throws IOException {
while (limit != read())
;
}
private boolean prsvWS = false;
void setPreserveWhitespace(boolean preserv) {
this.prsvWS = preserv;
}
private int index = 0;
private char charBuffer[] = new char[INITIAL_BUF_SIZ];
private void expandBuffer(int increase) {
if (bufLimit != index) {
increase += index < bufLimit ? (bufLimit + index)
: (charBuffer.length - index + bufLimit);
}
int length = charBuffer.length * 2;
while (length < increase)
length *= 2;
char newBuffer[] = new char[length];
if (index < bufLimit) {
System.arraycopy(charBuffer, index, newBuffer, index, bufLimit
- index);
} else if (index != bufLimit) {
System.arraycopy(charBuffer, 0, newBuffer, 0, bufLimit);
System.arraycopy(charBuffer, index, newBuffer, length
- charBuffer.length + index, charBuffer.length - index);
index = length - charBuffer.length + index;
}
charBuffer = newBuffer;
}
public final static int INITIAL_BUF_SIZ = 8192;
public final static int READ_UNIT = 1024;
private int bufLimit = 0;
private char miscBuffer[] = new char[INITIAL_BUF_SIZ];
private int miscIndex = 0;
private void expandMiscBuffer() {
char newBuffer[];
if (miscBuffer.length < 32768) {
newBuffer = new char[miscBuffer.length * 2];
} else {
newBuffer = new char[miscBuffer.length + 32768];
}
System.arraycopy(miscBuffer, 0, newBuffer, 0, miscIndex);
miscBuffer = newBuffer;
}
private void storeUntil(char limit) throws IOException {
int ch = read();
while (ch != limit && ch != EOF) {
setCharToMiscBuffer(miscIndex, (char) ch);
miscIndex++;
ch = read();
}
}
final String eatUntil(char limit) throws IOException {
miscIndex = 0;
int ch = read();
while (ch != limit && ch != EOF) {
setCharToMiscBuffer(miscIndex, (char) ch);
miscIndex++;
ch = read();
}
return new String(miscBuffer, 0, miscIndex);
}
private boolean skipLF;
private boolean fillBuffer() throws IOException {
if (bufLimit == charBuffer.length) {
index = 0;
bufLimit = reader.read(charBuffer, 0, READ_UNIT);
if (bufLimit == -1)
return false;
} else if (bufLimit + READ_UNIT < charBuffer.length) {
int count = reader.read(charBuffer, bufLimit, READ_UNIT);
if (count == -1)
return false;
bufLimit += count;
} else {
int count = reader.read(charBuffer, bufLimit, charBuffer.length
- bufLimit);
if (count == -1)
return false;
bufLimit += count;
}
return true;
}
// private void printCharBuffer() {
// if (bufLimit < index) {
// System.err.print(
// new String(charBuffer, index, charBuffer.length - index));
// System.err.println(new String(charBuffer, 0, bufLimit));
// } else if (bufLimit == index) {
// System.err.println("Empty");
// } else {
// System.err.println(new String(charBuffer, index, bufLimit - index));
// }
// }
private int read() throws IOException {
if (bufLimit == index) {
if (!fillBuffer())
return -1;
} else if (index == charBuffer.length) {
index = 0;
}
int ret = charBuffer[index++];
if (skipLF) {
if (ret == '\n') {
if (bufLimit == index) {
if (!fillBuffer())
return -1;
} else if (index == charBuffer.length) {
index = 0;
}
lastLFindex++;
ret = charBuffer[index++];
}
skipLF = false;
}
switch (ret) {
case '\r':
skipLF = true;
case '\n':
lineNumber++;
lastLFindex = index - 1;
return '\n';
}
return ret;
}
private int lastLFindex = -1;
private int lineNumber = 0;
private void unread(int ch) throws IOException {
if (ch == '\n')
lineNumber--;
if (index == 0) {
charBuffer[index = charBuffer.length - 1] = (char) ch;
} else {
charBuffer[--index] = (char) ch;
}
}
void unread(String str) throws IOException {
unread(str.toCharArray());
}
private void unread(char str[], int begin, int len) {
if (bufLimit >= index) {
if (charBuffer.length - (bufLimit - index) < len) {
expandBuffer(len);
}
} else if (index - bufLimit < len) {
expandBuffer(len);
}
if (index < len) {
int i = begin + len - 1;
int j;
for (j = index - 1; j >= 0; j--) {
if ((charBuffer[j] = str[i--]) == '\n')
lineNumber--;
}
j = charBuffer.length - 1;
for (; i >= begin; j--) {
if ((charBuffer[j] = str[i--]) == '\n')
lineNumber--;
}
index = j + 1;
} else {
for (int i = begin + len - 1; i >= begin; i--) {
if ((charBuffer[--index] = str[i]) == '\n')
lineNumber--;
}
}
}
private void unread(char str[]) {
unread(str, 0, str.length);
}
final int nextToken() throws IOException, ParseException, SAXException {
if (pushBacked) {
pushBacked = false;
return ttype;
}
switch (state) {
case DEFAULT:
return defaultState();
case TAG:
return tag();
case ETAG:
return etag();
default:
return -1;
}
}
final void pushBack() {
pushBacked = true;
}
final int getCurrentLine() {
return lineNumber + 1;
}
final int getCurrentCol() {
return getColumnNumber();
}
final void switchTo(int state) {
this.state = state;
}
final void close() throws IOException {
this.reader.close();
}
public String getPublicId() {
return null;
}
public String getSystemId() {
return null;
}
public int getLineNumber() {
return lineNumber + 1;
}
public int getColumnNumber() {
int ret;
if (index >= lastLFindex) {
ret = index - lastLFindex;
} else {
ret = charBuffer.length - lastLFindex + index;
}
return ret;
}
}