blob: 6a51cdc72c7bcba31b83a8e60c0fb60881813fdf [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2001, 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.wst.dtd.core.internal.saxparser;
/**
 * Cursor-style scanner over an in-memory string, used to step through
 * XML/DTD text one UTF-16 code unit at a time.
 * <p>
 * The parser tracks a current offset into the data and caches the code unit
 * found at that offset in <code>fMostRecentChar</code>. The cached value is
 * <code>-1</code> whenever the offset is at or beyond the end of the data,
 * and every scanning method treats <code>-1</code> as "end of input".
 */
public class StringParser {
	/** The text being scanned. */
	String fData = null;
	/** Offset one past the last code unit of <code>fData</code>. */
	int fEndOffset;
	/** Code unit at <code>fCurrentOffset</code>, or -1 at end of input. */
	int fMostRecentChar;
	/** Offset of the code unit currently being looked at. */
	int fCurrentOffset;
	/** Whether this instance has already called XMLCharacterProperties.initCharFlags(). */
	boolean fCalledCharPropInit = false;

	/**
	 * Creates a parser positioned on the first code unit of the data.
	 * 
	 * @param data
	 *            the text to scan; must not be null (a null value fails with
	 *            a NullPointerException when its length is read)
	 */
	public StringParser(String data) {
		fData = data;
		fCurrentOffset = 0;
		fEndOffset = fData.length();
		fMostRecentChar = fEndOffset == 0 ? -1 : fData.charAt(0);
	}

	/**
	 * Returns a slice of the underlying data.
	 * 
	 * @param offset
	 *            start offset of the slice
	 * @param length
	 *            number of code units in the slice
	 * @return the empty string when length is 0 (the offset is not checked
	 *         in that case), otherwise the requested substring
	 */
	public String getString(int offset, int length) {
		if (length == 0)
			return ""; //$NON-NLS-1$
		return fData.substring(offset, offset + length);
	}

	/**
	 * @return the complete text handed to the constructor
	 */
	public String getData() {
		return fData;
	}

	/**
	 * Returns the text from the current offset to the end of the data.
	 * 
	 * @return the unread tail of the data; the empty string once the end has
	 *         been reached
	 */
	public String getRemainingString() {
		int offset = getCurrentOffset();
		// loadNextChar() keeps incrementing the offset when called at end of
		// input, so the offset can legitimately be larger than the data
		// length; substring() would throw for such an offset.
		if (offset >= fData.length())
			return ""; //$NON-NLS-1$
		return fData.substring(offset);
	}

	/**
	 * @return the offset of the code unit currently being looked at
	 */
	public int getCurrentOffset() {
		return fCurrentOffset;
	}

	/**
	 * Moves the cursor forward by one code unit and refreshes the cached
	 * character.
	 * 
	 * @return the code unit at the new offset, or -1 if the new offset is at
	 *         or beyond the end of the data
	 */
	public int loadNextChar() {
		if (++fCurrentOffset >= fEndOffset) {
			fMostRecentChar = -1;
		}
		else {
			fMostRecentChar = fData.charAt(fCurrentOffset);
		}
		return fMostRecentChar;
	}

	/**
	 * Tests whether the current character equals the given one.
	 * 
	 * @param chr
	 *            the character to compare against
	 * @param skipPastChar
	 *            if true and the character matches, advance past it
	 * @return true if the current character is <code>chr</code>
	 */
	public boolean lookingAtChar(char chr, boolean skipPastChar) {
		if (fMostRecentChar != chr) {
			return false;
		}
		if (skipPastChar) {
			loadNextChar();
		}
		return true;
	}

	/**
	 * Tests whether the cursor is on a character that is legal in XML text:
	 * tab, line feed, carriage return, 0x20-0xD7FF, 0xE000-0xFFFD, or a high
	 * surrogate immediately followed by a low surrogate.
	 * 
	 * @param skipPastChar
	 *            if true and the character is valid, advance past it (past
	 *            both code units of a surrogate pair)
	 * @return true if the current character is valid; false for an invalid
	 *         character, an unpaired surrogate, or end of input. The cursor
	 *         never moves when false is returned.
	 */
	public boolean lookingAtValidChar(boolean skipPastChar) {
		int ch = fMostRecentChar;
		if (ch < 0xD800) {
			// Covers the end-of-input marker (-1) and the C0 controls; only
			// tab, LF and CR are permitted below 0x20.
			if (ch < 0x20 && ch != 0x09 && ch != 0x0A && ch != 0x0D) {
				return false;
			}
		}
		else if (ch > 0xFFFD) {
			return false;
		}
		else if (ch < 0xDC00) {
			// High surrogate: only valid when a low surrogate follows.
			int lowOffset = fCurrentOffset + 1;
			if (lowOffset >= fEndOffset) {
				return false;
			}
			int low = fData.charAt(lowOffset);
			if (low < 0xDC00 || low >= 0xE000) {
				return false;
			}
			if (skipPastChar) {
				// Step onto the low surrogate; the advance below moves past it.
				fCurrentOffset = lowOffset;
			}
		}
		else if (ch < 0xE000) {
			// Low surrogate without a preceding high surrogate.
			return false;
		}
		if (skipPastChar) {
			loadNextChar();
		}
		return true;
	}

	/**
	 * Tests whether the current character is XML white space (space, tab,
	 * line feed or carriage return).
	 * 
	 * @param skipPastChar
	 *            if true and the character is white space, advance past it
	 * @return true if the current character is white space
	 */
	public boolean lookingAtSpace(boolean skipPastChar) {
		if (!isSpace(fMostRecentChar)) {
			return false;
		}
		if (skipPastChar) {
			loadNextChar();
		}
		return true;
	}

	/**
	 * Advances until the current character equals <code>chr</code> or the
	 * end of input is reached.
	 * 
	 * @param chr
	 *            the character to stop at
	 * @param skipPastChar
	 *            if true and the character was found, advance past it
	 */
	public void skipToChar(char chr, boolean skipPastChar) {
		//
		// REVISIT - this will skip invalid characters without reporting them.
		//
		int ch = fMostRecentChar;
		while (ch != chr) {
			if (ch == -1) {
				return;
			}
			ch = loadNextChar();
		}
		if (skipPastChar) {
			loadNextChar();
		}
	}

	/**
	 * Advances past any run of white space starting at the current
	 * character. When the run extends to the end of the data, the cached
	 * character becomes -1 so that subsequent look-ahead calls see the end
	 * of input rather than a stale white-space character.
	 */
	public void skipPastSpaces() {
		// isSpace(-1) is false, so the loop also stops at end of input.
		while (isSpace(fMostRecentChar)) {
			loadNextChar();
		}
	}

	/**
	 * Advances past a name in which parameter-entity reference delimiters
	 * ('%' and ';') may appear. Nothing is skipped unless the current
	 * character is '%' or an initial name character.
	 * 
	 * @param fastcheck
	 *            a character that ends the scan immediately when encountered
	 *            after the first character
	 */
	public void skipPastNameAndPEReference(char fastcheck) {
		int ch = fMostRecentChar;
		if (ch != '%' && !isInitialNameChar(ch)) {
			return;
		}
		while (true) {
			ch = loadNextChar();
			if (fastcheck == ch)
				return;
			if (ch == '%' || ch == ';') {
				continue;
			}
			if (!isNameChar(ch))
				return;
		}
	}

	/**
	 * Advances past a name. Nothing is skipped unless the current character
	 * is an initial name character.
	 * 
	 * @param fastcheck
	 *            a character that ends the scan immediately when encountered
	 *            after the first character
	 */
	public void skipPastName(char fastcheck) {
		int ch = fMostRecentChar;
		if (!isInitialNameChar(ch)) {
			return;
		}
		while (true) {
			ch = loadNextChar();
			if (fastcheck == ch)
				return;
			if (!isNameChar(ch))
				return;
		}
	}

	/**
	 * Advances past a run of name characters starting at the current
	 * character.
	 * 
	 * @param fastcheck
	 *            a character that ends the scan immediately when encountered
	 */
	public void skipPastNmtoken(char fastcheck) {
		int ch = fMostRecentChar;
		while (fastcheck != ch && isNameChar(ch)) {
			ch = loadNextChar();
		}
	}

	/**
	 * Advances past a run of name characters in which parameter-entity
	 * reference delimiters ('%' and ';') may appear.
	 * 
	 * @param fastcheck
	 *            a character that ends the scan immediately when encountered
	 */
	public void skipPastNmtokenAndPEReference(char fastcheck) {
		int ch = fMostRecentChar;
		while (true) {
			if (fastcheck == ch)
				return;
			if (ch != '%' && ch != ';' && !isNameChar(ch))
				return;
			ch = loadNextChar();
		}
	}

	/**
	 * Tests whether the data at the current offset starts with the given
	 * characters and, if so, advances past them.
	 * 
	 * @param s
	 *            the characters to match; an empty array matches trivially
	 *            without moving the cursor
	 * @return true if the characters were matched and skipped; false (with
	 *         the cursor unchanged) otherwise
	 */
	public boolean skippedString(char[] s) {
		if (s.length == 0) {
			// Nothing to compare; avoids indexing s[0] on an empty array.
			return true;
		}
		if (fMostRecentChar != s[0]) {
			return false;
		}
		if (fCurrentOffset + s.length > fEndOffset)
			return false;
		for (int i = 1; i < s.length; i++) {
			if (fData.charAt(fCurrentOffset + i) != s[i])
				return false;
		}
		// Land on the last matched character, then step past it.
		fCurrentOffset += (s.length - 1);
		loadNextChar();
		return true;
	}

	/**
	 * Returns the current character and advances past it unconditionally.
	 * 
	 * @return the character that was current before advancing, or -1 at end
	 *         of input
	 * @throws Exception
	 *             declared for callers; not thrown by this implementation
	 */
	public int scanInvalidChar() throws Exception {
		int ch = fMostRecentChar;
		loadNextChar();
		return ch;
	}

	//
	//
	//
	/*
	 * public int scanCharRef(boolean hex) throws Exception { int ch =
	 * fMostRecentChar; if (ch == -1) // return
	 * changeReaders().scanCharRef(hex); return ch; int num = 0; if (hex) { if
	 * (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) return
	 * XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR; num = ch - (ch < 'A' ?
	 * '0' : (ch < 'a' ? 'A' : 'a') - 10); } else { if (ch < '0' || ch > '9')
	 * return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR; num = ch - '0'; }
	 * boolean toobig = false; while (true) { ch = loadNextChar(); if (ch ==
	 * -1) return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED; if (hex) {
	 * if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
	 * break; } else { if (ch < '0' || ch > '9') break; } if (hex) { int dig =
	 * ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10); num = (num << 4) +
	 * dig; } else { int dig = ch - '0'; num = (num * 10) + dig; } if (num >
	 * 0x10FFFF) { toobig = true; num = 0; } } if (ch != ';') return
	 * XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED; loadNextChar(); if
	 * (toobig) return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE; return
	 * num; }
	 */
	//
	//
	//
	/*
	 * public int scanStringLiteral() throws Exception { boolean single; if
	 * (!(single = lookingAtChar('\'', true)) && !lookingAtChar('\"', true)) {
	 * return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED; } int offset =
	 * fCurrentOffset; char qchar = single ? '\'' : '\"'; while
	 * (!lookingAtChar(qchar, false)) { if (!lookingAtValidChar(true)) {
	 * return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR; } } // int
	 * stringIndex = addString(offset, fCurrentOffset - offset); int
	 * stringIndex = addString(offset, fCurrentOffset - offset);
	 * lookingAtChar(qchar, true); // move past qchar return stringIndex; }
	 */

	/**
	 * Scans an attribute value up to and including its closing quote.
	 * <p>
	 * Invalid characters do not stop the scan: they are stepped over and
	 * remembered in the result. Reaching the end of input before the closing
	 * quote ends the scan with a false result.
	 * 
	 * @param qchar
	 *            the quote character that terminates the value
	 * @return true if the closing quote was found and every character before
	 *         it was valid; false otherwise
	 */
	public boolean scanAttValue(char qchar) {
		boolean result = true;
		while (!lookingAtChar(qchar, false)) {
			if (fMostRecentChar == -1) {
				// Unterminated value: there is no closing quote to find, so
				// stop instead of looping on the end-of-input marker.
				return false;
			}
			if (lookingAtChar(' ', true)) {
				continue;
			}
			if (!lookingAtValidChar(true)) {
				result = false;
				// lookingAtValidChar leaves the cursor on a rejected
				// character; step over it so the scan makes progress.
				loadNextChar();
			}
		}
		lookingAtChar(qchar, true);
		return result;
	}

	//
	// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
	// | "'" ([^%&'] | PEReference | Reference)* "'"
	//
	// The values in the following table are defined as:
	//
	// 0 - not special
	// 1 - quote character
	// 2 - reference
	// 3 - peref
	// 4 - invalid
	//
	// The table is indexed by code unit value; only the commented-out
	// scanEntityValue below refers to it within this class.
	//
	public static final byte[] fgAsciiEntityValueChar = {
				// 0x00 - 0x0F: controls; tab, LF and CR are not special
				4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 4, 4, 0, 4, 4,
				// 0x10 - 0x1F: controls
				4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				// 0x20 - 0x2F: '\"', '%', '&', '\'' are special
				0, 0, 1, 0, 0, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0,
				// 0x30 - 0x3F
				0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				// 0x40 - 0x4F
				0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				// 0x50 - 0x5F
				0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				// 0x60 - 0x6F
				0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				// 0x70 - 0x7F
				0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

	/*
	 * public int scanEntityValue(int qchar, boolean createString) throws
	 * Exception { int offset = fCurrentOffset; int ch = fMostRecentChar;
	 * while (true) { if (ch == -1) { changeReaders(); // do not call next
	 * reader, our caller may need to change the parameters return
	 * XMLEntityHandler.ENTITYVALUE_RESULT_END_OF_INPUT; } if (ch < 0x80) {
	 * switch (fgAsciiEntityValueChar[ch]) { case 1: // quote char if (ch ==
	 * qchar) { if (!createString) return
	 * XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED; int length =
	 * fCurrentOffset - offset; int result = length == 0 ?
	 * StringPool.EMPTY_STRING : addString(offset, length); loadNextChar();
	 * return result; } // the other quote character is not special // fall
	 * through case 0: // non-special char if (++fCurrentOffset >= fEndOffset) {
	 * if (oweTrailingSpace) { oweTrailingSpace = false; ch = fMostRecentChar = ' '; }
	 * else { ch = fMostRecentChar = -1; } } else { ch = fMostRecentChar =
	 * fData.charAt(fCurrentOffset); } continue; case 2: // reference return
	 * XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE; case 3: // peref return
	 * XMLEntityHandler.ENTITYVALUE_RESULT_PEREF; case 4: // invalid return
	 * XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR; } } else if (ch <
	 * 0xD800) { ch = loadNextChar(); } else if (ch >= 0xE000 && (ch <= 0xFFFD ||
	 * (ch >= 0x10000 && ch <= 0x10FFFF))) { // // REVISIT - needs more code
	 * to check surrogates. // ch = loadNextChar(); } else { return
	 * XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR; } } }
	 */

	/** Tells whether ch is space, tab, line feed or carriage return. */
	private static boolean isSpace(int ch) {
		return ch == 0x20 || ch == 0x0A || ch == 0x0D || ch == 0x09;
	}

	/** Initializes the non-ASCII character flag table on first use by this instance. */
	private void ensureCharFlagsInitialized() {
		if (!fCalledCharPropInit) {
			XMLCharacterProperties.initCharFlags();
			fCalledCharPropInit = true;
		}
	}

	/** Tells whether ch may start a name; false for the end-of-input marker (-1). */
	private boolean isInitialNameChar(int ch) {
		if (ch < 0x80) {
			return ch != -1 && XMLCharacterProperties.fgAsciiInitialNameChar[ch] != 0;
		}
		ensureCharFlagsInitialized();
		return (XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) != 0;
	}

	/** Tells whether ch may appear inside a name; false for the end-of-input marker (-1). */
	private boolean isNameChar(int ch) {
		if (ch < 0x80) {
			return ch != -1 && XMLCharacterProperties.fgAsciiNameChar[ch] != 0;
		}
		ensureCharFlagsInitialized();
		return (XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) != 0;
	}
}