| /******************************************************************************* |
| * Copyright (c) 2001, 2008 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| *******************************************************************************/ |
| |
| package org.eclipse.wst.dtd.core.internal.saxparser; |
| |
| /** |
| * Reader for processing/parsing xml string |
| * |
| * @version |
| */ |
| public class StringParser { |
| String fData = null; |
| int fEndOffset; |
| int fMostRecentChar; |
| int fCurrentOffset; |
| boolean fCalledCharPropInit = false; |
| |
/**
 * Creates a parser positioned on the first character of the given text.
 *
 * @param data the text to scan; must not be null (its length is read immediately)
 */
public StringParser(String data) {
    this.fData = data;
    this.fCurrentOffset = 0;
    this.fEndOffset = data.length();
    if (this.fEndOffset == 0) {
        // Empty input: there is no first character to load.
        this.fMostRecentChar = -1;
    }
    else {
        this.fMostRecentChar = data.charAt(0);
    }
}
| |
| public String getString(int offset, int length) { |
| if (length == 0) |
| return ""; //$NON-NLS-1$ |
| return fData.substring(offset, offset + length); |
| } |
| |
| public String getData() { |
| return fData; |
| } |
| |
| public String getRemainingString() { |
| return fData.substring(getCurrentOffset()); |
| } |
| |
| public int getCurrentOffset() { |
| return fCurrentOffset; |
| } |
| |
| // |
| // |
| public int loadNextChar() { |
| if (++fCurrentOffset >= fEndOffset) { |
| fMostRecentChar = -1; |
| } |
| else { |
| fMostRecentChar = fData.charAt(fCurrentOffset); |
| } |
| return fMostRecentChar; |
| } |
| |
| public int loadPreviousChar() { |
| if (--fCurrentOffset < 0) |
| fMostRecentChar = -1; |
| else |
| fMostRecentChar = fData.charAt(fCurrentOffset); |
| return fMostRecentChar; |
| } |
| |
| // |
| // |
| public boolean lookingAtChar(char chr, boolean skipPastChar) { |
| int ch = fMostRecentChar; |
| if (ch != chr) { |
| return false; |
| } |
| if (skipPastChar) { |
| if (++fCurrentOffset >= fEndOffset) { |
| fMostRecentChar = -1; |
| } |
| else { |
| fMostRecentChar = fData.charAt(fCurrentOffset); |
| } |
| } |
| return true; |
| } |
| |
| // |
| // |
| // |
| public boolean lookingAtValidChar(boolean skipPastChar) { |
| int ch = fMostRecentChar; |
| if (ch < 0xD800) { |
| if (ch < 0x20 && ch != 0x09 && ch != 0x0A && ch != 0x0D) { |
| return false; |
| } |
| if (skipPastChar) { |
| if (++fCurrentOffset >= fEndOffset) { |
| fMostRecentChar = -1; |
| } |
| else { |
| fMostRecentChar = fData.charAt(fCurrentOffset); |
| } |
| } |
| return true; |
| } |
| if (ch > 0xFFFD) { |
| return false; |
| } |
| if (ch < 0xDC00) { |
| if (fCurrentOffset + 1 >= fEndOffset) { |
| return false; |
| } |
| ch = fData.charAt(fCurrentOffset + 1); |
| if (ch < 0xDC00 || ch >= 0xE000) { |
| return false; |
| } |
| else if (!skipPastChar) { |
| return true; |
| } |
| else { |
| fCurrentOffset++; |
| } |
| } |
| else if (ch < 0xE000) { |
| return false; |
| } |
| if (skipPastChar) { |
| if (++fCurrentOffset >= fEndOffset) { |
| fMostRecentChar = -1; |
| } |
| else { |
| fMostRecentChar = fData.charAt(fCurrentOffset); |
| } |
| } |
| return true; |
| } |
| |
| // |
| // |
| // |
| public boolean lookingAtSpace(boolean skipPastChar) { |
| int ch = fMostRecentChar; |
| if (ch > 0x20) |
| return false; |
| if (ch == 0x20 || ch == 0x0A || ch == 0x0D || ch == 0x09) { |
| if (skipPastChar) { |
| loadNextChar(); |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| // |
| // |
| // |
| public void skipToChar(char chr, boolean skipPastChar) { |
| // |
| // REVISIT - this will skip invalid characters without reporting them. |
| // |
| int ch = fMostRecentChar; |
| while (true) { |
| if (ch == chr) { |
| if (skipPastChar) { |
| loadNextChar(); |
| } |
| return; |
| } |
| if (ch == -1) { |
| return; |
| } |
| ch = loadNextChar(); |
| } |
| } |
| |
| /** |
| * skips to the last occurrence of the specified character. |
| * if <code>skipPastChar</code> is true, the parser is |
| * incremented past the last occurrence of the character. |
| * This method starts at the end of the character data, and |
| * moves backwards to find the last occurrence of the character |
| */ |
| public void skipToLastOfChar(char chr, boolean skipPastChar) { |
| int ch = -1; |
| |
| // Move the cursor to the end offset to scan backwards. |
| fCurrentOffset = fEndOffset; |
| |
| do { |
| ch = loadPreviousChar(); |
| |
| if(ch == -1) |
| return; |
| |
| if (ch == chr) { |
| if(skipPastChar) |
| loadNextChar(); |
| return; |
| } |
| |
| } while(true); |
| } |
| |
| // |
| // |
| // |
| public void skipPastSpaces() { |
| int ch = fMostRecentChar; |
| if (ch == -1) { |
| // changeReaders().skipPastSpaces(); |
| return; |
| } |
| while (true) { |
| if (ch > 0x20 || (ch != 0x20 && ch != 0x0A && ch != 0x09 && ch != 0x0D)) { |
| fMostRecentChar = ch; |
| return; |
| } |
| if (++fCurrentOffset >= fEndOffset) { |
| return; |
| } |
| ch = fData.charAt(fCurrentOffset); |
| } |
| } |
| |
| // |
| // |
| // |
| |
| public void skipPastNameAndPEReference(char fastcheck) { |
| int ch = fMostRecentChar; |
| |
| if (ch != '%') { |
| if (ch < 0x80) { |
| if (ch == -1 || XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0) |
| return; |
| } |
| else { |
| if (!fCalledCharPropInit) { |
| XMLCharacterProperties.initCharFlags(); |
| fCalledCharPropInit = true; |
| } |
| if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) |
| return; |
| } |
| } |
| while (true) { |
| ch = loadNextChar(); |
| if (fastcheck == ch) |
| return; |
| if (ch == '%' || ch == ';') { |
| continue; |
| } |
| |
| if (ch < 0x80) { |
| if (ch == -1 || XMLCharacterProperties.fgAsciiNameChar[ch] == 0) |
| return; |
| } |
| else { |
| if (!fCalledCharPropInit) { |
| XMLCharacterProperties.initCharFlags(); |
| fCalledCharPropInit = true; |
| } |
| if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) |
| return; |
| } |
| } |
| } |
| |
| public void skipPastName(char fastcheck) { |
| int ch = fMostRecentChar; |
| // System.out.println("skippastName ch: " +ch); |
| |
| if (ch < 0x80) { |
| if (ch == -1 || XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0) |
| return; |
| } |
| else { |
| if (!fCalledCharPropInit) { |
| XMLCharacterProperties.initCharFlags(); |
| fCalledCharPropInit = true; |
| } |
| if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) |
| return; |
| } |
| |
| while (true) { |
| ch = loadNextChar(); |
| if (fastcheck == ch) |
| return; |
| if (ch < 0x80) { |
| if (ch == -1 || XMLCharacterProperties.fgAsciiNameChar[ch] == 0) |
| return; |
| } |
| else { |
| if (!fCalledCharPropInit) { |
| XMLCharacterProperties.initCharFlags(); |
| fCalledCharPropInit = true; |
| } |
| if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) |
| return; |
| } |
| } |
| } |
| |
| // |
| // |
| // |
| |
| public void skipPastNmtoken(char fastcheck) { |
| int ch = fMostRecentChar; |
| while (true) { |
| if (fastcheck == ch) |
| return; |
| if (ch < 0x80) { |
| if (ch == -1 || XMLCharacterProperties.fgAsciiNameChar[ch] == 0) |
| return; |
| } |
| else { |
| if (!fCalledCharPropInit) { |
| XMLCharacterProperties.initCharFlags(); |
| fCalledCharPropInit = true; |
| } |
| if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) |
| return; |
| } |
| ch = loadNextChar(); |
| } |
| } |
| |
| public void skipPastNmtokenAndPEReference(char fastcheck) { |
| int ch = fMostRecentChar; |
| while (true) { |
| if (fastcheck == ch) |
| return; |
| if (ch == '%' || ch == ';') { |
| ch = loadNextChar(); |
| continue; |
| } |
| if (ch < 0x80) { |
| if (ch == -1 || XMLCharacterProperties.fgAsciiNameChar[ch] == 0) |
| return; |
| } |
| else { |
| if (!fCalledCharPropInit) { |
| XMLCharacterProperties.initCharFlags(); |
| fCalledCharPropInit = true; |
| } |
| if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) |
| return; |
| } |
| ch = loadNextChar(); |
| } |
| } |
| |
| // |
| // |
| // |
| public boolean skippedString(char[] s) { |
| int ch = fMostRecentChar; |
| if (ch != s[0]) { |
| return false; |
| } |
| if (fCurrentOffset + s.length > fEndOffset) |
| return false; |
| for (int i = 1; i < s.length; i++) { |
| if (fData.charAt(fCurrentOffset + i) != s[i]) |
| return false; |
| } |
| fCurrentOffset += (s.length - 1); |
| loadNextChar(); |
| return true; |
| } |
| |
| // |
| // |
| // |
| public int scanInvalidChar() throws Exception { |
| int ch = fMostRecentChar; |
| loadNextChar(); |
| return ch; |
| } |
| |
| // |
| // |
| // |
| /* |
| * public int scanCharRef(boolean hex) throws Exception { int ch = |
| * fMostRecentChar; if (ch == -1) // return |
| * changeReaders().scanCharRef(hex); return ch; int num = 0; if (hex) { if |
| * (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) return |
| * XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR; num = ch - (ch < 'A' ? |
| * '0' : (ch < 'a' ? 'A' : 'a') - 10); } else { if (ch < '0' || ch > '9') |
| * return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR; num = ch - '0'; } |
| * boolean toobig = false; while (true) { ch = loadNextChar(); if (ch == |
| * -1) return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED; if (hex) { |
| * if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) |
| * break; } else { if (ch < '0' || ch > '9') break; } if (hex) { int dig = |
| * ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10); num = (num << 4) + |
| * dig; } else { int dig = ch - '0'; num = (num * 10) + dig; } if (num > |
| * 0x10FFFF) { toobig = true; num = 0; } } if (ch != ';') return |
| * XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED; loadNextChar(); if |
| * (toobig) return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE; return |
| * num; } |
| */ |
| // |
| // |
| // |
| /* |
| * public int scanStringLiteral() throws Exception { boolean single; if |
| * (!(single = lookingAtChar('\'', true)) && !lookingAtChar('\"', true)) { |
| * return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED; } int offset = |
| * fCurrentOffset; char qchar = single ? '\'' : '\"'; while |
| * (!lookingAtChar(qchar, false)) { if (!lookingAtValidChar(true)) { |
| * return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR; } } // int |
| * stringIndex = addString(offset, fCurrentOffset - offset); int |
| * stringIndex = addString(offset, fCurrentOffset - offset); |
| * lookingAtChar(qchar, true); // move past qchar return stringIndex; } |
| */ |
| |
| // |
| // |
| public boolean scanAttValue(char qchar) { |
| boolean result = true; |
| while (true) { |
| if (lookingAtChar(qchar, false)) { |
| break; |
| } |
| if (lookingAtChar(' ', true)) { |
| continue; |
| } |
| if (!lookingAtValidChar(true)) { |
| result = false; |
| } |
| } |
| lookingAtChar(qchar, true); |
| return result; |
| } |
| |
| // |
| // [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
| // | "'" ([^%&'] | PEReference | Reference)* "'" |
| // |
| // The values in the following table are defined as: |
| // |
| // 0 - not special |
| // 1 - quote character |
| // 2 - reference |
| // 3 - peref |
| // 4 - invalid |
| // |
| public static final byte fgAsciiEntityValueChar[] = {4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 1, 0, 0, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, // '\"', |
| // '%', |
| // '&', |
| // '\'' |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
| |
| /* |
| * public int scanEntityValue(int qchar, boolean createString) throws |
| * Exception { int offset = fCurrentOffset; int ch = fMostRecentChar; |
| * while (true) { if (ch == -1) { changeReaders(); // do not call next |
| * reader, our caller may need to change the parameters return |
| * XMLEntityHandler.ENTITYVALUE_RESULT_END_OF_INPUT; } if (ch < 0x80) { |
| * switch (fgAsciiEntityValueChar[ch]) { case 1: // quote char if (ch == |
| * qchar) { if (!createString) return |
| * XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED; int length = |
| * fCurrentOffset - offset; int result = length == 0 ? |
| * StringPool.EMPTY_STRING : addString(offset, length); loadNextChar(); |
| * return result; } // the other quote character is not special // fall |
| * through case 0: // non-special char if (++fCurrentOffset >= fEndOffset) { |
| * if (oweTrailingSpace) { oweTrailingSpace = false; ch = fMostRecentChar = ' '; } |
| * else { ch = fMostRecentChar = -1; } } else { ch = fMostRecentChar = |
| * fData.charAt(fCurrentOffset); } continue; case 2: // reference return |
| * XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE; case 3: // peref return |
| * XMLEntityHandler.ENTITYVALUE_RESULT_PEREF; case 4: // invalid return |
| * XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR; } } else if (ch < |
| * 0xD800) { ch = loadNextChar(); } else if (ch >= 0xE000 && (ch <= 0xFFFD || |
| * (ch >= 0x10000 && ch <= 0x10FFFF))) { // // REVISIT - needs more code |
| * to check surrogates. // ch = loadNextChar(); } else { return |
| * XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR; } } } |
| */ |
| |
| |
| } |