/******************************************************************************* | |
* Copyright (c) 2004, 2010 IBM Corporation and others. | |
* All rights reserved. This program and the accompanying materials | |
* are made available under the terms of the Eclipse Public License v1.0 | |
* which accompanies this distribution, and is available at | |
* http://www.eclipse.org/legal/epl-v10.html | |
* | |
* Contributors: | |
* IBM Corporation - initial API and implementation | |
*******************************************************************************/ | |
package org.eclipse.wst.xml.core.internal.parser; | |
import java.io.CharArrayReader; | |
import java.io.IOException; | |
import java.util.ArrayList; | |
import java.util.Iterator; | |
import java.util.List; | |
import org.eclipse.wst.sse.core.internal.ltk.parser.BlockMarker; | |
import org.eclipse.wst.sse.core.internal.ltk.parser.BlockTokenizer; | |
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion; | |
import org.eclipse.wst.sse.core.internal.util.Debug; | |
import org.eclipse.wst.sse.core.utils.StringUtils; | |
import org.eclipse.wst.xml.core.internal.Logger; | |
import org.eclipse.wst.xml.core.internal.parser.regions.XMLParserRegionFactory; | |
import org.eclipse.wst.xml.core.internal.regions.DOMRegionContext; | |
%% | |
%{ | |
private int fTokenCount = 0; | |
// required holders for white-space compacting | |
private boolean fShouldLoadBuffered = false; | |
private String fBufferedContext = null; | |
private int fBufferedStart = 1; | |
private int fBufferedLength = 0; | |
private String f_context = null; | |
// state stack for handling embedded regions | |
private IntStack fStateStack = new IntStack(); | |
private String context = null; | |
private int start = 0; | |
private int textLength = 0; | |
private int length = 0; | |
// offset for tracking position specific block tags | |
private int fOffset = 0; | |
// the name of the current tag being opened | |
private String fCurrentTagName = null; | |
// the list of tag name BlockMarkers | |
private List fBlockMarkers = new ArrayList(); | |
// required to not seek text blocks on an end tag | |
private boolean fIsBlockingEnabled = false; | |
private boolean fIsCaseSensitiveBlocking = true; | |
private XMLParserRegionFactory fRegionFactory = new XMLParserRegionFactory(); | |
/** | |
* user method | |
*/ | |
public final void addBlockMarker(BlockMarker marker) { | |
if(containsTagName(marker.getTagName())) | |
return; | |
fBlockMarkers.add(marker); | |
} | |
/** | |
* user method | |
*/ | |
public final void removeBlockMarker(BlockMarker marker) { | |
fBlockMarkers.remove(marker); | |
} | |
/** | |
* user method | |
*/ | |
public final void removeBlockMarker(String tagname) { | |
if (fBlockMarkers != null) { | |
Iterator blocks = fBlockMarkers.iterator(); | |
while (blocks.hasNext()) { | |
if (((BlockMarker) blocks.next()).getTagName().equals(tagname)) | |
blocks.remove(); | |
} | |
} | |
} | |
/* user method */ | |
public final boolean isCaseSensitiveBlocking() { | |
return fIsCaseSensitiveBlocking; | |
} | |
/* user method */ | |
public final void setCaseSensitiveBlocking(boolean newValue) { | |
fIsCaseSensitiveBlocking = newValue; | |
} | |
/* user method */ | |
public boolean getBlockMarkerCaseSensitivity() { | |
return getBlockMarkerCaseSensitivity(fCurrentTagName); | |
} | |
/* user method */ | |
public boolean getBlockMarkerCaseSensitivity(String name) { | |
Iterator iterator = fBlockMarkers.iterator(); | |
while(iterator.hasNext()) { | |
BlockMarker marker = (BlockMarker)iterator.next(); | |
boolean casesensitive = marker.isCaseSensitive(); | |
if(casesensitive && marker.getTagName().equals(name)) | |
return casesensitive; | |
else if(!casesensitive && marker.getTagName().equalsIgnoreCase(name)) | |
return casesensitive; | |
} | |
return true; | |
} | |
/* user method */ | |
public String getBlockMarkerContext() { | |
return getBlockMarkerContext(fCurrentTagName); | |
} | |
/* user method */ | |
public String getBlockMarkerContext(String name) { | |
Iterator iterator = fBlockMarkers.iterator(); | |
while(iterator.hasNext()) { | |
BlockMarker marker = (BlockMarker)iterator.next(); | |
if(marker.getTagName().equals(name)) | |
return marker.getContext(); | |
} | |
return BLOCK_TEXT; | |
} | |
/* user method */ | |
public List getBlockMarkers() { | |
return fBlockMarkers; | |
} | |
/* user method */ | |
public final int getOffset() { | |
return fOffset + yychar; | |
} | |
private final boolean isBlockMarker() { | |
return isBlockMarker(fCurrentTagName); | |
} | |
private final boolean isBlockMarker(String tagName) { | |
if (!fIsBlockingEnabled) | |
return false; | |
return containsTagName(tagName); | |
} | |
/** | |
* user method | |
*/ | |
public final void beginBlockTagScan(String newTagName) { | |
beginBlockMarkerScan(newTagName, BLOCK_TEXT); | |
} | |
/** | |
* user method | |
* | |
* Special tokenizer setup. Allows tokenization to be initiated at the | |
* start of a text block within a "newTagName" tag. | |
* | |
* Example: | |
* Tokenizer toker = new Tokenizer(); | |
* toker.setCaseSensitiveBlocking(false); | |
* toker.reset(new java.io.StringReader("afiuhqwkejhtasihgalkwhtq</scripter></scr></script>asgdasga")); | |
* toker.beginBlockMarkerScan("script", BLOCK_TEXT); | |
* toker.getRegions(); | |
* | |
* Returns: | |
* BLOCK_TEXT: 0-40 | |
* XML_END_TAG_OPEN: 41-42 | |
* XML_TAG_NAME: 43-48 | |
* XML_TAG_CLOSE: 49-49 | |
* XML_CONTENT: 50-57 | |
* | |
*/ | |
public final void beginBlockMarkerScan(String newTagName, String blockcontext) { | |
yybegin(ST_BLOCK_TAG_SCAN); | |
fCurrentTagName = newTagName; | |
} | |
/** | |
* Method doScan. | |
* | |
* Returns a context region for all of the text from the current position upto the end of input or | |
* to right *before* the first occurence of searchString | |
* | |
* @param searchString - target string to search for ex.: "-->", "</tagname" | |
* @param requireTailSeparator - whether the target must be immediately followed by whitespace or '>' | |
* @param context - the context of the scanned region if non-zero length | |
* @param exitState - the state to go to if the region was of non-zero length | |
* @param abortState - the state to go to if the searchString was found immediately | |
* @return String - the context found: the desired context on a non-zero length match, the abortContext on immediate success | |
* @throws IOException | |
*/ | |
private final String doScan(String searchString, boolean requireTailSeparator, String searchContext, int exitState, int immediateFallbackState) throws IOException { | |
boolean stillSearching = true; | |
// Disable further block (probably) | |
fIsBlockingEnabled = false; | |
int searchStringLength = searchString.length(); | |
int n = 0; | |
char lastCheckChar; | |
int i; | |
boolean same = false; | |
while (stillSearching) { | |
n = 0; | |
// Ensure that enough data from the input exists to compare against the search String. | |
n = yy_advance(); | |
while(n != YYEOF && yy_currentPos < searchStringLength) | |
n = yy_advance(); | |
// If the input was too short or we've exhausted the input, stop immediately. | |
if (n == YYEOF) { | |
stillSearching = false; | |
} | |
else { | |
same = true; | |
// Ensure that we've not encountered a complete block (<%%>) that was *shorter* than the closeTagString and | |
// thus found twice at current-targetLength [since the first scan would have come out this far anyway]. | |
// Check the characters in the target versus the last targetLength characters read from the buffer | |
// and see if it matches | |
// safety check for array accesses (yy_currentPos is the *last* character we can check against) | |
if(yy_currentPos >= searchStringLength && yy_currentPos <= yy_buffer.length) { | |
for(i = 0; i < searchStringLength; i++) { | |
if(same && fIsCaseSensitiveBlocking) | |
same = yy_buffer[i + yy_currentPos - searchStringLength] == searchString.charAt(i); | |
else if(same && !fIsCaseSensitiveBlocking) | |
same = Character.toLowerCase(yy_buffer[i + yy_currentPos - searchStringLength]) == Character.toLowerCase(searchString.charAt(i)); | |
} | |
} | |
// safety check failed; no match is possible right now | |
else { | |
same = false; | |
} | |
if (same && requireTailSeparator && yy_currentPos < yy_buffer.length) { | |
// Additional check for close tags to ensure that targetString="</script" doesn't match | |
// "</scriptS" | |
lastCheckChar = yy_buffer[yy_currentPos]; | |
// Succeed on "</script>" and "</script " | |
if(lastCheckChar == '>' || Character.isWhitespace(lastCheckChar)) | |
stillSearching = false; | |
} | |
else { | |
stillSearching = !same || (yy_currentPos < yy_startRead + searchStringLength); | |
} | |
} | |
} | |
if (n != YYEOF || same) { | |
// We've stopped short of the end or definitely found a match | |
yy_markedPos = yy_currentPos - searchStringLength; | |
yy_currentPos = yy_markedPos + 1; | |
// If the searchString occurs at the very beginning of what would have | |
// been a Block, resume scanning normally immediately | |
if (yy_markedPos == yy_startRead) { | |
yybegin(immediateFallbackState); | |
return primGetNextToken(); | |
} | |
} | |
else { | |
// We ran through the rest of the input | |
yy_markedPos = yy_currentPos; | |
yy_currentPos++; | |
} | |
yybegin(exitState); | |
// If the ending occurs at the very beginning of what would have | |
// been a Block, resume scanning normally immediately | |
if(yy_markedPos == yy_startRead) | |
return primGetNextToken(); | |
return searchContext; | |
} | |
/** | |
* user method | |
* | |
* A generic lookahead-like operation | |
*/ | |
private final String doBlockScan(String target, String targetContext, int immediateFallbackState) throws IOException { | |
return doScan(target, false, targetContext, immediateFallbackState, immediateFallbackState); | |
} | |
/** | |
* user method | |
* does a lookahead for the current tag name | |
*/ | |
private final String doBlockTagScan() throws IOException { | |
fIsCaseSensitiveBlocking = getBlockMarkerCaseSensitivity(); | |
return doScan("</" + fCurrentTagName, true, getBlockMarkerContext(fCurrentTagName), YYINITIAL, YYINITIAL); | |
} | |
/** | |
* user method | |
* | |
* Converts the raw context String returned by the primGetNextToken() | |
* method into a full ITextRegion by pulling in values for the | |
* current offset within the scanning text. | |
* | |
* Returns null when EOF is encountered and attaches intermittently | |
* discovered whitespace onto the end of useful regions. | |
* | |
* Note that this algorithm caches the token following the one being returned | |
* so that whitespace can be collapsed. | |
*/ | |
public final ITextRegion getNextToken() throws IOException { | |
// load the starting non-whitespace token (assume that it is so) | |
if (fShouldLoadBuffered) { | |
context = fBufferedContext; | |
start = fBufferedStart; | |
textLength = length = fBufferedLength; | |
fShouldLoadBuffered = false; | |
} | |
else { | |
context = primGetNextToken(); | |
if (context == XML_TAG_NAME) { | |
if(containsTagName(yy_buffer, yy_startRead, yy_markedPos-yy_startRead)) | |
fCurrentTagName = yytext(); | |
else | |
fCurrentTagName = null; | |
} | |
else if (context == XML_TAG_OPEN) { | |
fIsBlockingEnabled = true; | |
} | |
else if (context == XML_END_TAG_OPEN) { | |
fIsBlockingEnabled = false; | |
} | |
start = yychar; | |
textLength = length = yylength(); | |
if (yy_atEOF) { | |
fTokenCount++; | |
return null; | |
} | |
} | |
// store the next token | |
f_context = primGetNextToken(); | |
if (f_context == XML_TAG_NAME) { | |
if(containsTagName(yy_buffer, yy_startRead, yy_markedPos-yy_startRead)) | |
fCurrentTagName = yytext(); | |
else | |
fCurrentTagName = null; | |
} | |
else if (f_context == XML_TAG_OPEN) { | |
fIsBlockingEnabled = true; | |
} | |
else if (f_context == XML_END_TAG_OPEN) { | |
fIsBlockingEnabled = false; | |
} | |
fBufferedContext = f_context; | |
fBufferedStart = yychar; | |
fBufferedLength = yylength(); | |
fShouldLoadBuffered = true; | |
if (fBufferedContext == WHITE_SPACE) { | |
fShouldLoadBuffered = false; | |
length += fBufferedLength; | |
} | |
if (context == null) { | |
// EOF | |
if (Debug.debugTokenizer) { | |
System.out.println(getClass().getName() + " discovered " + fTokenCount + " tokens."); //$NON-NLS-2$//$NON-NLS-1$ | |
} | |
return null; | |
} | |
fTokenCount++; | |
return fRegionFactory.createToken(context, start, textLength, length, null, fCurrentTagName); | |
} | |
/* user method */ | |
public XMLTokenizer(){ | |
super(); | |
} | |
/* user method */ | |
public XMLTokenizer(char[] charArray){ | |
this(new CharArrayReader(charArray)); | |
} | |
/* user method */ | |
public void reset(char[] charArray) { | |
reset(new CharArrayReader(charArray), 0); | |
} | |
/* user method */ | |
public void reset(char[] charArray, int newOffset) { | |
reset(new CharArrayReader(charArray), newOffset); | |
} | |
/* user method */ | |
public void reset(java.io.InputStream in) { | |
reset(new java.io.InputStreamReader(in), 0); | |
} | |
/* user method */ | |
public void reset(java.io.InputStream in, int newOffset) { | |
reset(new java.io.InputStreamReader(in), newOffset); | |
} | |
/* user method */ | |
public void reset(java.io.Reader in) { | |
reset(in, 0); | |
} | |
/** | |
* user method * | |
* | |
* Reset internal counters and vars to "newly created" values, in the hopes | |
* that resetting a pre-existing tokenizer is faster than creating a new one. | |
* | |
* This method contains code blocks that were essentially duplicated from the | |
* <em>generated</em> output of this specification before this method was | |
* added. Those code blocks were under the above copyright. | |
*/ | |
public void reset(java.io.Reader in, int newOffset) { | |
if (Debug.debugTokenizer) { | |
System.out.println("resetting tokenizer");//$NON-NLS-1$ | |
} | |
fOffset = newOffset; | |
/* the input device */ | |
yy_reader = in; | |
/* the current state of the DFA */ | |
yy_state = 0; | |
/* the current lexical state */ | |
yy_lexical_state = YYINITIAL; | |
/* this buffer contains the current text to be matched and is | |
the source of the yytext() string */ | |
java.util.Arrays.fill(yy_buffer, (char)0); | |
/* the textposition at the last accepting state */ | |
yy_markedPos = 0; | |
/* the textposition at the last state to be included in yytext */ | |
yy_pushbackPos = 0; | |
/* the current text position in the buffer */ | |
yy_currentPos = 0; | |
/* startRead marks the beginning of the yytext() string in the buffer */ | |
yy_startRead = 0; | |
/** | |
* endRead marks the last character in the buffer, that has been read | |
* from input | |
*/ | |
yy_endRead = 0; | |
/* number of newlines encountered up to the start of the matched text */ | |
yyline = 0; | |
/* the number of characters up to the start of the matched text */ | |
yychar = 0; | |
/* yy_atEOF == true <=> the scanner has returned a value for EOF */ | |
yy_atEOF = false; | |
/* denotes if the user-EOF-code has already been executed */ | |
yy_eof_done = false; | |
/* user vars: */ | |
fTokenCount = 0; | |
fShouldLoadBuffered = false; | |
fBufferedContext = null; | |
fBufferedStart = 1; | |
fBufferedLength = 0; | |
fStateStack = new IntStack(); | |
context = null; | |
start = 0; | |
textLength = 0; | |
length = 0; | |
} | |
/** | |
* user method | |
* | |
*/ | |
public BlockTokenizer newInstance() { | |
XMLTokenizer newInstance = new XMLTokenizer(); | |
// global tagmarkers can be shared; they have no state and | |
// are never destroyed (e.g. 'release') | |
for(int i = 0; i < fBlockMarkers.size(); i++) { | |
BlockMarker blockMarker = (BlockMarker) fBlockMarkers.get(i); | |
if(blockMarker.isGlobal()) | |
newInstance.addBlockMarker(blockMarker); | |
} | |
return newInstance; | |
} | |
/* user method */ | |
private final String scanXMLCommentText() throws IOException { | |
// Scan for '-->' and return the text up to that point as | |
// XML_COMMENT_TEXT unless the string occurs IMMEDIATELY, in which | |
// case change to the ST_XML_COMMENT_END state and return the next | |
// context as usual. | |
return doScan("-->", false, XML_COMMENT_TEXT, ST_XML_COMMENT_END, ST_XML_COMMENT_END); | |
} | |
%} | |
%eof{ | |
// do nothing, this is the downstream parser's job | |
%eof} | |
%public | |
%class XMLTokenizer | |
%implements BlockTokenizer, DOMRegionContext | |
%function primGetNextToken | |
%type String | |
%char | |
%line | |
%unicode | |
%pack | |
%state ST_CDATA_TEXT | |
%state ST_CDATA_END | |
%state ST_XML_COMMENT | |
%state ST_XML_COMMENT_END | |
%state ST_PI | |
%state ST_PI_WS | |
%state ST_PI_CONTENT | |
%state ST_XML_PI_ATTRIBUTE_NAME | |
%state ST_XML_PI_EQUALS | |
%state ST_XML_PI_ATTRIBUTE_VALUE | |
%state ST_XML_PI_TAG_CLOSE | |
%state ST_DHTML_ATTRIBUTE_NAME | |
%state ST_DHTML_EQUALS | |
%state ST_DHTML_ATTRIBUTE_VALUE | |
%state ST_DHTML_TAG_CLOSE | |
// normal tag states | |
%state ST_XML_TAG_NAME | |
%state ST_XML_ATTRIBUTE_NAME | |
%state ST_XML_EQUALS | |
%state ST_XML_ATTRIBUTE_VALUE | |
// declaration (DTD) states | |
%state ST_XML_DECLARATION | |
%state ST_XML_DECLARATION_CLOSE | |
%state ST_XML_DOCTYPE_DECLARATION | |
%state ST_XML_DOCTYPE_EXTERNAL_ID | |
%state ST_XML_DOCTYPE_ID_PUBLIC | |
%state ST_XML_DOCTYPE_ID_SYSTEM | |
%state ST_XML_ELEMENT_DECLARATION | |
%state ST_XML_ELEMENT_DECLARATION_CONTENT | |
%state ST_XML_ATTLIST_DECLARATION | |
%state ST_XML_ATTLIST_DECLARATION_CONTENT | |
%state ST_BLOCK_TAG_SCAN | |
// Letter = ([A-Za-z]) | |
// Digit = ([0-9]) | |
/** | |
* smaller tokens | |
*/ | |
genericTagOpen = < | |
genericTagClose = > | |
genericEndTagOpen = <\/ | |
genericEmptyTagClose = \/> | |
PIstart = <\? | |
PIend = \?> | |
// [1] document ::= prolog element Misc* | |
document = ({prolog} {element} {Misc}*) | |
// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] | |
//Char = (.) | |
Char = [\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD] | |
// [3] S ::= (0x20 | 0x9 | 0xD | 0xA)+ | |
S = [\x20\x09\x0D\x0A]+ | |
// [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender | |
NameChar = ({Letter} | {Digit} | \. | \- | _ | : | {CombiningChar} | {Extender}) | |
// [5] Name ::= (Letter | '_' | ':') NameChar* | |
//Name = ({NameChar}{NameChar}*) | |
Name = ({Letter} | _ | :){NameChar}* | |
// [6] Names ::= {Name} ({S} {Name})* | |
Names = ({Name} ({S} {Name})*) | |
// [7] Nmtoken ::= (NameChar)+ | |
Nmtoken = ({NameChar}+) | |
// [8] Nmtokens ::= Nmtoken (S Nmtoken)* | |
Nmtokens = ({Nmtoken} ({S} {Nmtoken})*) | |
// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" | |
EntityValue = (\" ([^%&\"] | {PEReference} | {Reference})* \" | \' ([^%&\'] | {PEReference} | {Reference})* \') | |
// [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" | |
AttValue = ( \" ([^\"] | {Reference})* \" | \' ([^\'] | {Reference})* \' | ([^\'\"\040\011\012\015<>/]|\/+[^\'\"\040\011\012\015<>/] )* ) | |
// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") | |
SystemLiteral = ((\" [^\"]* \") | (\' [^\']* \')) | |
// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" | |
PubidLiteral = (\" {PubidChar}* \" | \' ({PubidChar}\')* "'") | |
// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] | |
PubidChar = ([\040\015\012] | [a-zA-Z0-9] | [\-\'()\+,.\/:=?;!\*#@\$_%]) | |
// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) | |
// implement lookahead behavior during action definition | |
CharData = ([^<&(\]\]>)]*) | |
// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' | |
CommentStart = (<!\-\-) | |
CommentEnd = (\-\->) | |
Comment = ({CommentStart}.*{CommentEnd}) | |
// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' | |
PI = (<\?{PITarget} {Char}* \?>) | |
// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) | |
PITarget = ({Name}((X|x)(M|m)(L|l))) | |
// [18] CDSect ::= CDStart CData CDEnd | |
CDSect = ({CDStart}{CData}{CDEnd}) | |
// [19] CDStart ::= '<![CDATA[' | |
CDStart = <!\[CDATA\[ | |
// [20] CData ::= (Char* - (Char* ']]>' Char*)) | |
// implement lookahead behavior during action definition | |
CData = ([^(\]\]>)]*) | |
// [21] CDEnd ::= ']]>' | |
CDEnd = (\]\]>) | |
// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? | |
prolog = ({XMLDecl}? {Misc}* ({doctypedecl} {Misc}*)?) | |
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' | |
XMLDecl = (<\?xml {VersionInfo} {EncodingDecl}? {SDDecl}? {S}? \?>) | |
// [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") | |
VersionInfo = ({S}version{Eq}(\'{VersionNum}\' | \"{VersionNum}\")) | |
// [25] Eq ::= S? '=' S? | |
Eq = (\=) | |
// [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ | |
VersionNum = (([a-zA-Z0-9_.:]|\-)+) | |
// [27] Misc ::= Comment | PI | S | |
Misc = ({Comment} | {PI} | {S}) | |
// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' | |
doctypedecl = (<\!DOCTYPE{S}{Name} ({S}{ExternalID})? {S}? (\[ ({markupdecl}|{PEReference}|{S})* \]{S}?)?>) | |
// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment | |
markupdecl = ({elementdecl} | {AttlistDecl} | {EntityDecl} | {NotationDecl} | {PI} | {Comment}) | |
// [30] extSubset ::= TextDecl? extSubsetDecl | |
extSubset = ({TextDecl}? {extSubsetDecl}) | |
// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | PEReference | S )* | |
extSubsetDecl = (( {markupdecl} | {conditionalSect} | {PEReference} | {S} )*) | |
// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) | |
SDDecl = ({S}standalone{Eq}{S}*((\'(yes|no)\')|(\"(yes|no)\"))) | |
// [33] LanguageID ::= Langcode ('-' Subcode)* | |
LanguageID = ({Langcode}(\-{Subcode})*) | |
// [34] Langcode ::= ISO639Code | IanaCode | UserCode | |
Langcode = ({ISO639Code} | {IanaCode} | {UserCode}) | |
// [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) | |
ISO639Code = (([a-z]|[A-Z])([a-z]|[A-Z])) | |
// [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ | |
IanaCode = ((i|I)\-([a-z]|[A-Z])+) | |
// [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ | |
UserCode = ((x|X)\-([a-z]|[A-Z])+) | |
// [38] Subcode ::= ([a-z] | [A-Z])+ | |
Subcode = (([a-z]|[A-Z])+) | |
// [39] element ::= EmptyElemTag | STag content ETag | |
element = ({EmptyElemTag} | {STag} {content} {ETag}) | |
// [40] STag ::= '<' Name (S Attribute)* S? '>' | |
STag = (<{Name}({S}{Attribute})*{S}?>) | |
// [41] Attribute ::= Name Eq AttValue | |
Attribute = ({Name}{S}*{Eq}{S}*{AttValue}) | |
// [42] ETag ::= 'Name S? '>' | |
ETag = (<\/{Name}{S}?>) | |
// [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* | |
content = (({element} | {CharData} | {Reference} | {CDSect} | {PI} | {Comment})*) | |
// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' | |
EmptyElemTag = (<{Name}({S}{Attribute})*{S}?\/>) | |
// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' | |
elementdecl = (<\!ELEMENT{S}{Name}{S}{contentspec}{S}?>) | |
// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children | |
contentspec = (EMPTY|ANY|{Mixed}|{children}) | |
// [47] children ::= (choice | seq) ('?' | '*' | '+')? | |
children = (({choice}|{seq})(\?|\*|\+)?) | |
// CAUSES LOOP THROUGH DEFS OF CHOICE AND SEQ | |
// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? | |
cp = (({Name} | {choice} | {seq}) (\?|\*|\+)?) | |
// [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' | |
// choice = \({S}?{cp}({S}?\|{S}?{cp})*{S}?\) | |
choice = \({S}?{Name}({S}?\|{S}?{Name})*{S}?\) | |
// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' | |
// seq = (\({S}?{cp}({S}?\,{S}?{cp})*{S}?\)) | |
seq = (\({S}?{Name}({S}?\,{S}?{Name})*{S}?\)) | |
// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' | |
Mixed = ({S}?\#PCDATA({S}?\|{S}?{Name})*{S}?)*\|({S}?\#PCDATA{S}?) | |
// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' | |
AttlistDecl = (<\!ATTLIST{S}{Name}{AttDef}*{S}?>) | |
// [53] AttDef ::= S Name S AttType S DefaultDecl | |
AttDef = ({S}{Name}{S}{AttType}{S}{DefaultDecl}) | |
// [54] AttType ::= StringType | TokenizedType | EnumeratedType | |
AttType = ({StringType} | {TokenizedType} | {EnumeratedType}) | |
// [55] StringType ::= 'CDATA' | |
StringType = (CDATA) | |
// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' | |
TokenizedType = (ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS) | |
// [57] EnumeratedType ::= NotationType | Enumeration | |
EnumeratedType = ({NotationType} | {Enumeration}) | |
// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' | |
NotationType = (NOTATION{S}\({S}?{Name}({S}?\|{S}?{Name})*{S}?\)) | |
// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' | |
Enumeration = (\({S}?{Nmtoken}({S}?\|{S}?{Nmtoken})*{S}?\)) | |
// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) | |
DefaultDecl = (\#REQUIRED|\#IMPLIED|((\#FIXED{S})?{AttValue})) | |
// [61] conditionalSect ::= includeSect | ignoreSect | |
conditionalSect = ({includeSect} | {ignoreSect}) | |
// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' | |
includeSect = (<\!\[{S}?INCLUDE{S}?\[{extSubsetDecl}\]\]>) | |
// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' | |
ignoreSect = (<\!\[{S}?IGNORE{S}?\[{ignoreSectContents}*\]\]>) | |
// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* | |
ignoreSectContents = ({Ignore}(<\!\[{ignoreSectContents}\]\]>{Ignore})*) | |
// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) | |
Ignore = ([^(\<\!\[|\]\]\>)]*) | |
// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' | |
CharRef = (&#[0-9]+;|&#x[0-9a-fA-F]+;) | |
// [67] Reference ::= EntityRef | CharRef | |
Reference = ({EntityRef} | {CharRef}) | |
// [68] EntityRef = '&' Name ';' | |
EntityRef = (&{Name};) | |
// [69] PEReference ::= '%' Name ';' | |
PEReference = (%{Name};) | |
// [70] EntityDecl ::= GEDecl | PEDecl | |
EntityDecl = ({GEDecl} | {PEDecl}) | |
// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' | |
GEDecl = (<\!ENTITY{S}{Name}{S}{EntityDef}{S}?>) | |
// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' | |
PEDecl = (<\!ENTITY{S}\%{S}{Name}{S}{PEDef}{S}?>) | |
// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) | |
EntityDef = ({EntityValue} | ({ExternalID}{NDataDecl}?)) | |
// [74] PEDef ::= EntityValue | ExternalID | |
PEDef = ({EntityValue} | {ExternalID}) | |
// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral | |
ExternalID = (SYSTEM{S}{SystemLiteral}|PUBLIC{S}{PubidLiteral}{S}{SystemLiteral} ) | |
// [76] NDataDecl ::= S 'NDATA' S Name | |
NDataDecl = ({S}NDATA{S}{Name}) | |
// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' | |
TextDecl = (<\?xml{VersionInfo}?{EncodingDecl}{S}?\?>) | |
// [78] extParsedEnt ::= TextDecl? content | |
extParsedEnt = ({TextDecl}?{content}) | |
// [79] extPE ::= TextDecl? extSubsetDecl | |
extPE = ({TextDecl}?{extSubsetDecl}) | |
// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) | |
EncodingDecl = ({S}encoding{S}*{Eq}{S}*(\"{EncName}\"|\'{EncName}\')) | |
// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* | |
EncName = ([A-Za-z]([A-Za-z0-9._]|\-)*) | |
// [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' | |
NotationDecl = (<\!NOTATION{S}{Name}{S}({ExternalID}|{PublicID}){S}?>) | |
// [83] PublicID ::= 'PUBLIC' S PubidLiteral | |
PublicID = (PUBLIC{S}{PubidLiteral}) | |
// [84] Letter ::= BaseChar | Ideographic | |
Letter = ({BaseChar} | {Ideographic}) | |
// [85] BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | |
// | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | |
// | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | |
// | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | |
// | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | |
// | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | |
// | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | |
// | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | |
// | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | |
// | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | |
// | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | |
// | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | |
// | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | |
// | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | |
// | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | |
// | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | |
// | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | |
// | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | |
// | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | |
// | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | |
// | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | |
// | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | |
// | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | |
// | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | |
// | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | |
// | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | |
// | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | |
// | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | |
// | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | |
// | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | |
// | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | |
// | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | |
// | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | |
// | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | |
// | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | |
// | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | |
// | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | |
// | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | |
// | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | |
// | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | |
// | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | |
// | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | |
// | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | |
// | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | |
// | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | |
// | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | |
// | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | |
// | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | |
// | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | |
// | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | |
// | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | |
// | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | |
// | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | |
// | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | |
// | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3] | |
BaseChar = [\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\uAC00-\uD7A3] | |
// [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] | |
Ideographic = [\u4E00-\u9FA5\u3007\u3021-\u3029] | |
// [87] CombiningChar ::= [#x0300-#x0345] | [#x0360-#x0361] | |
// | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | |
// | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | |
// | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | |
// | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | |
// | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | |
// | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | |
// | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | |
// | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | |
// | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | |
// | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | |
// | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | |
// | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | |
// | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | |
// | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | |
// | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | |
// | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | |
// | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | |
// | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | |
// | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | |
// | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | |
// | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | |
// | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | |
// | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | |
// | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | |
// | [#x302A-#x302F] | #x3099 | #x309A | |
CombiningChar = [\u0300-\u0345\u0360-\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0652\u0670\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0901-\u0903\u093C\u093E-\u094C\u094D\u0951-\u0954\u0962-\u0963\u0981-\u0983\u09BC\u09BE\u09BF\u09C0-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09E2-\u09E3\u0A02\u0A3C\u0A3E\u0A3F\u0A40-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B82-\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C82-\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A] | |
// [88] Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | |
// | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | |
// | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | |
// | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | |
// | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29] | |
Digit = [\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29] | |
// [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | |
// | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | |
// | [#x30FC-#x30FE] | |
Extender = [\u00B7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035\u309D-\u309E\u30FC-\u30FE] | |
%% | |
/* white space within a tag */ | |
<ST_XML_EQUALS, ST_XML_ATTRIBUTE_NAME, ST_XML_ATTRIBUTE_VALUE, ST_PI, ST_XML_PI_EQUALS, ST_XML_PI_ATTRIBUTE_NAME, ST_XML_PI_ATTRIBUTE_VALUE, ST_XML_DECLARATION, ST_XML_DOCTYPE_DECLARATION, ST_XML_ELEMENT_DECLARATION, ST_XML_ATTLIST_DECLARATION, ST_XML_DECLARATION_CLOSE, ST_XML_DOCTYPE_ID_PUBLIC, ST_XML_DOCTYPE_ID_SYSTEM, ST_XML_DOCTYPE_EXTERNAL_ID,ST_DHTML_ATTRIBUTE_NAME,ST_DHTML_EQUALS,ST_DHTML_ATTRIBUTE_VALUE,ST_DHTML_TAG_CLOSE> {S}* { | |
if(Debug.debugTokenizer) | |
dump("white space");//$NON-NLS-1$ | |
return WHITE_SPACE; | |
} | |
// BEGIN REGULAR XML | |
/* handle opening a new tag almost anywhere */ | |
<YYINITIAL, ST_XML_TAG_NAME, ST_XML_EQUALS, ST_XML_ATTRIBUTE_NAME, ST_XML_DECLARATION, ST_XML_ATTRIBUTE_VALUE> {genericTagOpen} { | |
if(Debug.debugTokenizer) | |
dump("\nstart tag open");//$NON-NLS-1$ | |
yybegin(ST_XML_TAG_NAME); | |
return XML_TAG_OPEN; | |
} | |
/* unquoted */ | |
<YYINITIAL, ST_XML_TAG_NAME, ST_XML_EQUALS, ST_XML_ATTRIBUTE_NAME, ST_XML_ATTRIBUTE_VALUE, ST_XML_DECLARATION> {genericEndTagOpen} { | |
if(Debug.debugTokenizer) | |
dump("\nend tag open");//$NON-NLS-1$ | |
yybegin(ST_XML_TAG_NAME); | |
return XML_END_TAG_OPEN; | |
} | |
/* the tag's name was found, start scanning for attributes */ | |
<ST_XML_TAG_NAME> {Name} { | |
if(Debug.debugTokenizer) | |
dump("tag name");//$NON-NLS-1$ | |
yybegin(ST_XML_ATTRIBUTE_NAME); | |
return XML_TAG_NAME; | |
} | |
/* another attribute name was found, resume looking for the equals sign */ | |
<ST_XML_ATTRIBUTE_NAME, ST_XML_EQUALS> {Name} { | |
if(Debug.debugTokenizer) | |
dump("attr name");//$NON-NLS-1$ | |
yybegin(ST_XML_EQUALS); | |
return XML_TAG_ATTRIBUTE_NAME; | |
} | |
/* an equal sign was found, what's next is the value */ | |
<ST_XML_EQUALS> {Eq} { | |
if(Debug.debugTokenizer) | |
dump("equals");//$NON-NLS-1$ | |
yybegin(ST_XML_ATTRIBUTE_VALUE); | |
return XML_TAG_ATTRIBUTE_EQUALS; | |
} | |
/* the value was found, look for the next name */ | |
/* allow for unbalanced quotes, mostly a duplicate of the AttValue rule */ | |
<ST_XML_ATTRIBUTE_VALUE> {AttValue} | ([\'\"]([^\'\"\040\011\012\015<>/]|\/+[^\'\"\040\011\012\015<>/] )* ) { | |
if(Debug.debugTokenizer) | |
dump("attr value");//$NON-NLS-1$ | |
yybegin(ST_XML_ATTRIBUTE_NAME); | |
return XML_TAG_ATTRIBUTE_VALUE; | |
} | |
/* the tag's close was found */ | |
<ST_XML_TAG_NAME, ST_XML_EQUALS, ST_XML_ATTRIBUTE_NAME, ST_XML_ATTRIBUTE_VALUE> {genericTagClose} { | |
if(Debug.debugTokenizer) | |
dump("tag close");//$NON-NLS-1$ | |
if(isBlockMarker()) { | |
yybegin(ST_BLOCK_TAG_SCAN); | |
} | |
else | |
yybegin(YYINITIAL); | |
return XML_TAG_CLOSE; | |
} | |
/* the tag's close was found, but the tag doesn't need a matching end tag */ | |
<ST_XML_TAG_NAME, ST_XML_EQUALS, ST_XML_ATTRIBUTE_NAME, ST_XML_ATTRIBUTE_VALUE> {genericEmptyTagClose} { | |
yybegin(YYINITIAL); | |
if(Debug.debugTokenizer) | |
dump("empty tag close");//$NON-NLS-1$ | |
return XML_EMPTY_TAG_CLOSE; | |
} | |
<ST_XML_TAG_NAME> [^</>\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\uAC00-\uD7A3\u4E00-\u9FA5\u3007\u3021-\u3029]* { | |
if(Debug.debugTokenizer) | |
dump("inappropriate tag name");//$NON-NLS-1$ | |
yybegin(YYINITIAL); | |
return XML_CONTENT; | |
} | |
// END REGULAR XML | |
// XML Comments | |
<YYINITIAL, ST_XML_TAG_NAME, ST_XML_EQUALS, ST_XML_ATTRIBUTE_NAME, ST_XML_ATTRIBUTE_VALUE, ST_XML_DECLARATION> {CommentStart} { | |
if(Debug.debugTokenizer) | |
dump("\ncomment start");//$NON-NLS-1$ | |
yybegin(ST_XML_COMMENT); | |
return XML_COMMENT_OPEN; | |
} | |
<ST_XML_COMMENT> .|\r|\n { | |
if(Debug.debugTokenizer) | |
dump("comment content");//$NON-NLS-1$ | |
return scanXMLCommentText(); | |
} | |
<ST_XML_COMMENT_END> {CommentEnd} { | |
if(Debug.debugTokenizer) | |
dump("comment end");//$NON-NLS-1$ | |
yybegin(YYINITIAL); | |
return XML_COMMENT_CLOSE; | |
} | |
// XML misc | |
{CDStart} { | |
if(Debug.debugTokenizer) | |
dump("\nCDATA start");//$NON-NLS-1$ | |
fStateStack.push(yystate()); | |
yybegin(ST_CDATA_TEXT); | |
return XML_CDATA_OPEN; | |
} | |
<ST_CDATA_TEXT> .|\r|\n { | |
if(Debug.debugTokenizer) | |
dump("CDATA text");//$NON-NLS-1$ | |
String blockContext = doBlockScan("]]>", XML_CDATA_TEXT, ST_CDATA_END);//$NON-NLS-1$ | |
if(blockContext == XML_CDATA_TEXT) | |
yybegin(ST_CDATA_END); | |
return blockContext; | |
} | |
<ST_CDATA_END> {CDEnd} { | |
if(Debug.debugTokenizer) | |
dump("CDATA end");//$NON-NLS-1$ | |
yybegin(fStateStack.pop()); | |
return XML_CDATA_CLOSE; | |
} | |
<YYINITIAL> {PEReference} { | |
if(Debug.debugTokenizer) | |
dump("\nPEReference");//$NON-NLS-1$ | |
return XML_PE_REFERENCE; | |
} | |
<YYINITIAL> {CharRef} { | |
if(Debug.debugTokenizer) | |
dump("\nCharRef");//$NON-NLS-1$ | |
return XML_CHAR_REFERENCE; | |
} | |
<YYINITIAL> {EntityRef} { | |
if(Debug.debugTokenizer) | |
dump("\nEntityRef");//$NON-NLS-1$ | |
return XML_ENTITY_REFERENCE; | |
} | |
<YYINITIAL> {PIstart} { | |
if(Debug.debugTokenizer) | |
dump("\nprocessing instruction start");//$NON-NLS-1$ | |
yybegin(ST_PI); | |
return XML_PI_OPEN; | |
} | |
// the next four are order dependent | |
<ST_PI> ((X|x)(M|m)(L|l)) { | |
if(Debug.debugTokenizer) | |
dump("XML processing instruction target");//$NON-NLS-1$ | |
yybegin(ST_XML_PI_ATTRIBUTE_NAME); | |
return XML_TAG_NAME; | |
} | |
<ST_PI> ([iI][mM][pP][oO][rR][tT]{S}*) { | |
if(Debug.debugTokenizer) | |
dump("DHTML processing instruction target");//$NON-NLS-1$ | |
yybegin(ST_DHTML_ATTRIBUTE_NAME); | |
return XML_TAG_NAME; | |
} | |
<ST_PI> xml-stylesheet { | |
if(Debug.debugTokenizer) | |
dump("XSL processing instruction target");//$NON-NLS-1$ | |
yybegin(ST_XML_PI_ATTRIBUTE_NAME); | |
return XML_TAG_NAME; | |
} | |
<ST_PI> {Name} { | |
if(Debug.debugTokenizer) | |
dump("processing instruction target");//$NON-NLS-1$ | |
yybegin(ST_PI_WS); | |
return XML_TAG_NAME; | |
} | |
<ST_PI_WS> {S}+ { | |
yybegin(ST_PI_CONTENT); | |
return WHITE_SPACE; | |
} | |
<ST_PI, ST_PI_WS> \?> { | |
if(Debug.debugTokenizer) | |
dump("processing instruction end");//$NON-NLS-1$ | |
yybegin(YYINITIAL); | |
return XML_PI_CLOSE; | |
} | |
<ST_PI_CONTENT> . { | |
// block scan until close is found | |
return doScan("?>", false, XML_PI_CONTENT, ST_XML_PI_TAG_CLOSE, ST_XML_PI_TAG_CLOSE); | |
} | |
<ST_PI_CONTENT,ST_XML_PI_TAG_CLOSE> \?> { | |
// ended with nothing inside | |
yybegin(YYINITIAL); | |
return XML_PI_CLOSE; | |
} | |
<ST_XML_PI_ATTRIBUTE_NAME, ST_XML_PI_EQUALS> {Name} { | |
if(Debug.debugTokenizer) | |
dump("XML processing instruction attribute name");//$NON-NLS-1$ | |
yybegin(ST_XML_PI_EQUALS); | |
return XML_TAG_ATTRIBUTE_NAME; | |
} | |
<ST_XML_PI_EQUALS> {Eq} { | |
if(Debug.debugTokenizer) | |
dump("XML processing instruction '='");//$NON-NLS-1$ | |
yybegin(ST_XML_PI_ATTRIBUTE_VALUE); | |
return XML_TAG_ATTRIBUTE_EQUALS; | |
} | |
/* the value was found, look for the next name */ | |
<ST_XML_PI_ATTRIBUTE_VALUE> {AttValue} | ([\'\"]([^\'\"\040\011\012\015<>/]|\/+[^\'\"\040\011\012\015<>/] )* ) { | |
if(Debug.debugTokenizer) | |
dump("XML processing instruction attribute value");//$NON-NLS-1$ | |
yybegin(ST_XML_PI_ATTRIBUTE_NAME); | |
return XML_TAG_ATTRIBUTE_VALUE; | |
} | |
/* the PI's close was found */ | |
<ST_XML_PI_EQUALS, ST_XML_PI_ATTRIBUTE_NAME, ST_XML_PI_ATTRIBUTE_VALUE> {PIend} { | |
if(Debug.debugTokenizer) | |
dump("XML processing instruction end");//$NON-NLS-1$ | |
yybegin(YYINITIAL); | |
return XML_PI_CLOSE; | |
} | |
// DHTML | |
<ST_DHTML_ATTRIBUTE_NAME, ST_DHTML_EQUALS> {Name} { | |
if(Debug.debugTokenizer) | |
dump("DHTML processing instruction attribute name");//$NON-NLS-1$ | |
yybegin(ST_DHTML_EQUALS); | |
return XML_TAG_ATTRIBUTE_NAME; | |
} | |
<ST_DHTML_EQUALS> {Eq} { | |
if(Debug.debugTokenizer) | |
dump("DHTML processing instruction '='");//$NON-NLS-1$ | |
yybegin(ST_DHTML_ATTRIBUTE_VALUE); | |
return XML_TAG_ATTRIBUTE_EQUALS; | |
} | |
/* the value was found, look for the next name */ | |
<ST_DHTML_ATTRIBUTE_VALUE> {AttValue} | ([\'\"]([^\'\"\040\011\012\015<>/]|\/+[^\'\"\040\011\012\015<>/] )* ) { | |
if(Debug.debugTokenizer) | |
dump("DHTML processing instruction attribute value");//$NON-NLS-1$ | |
yybegin(ST_DHTML_ATTRIBUTE_NAME); | |
return XML_TAG_ATTRIBUTE_VALUE; | |
} | |
/* The DHTML PI's close was found */ | |
<ST_DHTML_EQUALS, ST_DHTML_ATTRIBUTE_NAME, ST_DHTML_ATTRIBUTE_VALUE> [/]*> { | |
if(Debug.debugTokenizer) | |
dump("DHTML processing instruction end");//$NON-NLS-1$ | |
yybegin(YYINITIAL); | |
return XML_PI_CLOSE; | |
} | |
// XML declarations | |
<YYINITIAL, ST_XML_TAG_NAME, ST_XML_EQUALS, ST_XML_ATTRIBUTE_NAME, ST_XML_ATTRIBUTE_VALUE> {genericTagOpen}! { | |
fStateStack.push(yystate()); | |
if(Debug.debugTokenizer) | |
dump("\ndeclaration start");//$NON-NLS-1$ | |
yybegin(ST_XML_DECLARATION); | |
return XML_DECLARATION_OPEN; | |
} | |
<ST_XML_DECLARATION> [Ee][Ll][Ee][Mm][Ee][Nn][Tt] { | |
if(Debug.debugTokenizer) | |
dump("element");//$NON-NLS-1$ | |
yybegin(ST_XML_ELEMENT_DECLARATION); | |
return XML_ELEMENT_DECLARATION; | |
} | |
<ST_XML_DECLARATION> [Dd][Oo][Cc][Tt][Yy][Pp][Ee] { | |
if(Debug.debugTokenizer) | |
dump("doctype");//$NON-NLS-1$ | |
yybegin(ST_XML_DOCTYPE_DECLARATION); | |
return XML_DOCTYPE_DECLARATION; | |
} | |
<ST_XML_DECLARATION> [Aa][Tt][Tt][Ll][Ii][Ss][Tt] { | |
if(Debug.debugTokenizer) | |
dump("attlist");//$NON-NLS-1$ | |
yybegin(ST_XML_ATTLIST_DECLARATION); | |
return XML_ATTLIST_DECLARATION; | |
} | |
// begin DOCTYPE handling procedure | |
<ST_XML_DOCTYPE_DECLARATION, ST_XML_DOCTYPE_EXTERNAL_ID, ST_XML_DOCTYPE_ID_SYSTEM, ST_XML_DOCTYPE_ID_PUBLIC, ST_XML_DECLARATION_CLOSE> \[[^\]]*\] { | |
return XML_DOCTYPE_INTERNAL_SUBSET; | |
} | |
<ST_XML_DOCTYPE_DECLARATION> {Name} { | |
if(Debug.debugTokenizer) | |
dump("doctype type");//$NON-NLS-1$ | |
yybegin(ST_XML_DOCTYPE_EXTERNAL_ID); | |
return XML_DOCTYPE_NAME; | |
} | |
<ST_XML_DOCTYPE_EXTERNAL_ID> [Pp][Uu][Bb][Ll][Ii][Cc] { | |
if(Debug.debugTokenizer) | |
dump("doctype external id");//$NON-NLS-1$ | |
yybegin(ST_XML_DOCTYPE_ID_PUBLIC); | |
return XML_DOCTYPE_EXTERNAL_ID_PUBLIC; | |
} | |
<ST_XML_DOCTYPE_EXTERNAL_ID> [Ss][Yy][Ss][Tt][Ee][Mm] { | |
if(Debug.debugTokenizer) | |
dump("doctype external id");//$NON-NLS-1$ | |
yybegin(ST_XML_DOCTYPE_ID_SYSTEM); | |
return XML_DOCTYPE_EXTERNAL_ID_SYSTEM; | |
} | |
<ST_XML_DOCTYPE_ID_PUBLIC> {AttValue}|{PubidLiteral}|([\'\"]([^\'\"\040\011\012\015<>/]|\/+[^\'\"\040\011\012\015<>/] )* ) { | |
if(Debug.debugTokenizer) | |
dump("doctype public reference");//$NON-NLS-1$ | |
yybegin(ST_XML_DOCTYPE_ID_SYSTEM); | |
return XML_DOCTYPE_EXTERNAL_ID_PUBREF; | |
} | |
<ST_XML_DOCTYPE_ID_SYSTEM> {AttValue}|{SystemLiteral}|([\'\"]([^\'\"\040\011\012\015<>/]|\/+[^\'\"\040\011\012\015<>/] )* ) { | |
if(Debug.debugTokenizer) | |
dump("doctype system reference");//$NON-NLS-1$ | |
yybegin(ST_XML_DECLARATION_CLOSE); | |
return XML_DOCTYPE_EXTERNAL_ID_SYSREF; | |
} | |
// end DOCTYPE handling | |
// begin ELEMENT handling procedure | |
<ST_XML_ELEMENT_DECLARATION> {AttValue}|{PubidLiteral}|([\'\"]([^\'\"\040\011\012\015<>/]|\/+[^\'\"\040\011\012\015<>/] )* ) { | |
if(Debug.debugTokenizer) | |
dump("elementdecl name");//$NON-NLS-1$ | |
yybegin(ST_XML_ELEMENT_DECLARATION_CONTENT); | |
return XML_ELEMENT_DECL_NAME; | |
} | |
<ST_XML_ELEMENT_DECLARATION_CONTENT> [^>]* { | |
if(Debug.debugTokenizer) | |
dump("elementdecl contentspec");//$NON-NLS-1$ | |
return XML_ELEMENT_DECL_CONTENT; | |
} | |
<ST_XML_ELEMENT_DECLARATION_CONTENT> {genericTagClose} { | |
if(Debug.debugTokenizer) | |
dump("elementdecl close");//$NON-NLS-1$ | |
if (Debug.debugTokenizer) { | |
if(fStateStack.peek()!=YYINITIAL) | |
System.out.println("end embedded region");//$NON-NLS-1$ | |
} | |
yybegin(fStateStack.pop()); | |
return XML_DECLARATION_CLOSE; | |
} | |
// end ELEMENT handling | |
// begin ATTLIST handling procedure | |
<ST_XML_ATTLIST_DECLARATION> {AttValue}|{PubidLiteral}|([\'\"]([^\'\"\040\011\012\015<>/]|\/+[^\'\"\040\011\012\015<>/] )* ) { | |
if(Debug.debugTokenizer) | |
dump("attlist name");//$NON-NLS-1$ | |
yybegin(ST_XML_ATTLIST_DECLARATION_CONTENT); | |
return XML_ATTLIST_DECL_NAME; | |
} | |
<ST_XML_ATTLIST_DECLARATION_CONTENT> [^>]* { | |
if(Debug.debugTokenizer) | |
dump("attlist contentspec");//$NON-NLS-1$ | |
return XML_ATTLIST_DECL_CONTENT; | |
} | |
<ST_XML_ATTLIST_DECLARATION_CONTENT> {genericTagClose} { | |
if(Debug.debugTokenizer) | |
dump("attlist close");//$NON-NLS-1$ | |
if (Debug.debugTokenizer) { | |
if(fStateStack.peek()!=YYINITIAL) | |
System.out.println("end embedded region");//$NON-NLS-1$ | |
} | |
yybegin(fStateStack.pop()); | |
return XML_DECLARATION_CLOSE; | |
} | |
// end ATTLIST handling | |
<ST_XML_DECLARATION, ST_XML_DOCTYPE_DECLARATION, ST_XML_DOCTYPE_EXTERNAL_ID, ST_XML_ATTLIST_DECLARATION, ST_XML_ELEMENT_DECLARATION, ST_XML_DECLARATION_CLOSE, ST_XML_DOCTYPE_ID_PUBLIC, ST_XML_DOCTYPE_ID_SYSTEM, ST_XML_DOCTYPE_EXTERNAL_ID> {genericTagClose} { | |
if(Debug.debugTokenizer) | |
dump("declaration end");//$NON-NLS-1$ | |
if (Debug.debugTokenizer) { | |
if(fStateStack.peek()!=YYINITIAL) | |
System.out.println("end embedded region");//$NON-NLS-1$ | |
} | |
yybegin(fStateStack.pop()); | |
return XML_DECLARATION_CLOSE; | |
} | |
// end DECLARATION handling | |
<YYINITIAL> [^<&%]*|[&%]{S}+{Name}[^&%<]*|[&%]{Name}([^;&%<]*|{S}+;*) { | |
if(Debug.debugTokenizer) | |
dump("\nXML content");//$NON-NLS-1$ | |
return XML_CONTENT; | |
} | |
<ST_BLOCK_TAG_SCAN> .|\r|\n { | |
return doBlockTagScan(); | |
} | |
. { | |
if (Debug.debugTokenizer) | |
System.out.println("!!!unexpected!!!: \"" + yytext() + "\":" + //$NON-NLS-2$//$NON-NLS-1$ | |
yychar + "-" + (yychar + yylength()));//$NON-NLS-1$ | |
return UNDEFINED; | |
} | |
\040 { | |
if(Debug.debugTokenizer) | |
dump("SPACE");//$NON-NLS-1$ | |
return WHITE_SPACE; | |
} | |
\011 { | |
if(Debug.debugTokenizer) | |
dump("0x9");//$NON-NLS-1$ | |
return WHITE_SPACE; | |
} | |
\015 | |
{ | |
if(Debug.debugTokenizer) | |
dump("CARRIAGE RETURN");//$NON-NLS-1$ | |
return WHITE_SPACE; | |
} | |
\012 { | |
if(Debug.debugTokenizer) | |
dump("LINE FEED");//$NON-NLS-1$ | |
return WHITE_SPACE; | |
} |