blob: 6eeb531faa7501b72e643ba7085e51b1ce919079 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 1998, 2008 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Goh KONDOH - initial API and implementation
*******************************************************************************/
package org.eclipse.actf.model.internal.dom.sgml.impl;
import java.io.IOException;
import java.io.Reader;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
import org.eclipse.actf.model.dom.html.IErrorHandler;
import org.eclipse.actf.model.dom.html.IErrorLogListener;
import org.eclipse.actf.model.dom.html.IParser;
import org.eclipse.actf.model.dom.html.IParserError;
import org.eclipse.actf.model.dom.html.ParseException;
import org.eclipse.actf.model.internal.dom.sgml.ISGMLConstants;
import org.eclipse.actf.model.internal.dom.sgml.ISGMLParser;
import org.eclipse.actf.model.internal.dom.sgml.errorhandler.AttributeValueErrorHandler;
import org.eclipse.actf.model.internal.dom.sgml.errorhandler.DefaultErrorHandler;
import org.eclipse.actf.model.internal.dom.sgml.errorhandler.ITokenErrorHandler;
import org.eclipse.actf.model.internal.dom.sgml.modelgroup.IModelGroup;
import org.eclipse.actf.model.internal.dom.sgml.modelgroup.AndModelGroup.AndContext;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import org.xml.sax.DocumentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
/**
* Pure SGML parser. Base class of a parser which parses a SGML derived markup
* language. Language-dependent customization have to be provided by a subclass.
*/
public class SGMLParser implements ISGMLConstants, ISGMLParser {
private boolean isXHTML = false;
private boolean isEmptyElement = true;
private boolean isEndWithSlash = false;
private String currentTagName = "";
private DOMImplementation domImpl;
private static Class createDocumentMethodParamTypes[] = { String.class,
String.class, DocumentType.class };
/**
* @return <code>null</code> if failed.
*/
public DOMImplementation setDOMImplementation(DOMImplementation domImpl) {
Class domImpleInterface = DOMImplementation.class;
try {
java.lang.reflect.Method createDocumentMethod = domImpleInterface
.getMethod("createDocument", createDocumentMethodParamTypes);
if (createDocumentMethod != null) {
this.domImpl = domImpl;
doc = null;
return domImpl;
}
} catch (Exception e) {
}
return null;
}
/**
* @return A DOMImplementation instace this parser users.
*/
public DOMImplementation getDOMImplementation() {
return this.domImpl;
}
/**
* Constructs this instance and {@link DefaultErrorHandler}instance is
* added.
*/
public SGMLParser() {
addErrorHandler(new DefaultErrorHandler());
addTokenErrorHandler(new AttributeValueErrorHandler());
setDOMImplementation(SGMLDOMImpl.getDOMImplementation());
if (getDOMImplementation() == null) {
setDocument(new SGMLDocument());
}
anonymousElementDef = new ElementDefinition("ANONYMOUS", any);
}
public IErrorHandler errorHandlers[] = new IErrorHandler[8];
private ITokenErrorHandler tokenErrorHandlers[] = new ITokenErrorHandler[8];
public int errorHandlerNum = 0;
private int tokenErrorHandlerNum = 0;
/**
* Sets an error handler that recovers error.
*
* @param errorHandler
* @deprecated {@link #addErrorHandler(org.eclipse.actf.model.dom.html.IErrorHandler)}
*/
public void setErrorHandler(IErrorHandler errorHandler) {
addErrorHandler(errorHandler);
}
/**
* Adds an ErrorHandler instance. An errorHandler added later is invoked
* earlier by this parser instance than errorHandlers added earlier. If one
* errorHandler handles error (eg. returns <code>
* true</code>), no more
* errorHandlers are invoked.
*
* @param errorHandler
* errorHandler instance to be added to this parser
*/
public void addErrorHandler(IErrorHandler errorHandler) {
if (errorHandlerNum == errorHandlers.length) {
IErrorHandler newErrorHandlers[] = new IErrorHandler[errorHandlers.length * 2];
for (int i = 0; i < errorHandlerNum; i++) {
newErrorHandlers[i] = errorHandlers[i];
}
errorHandlers = newErrorHandlers;
}
this.errorHandlers[errorHandlerNum++] = errorHandler;
}
/**
* Adds an ErrorHandler instance. An errorHandler added later is invoked
* earlier by this parser instance than errorHandlers added earlier. If one
* errorHandler handles error (eg. returns <code>
* true</code>), no more
* errorHandlers are invoked.
*
* @param errorHandler
* errorHandler instance to be added to this parser
*/
public void addTokenErrorHandler(ITokenErrorHandler errorHandler) {
if (tokenErrorHandlerNum == tokenErrorHandlers.length) {
ITokenErrorHandler newErrorHandlers[] = new ITokenErrorHandler[errorHandlers.length * 2];
for (int i = 0; i < tokenErrorHandlerNum; i++) {
newErrorHandlers[i] = tokenErrorHandlers[i];
}
tokenErrorHandlers = newErrorHandlers;
}
this.tokenErrorHandlers[tokenErrorHandlerNum++] = errorHandler;
}
/**
* Gets node-level error handlers included in this parser
*
* @return error handlers
*/
public IErrorHandler[] getErrorHandlers() {
IErrorHandler ret[] = new IErrorHandler[errorHandlerNum];
for (int i = 0; i < errorHandlerNum; i++) {
ret[i] = errorHandlers[i];
}
return ret;
}
/**
* Gets token-level error handlers included in this parser.
*/
public ITokenErrorHandler[] getTokenErrorHandlers() {
ITokenErrorHandler ret[] = new ITokenErrorHandler[errorHandlerNum];
for (int i = 0; i < tokenErrorHandlerNum; i++) {
ret[i] = tokenErrorHandlers[i];
}
return ret;
}
/**
* Removes a node-level error handler.
*
* @param errorHandler
* error handler to remove
*/
public void removeErrorHandler(IErrorHandler errorHandler) {
for (int i = 0; i < errorHandlerNum; i++) {
if (errorHandlers[i] == errorHandler) {
errorHandlers[i] = null;
for (i++; i < errorHandlerNum; i++) {
errorHandlers[i - 1] = errorHandlers[i];
}
errorHandlerNum--;
return;
}
}
}
/**
* Removes a token-level error handler.
*
* @param errorHandler
* error handler to remove
*/
public void removeTokenErrorHandler(ITokenErrorHandler errorHandler) {
for (int i = 0; i < tokenErrorHandlerNum; i++) {
if (tokenErrorHandlers[i] == errorHandler) {
tokenErrorHandlers[i] = null;
for (i++; i < tokenErrorHandlerNum; i++) {
tokenErrorHandlers[i - 1] = tokenErrorHandlers[i];
}
errorHandlerNum--;
return;
}
}
}
/**
* This variable is for debugging.
*/
public static final boolean _DEBUG = false;
private boolean extractNum = true;
private boolean extractChar = true;
/**
* Determines if this parser extracts both character and number entities or
* not. Default value is <code>true</code>.
*/
public void extractEntity(boolean b) {
this.extractChar = this.extractNum = b;
}
/**
* Sets if it parses and extracts number entities or not. By default, it
* parses and number character entities.
*
* @param b
* if true, extracts number entities.
*/
public void extractNumEntity(boolean b) {
this.extractNum = b;
}
/**
* Checks if it parses and extracts number entities or not. By default, it
* parses and extracts number entities.
*
* @return true if extracts number entities. Otherwise false
*/
public boolean extractNumEntity() {
return this.extractNum;
}
/**
* Sets if it parses and extracts character entities or not. By default, it
* parses and extracts character entities.
*
* @param b
* if true, extracts character entities.
*/
public void extractCharEntity(boolean b) {
this.extractChar = b;
}
/**
* Checks if it parses and extracts character entities or not. By default,
* it parses and extracts character entities.
*
* @return true if extracts character entities. Otherwise false
*/
public boolean extractCharEntity() {
return extractChar;
}
public InsTokenizer tokenizer;
private Document doc = null;
/**
* Public entities are stored in this variable. Keys are id and values are
* file name. Both are instances of <code>java.lang.String</code>. For
* example, for the key "-//W3C//DTD HTML 4.0 Transitional//EN", the value
* is "loose.dtd"
*/
public static Hashtable<String, String> pubEntityMap = new Hashtable<String, String>();
/**
* Gets public entity map.
*
* @return Hashtable instance whose keys are public id and values are file
* name.
*/
public static Hashtable<String, String> getPublicEntityMap() {
return pubEntityMap;
}
/**
* Top element's ElementDefinition.
*/
public ElementDefinition lastDef = null;
static IModelGroup pcdata = new IModelGroup() {
public boolean match(ISGMLParser parser, Node parent, Node child) {
if (child instanceof Text && !(child instanceof CDATASection)) {
parent.appendChild(child);
return true;
} else {
return false;
}
}
public boolean optional() {
return false;
}
public void refer(boolean infinite) {
}
public String toString() {
return "#PCDATA";
}
public boolean match(int number) {
return true;
}
public boolean[] rehash(int totalSize) {
boolean ret[] = new boolean[totalSize];
ret[totalSize - 2] = true;
return ret;
}
};
static IModelGroup cdata = new IModelGroup() {
public boolean match(ISGMLParser parser, Node parent, Node child) {
if (child instanceof CDATASection) {
parent.appendChild(child);
return true;
} else {
return false;
}
}
public boolean optional() {
return false;
}
public void refer(boolean infinite) {
}
public String toString() {
return "CDATA";
}
public boolean match(int number) {
return false;
}
public boolean[] rehash(int totalSize) {
return null;
}
};
static IModelGroup empty = new IModelGroup() {
public boolean match(ISGMLParser parser, Node parent, Node child) {
return false;
}
public boolean optional() {
return true;
}
public void refer(boolean infinite) {
}
public String toString() {
return "EMPTY";
}
public boolean match(int number) {
return false;
}
public boolean[] rehash(int totalSize) {
return null;
}
};
static IModelGroup any = new IModelGroup() {
public boolean match(ISGMLParser parser, Node parent, Node child) {
parent.appendChild(child);
return true;
}
public boolean optional() {
return true;
}
public void refer(boolean infinite) {
}
public boolean match(int number) {
return true;
}
public boolean[] rehash(int totalSize) {
return null;
}
};
public ElementDefinition anonymousElementDef;
/**
* Gets a element definition for undefined elements.
*/
public ElementDefinition getAnonymousElementDefinition() {
return anonymousElementDef;
}
public SGMLDocTypeDef dtd;
/**
* Gets DTD that defines this Document's syntax.
*
* @return DTD that defines this Document's syntax.
*/
public SGMLDocTypeDef getDTD() {
return this.dtd;
}
/**
* Sets DTD that defines this Document's syntax.
*
* @param dtd
* DTD that defines this Document's syntax.
*/
public final void setDTD(SGMLDocTypeDef dtd) {
this.dtd = dtd;
if (doc instanceof SGMLDocument) {
((SGMLDocument) doc).setDTD(dtd);
}
isXHTML = (dtd.toString().indexOf("XHTML") > -1);
}
public Hashtable<Node, AndContext> andMap = new Hashtable<Node, AndContext>();
public Hashtable<Node, AndContext> getAndMap() {
return andMap;
}
public Hashtable<Node, Integer> seqMap = new Hashtable<Node, Integer>();
public Hashtable<Node, Integer> getSeqMap() {
return seqMap;
}
public Hashtable<Node, Node> plusMap = new Hashtable<Node, Node>();
public Hashtable<Node, Node> getPlusMap() {
return plusMap;
}
public void clearContextMap(Node parent) {
andMap.remove(parent);
seqMap.remove(parent);
plusMap.remove(parent);
}
private Attr attribute(ElementDefinition ed, AttributeListImpl attrlist)
throws IOException, ParseException, SAXException {
Attr ret = null;
if (tokenizer.nextToken() != NAME_CHAR) {
if (handleError(IParserError.TAG_NAME, tokenizer.sval)) {
return attribute(ed, attrlist);
}
} else {
String attName = changeAttrNameCase(tokenizer.sval);
AttributeDefinition ad = ed != null ? ed.getAttributeDef(attName)
: null;
String attValue = attName;
if (tokenizer.nextToken() == EQ) {
attValue = tokenizer.readAttributeValue(ad, ed);
ret = doc.createAttribute(attName);
ret.setValue(attValue);
} else {
tokenizer.pushBack();
ret = doc.createAttribute(attName);
}
if (ad == null) {
if (attrlist != null) {
attrlist.addAttribute(attName, "CDATA", attValue);
}
if (ed != null && ed != anonymousElementDef) {
if (handleError(IParserError.ILLEGAL_ATTRIBUTE, ret)) {
ret = attribute(ed, attrlist);
} else {
error(IParserError.ILLEGAL_ATTRIBUTE, "Illegal attribute '"
+ attName + "' for " + ed.getName());
}
}
} else if (attrlist != null) {
attrlist.addAttribute(attName, ad.getDeclaredTypeStr(),
attValue);
}
}
return ret;
}
/**
* Records an error. Does nothing by default. If it has ErrorLogListener
* instances, calls their {@link IErrorLogListener#errorLog(int,String)}
* method.
*
* @param code
* error code.
* @param msg
* message of the error.
*/
public final void error(int code, String msg) {
for (int i = 0; i < errorLogListenerNum; i++) {
if (tokenizer != null) {
errorLogListeners[i].errorLog(code, tokenizer.getCurrentLine()
+ ": " + msg);
} else {
errorLogListeners[i].errorLog(code, msg);
}
}
}
public IErrorLogListener errorLogListeners[] = new IErrorLogListener[8];
public int errorLogListenerNum = 0;
/**
* Adds an error log listerner. Listener is invoked when error is occured.
*/
public void addErrorLogListener(IErrorLogListener listener) {
if (errorLogListenerNum == errorLogListeners.length) {
IErrorLogListener newListeners[] = new IErrorLogListener[errorLogListenerNum * 2];
for (int i = 0; i < errorLogListenerNum; i++) {
newListeners[i] = errorLogListeners[i];
}
errorLogListeners = newListeners;
}
errorLogListeners[errorLogListenerNum++] = listener;
}
/**
* Removes an error log listener.
*
* @param lister
* error log listener to remove.
*/
public void removeErrorLogListener(IErrorLogListener listener) {
for (int i = 0; i < errorLogListenerNum; i++) {
if (errorLogListeners[i] == listener) {
errorLogListeners[i] = null;
for (i++; i < errorLogListenerNum; i++) {
errorLogListeners[i - 1] = errorLogListeners[i];
}
errorLogListenerNum--;
return;
}
}
}
private EndTag etag() throws IOException, ParseException, SAXException {
String tagName;
if (tokenizer.nextToken() == NAME_CHAR) {
tagName = changeTagCase(tokenizer.sval);
while (tokenizer.nextToken() != TAGC && tokenizer.ttype != EOF) {
}
;
EndTag et = new EndTag(tagName);
currentNode = et;
if (docHandler != null && !eHandleLogical) {
docHandler.endElement(tagName);
}
ElementDefinition ed = dtd.getElementDefinition(tagName);
if (ed != null) {
lastElementNumber = ed.number;
return et;
} else if (keepUnknowns) {
lastElementDef = anonymousElementDef;
lastElementNumber = pcdataNumber + 1;
return et;
} else if (handleError(IParserError.UNKNOWN_ELEMENT, et)) {
return null;
}
}
return null;
}
/* (non-Javadoc)
* @see org.eclipse.actf.model.dom.sgml.impl.ISGMLParser#getDocument()
*/
public Document getDocument() {
return doc;
}
/**
* push back buffer size
*/
private static final int BUF_SIZ = 256;
public int bufCount = 0;
public Node buf[] = new Node[BUF_SIZ];
/**
* Gets a Node or {@link EndTag}from a currently reading stream as a result
* of tokenizing.
*
* @return {@link org.w3c.dom.Node Node}or {@link EndTag}
* @exception ParseException
* @exception IOException
* @see #pushBackNode(org.w3c.dom.Node)
*/
public Node getNode() throws ParseException, IOException, SAXException {
if (bufCount == 0) {
return node();
} else {
return buf[--bufCount];
}
}
/**
* Pushes back a node to this parser.
*
* @param node
* node to be pushed back.
* @see #getNode()
*/
public void pushBackNode(Node node) {
buf[bufCount++] = node;
if (node instanceof Element) {
lastElementDef = dtd.getElementDefinition(node.getNodeName());
lastElementNumber = lastElementDef.number;
}
}
/**
* Gets a resource reader for this parser. By default, this class has no
* resource. So if some resource is required, override this method in a
* subclass.
*
* @exception IOException
* always thrown.
*/
protected Reader getResource(String resourceName) throws IOException {
throw new IOException("cannot find " + resourceName);
}
public Node node() throws ParseException, IOException, SAXException {
Node ret;
// TODO emulate <tag /> by using EndTag
if (isEndWithSlash && !isEmptyElement) {
// if(isXHTML && isEndWithSlash){
isEndWithSlash = false;
EndTag et = new EndTag(currentTagName);
currentNode = et;
if (docHandler != null && !eHandleLogical) {
docHandler.endElement(currentTagName);
}
ElementDefinition ed = dtd.getElementDefinition(currentTagName);
if (ed != null) {
lastElementNumber = ed.number;
return et;
} else if (keepUnknowns) {
lastElementDef = anonymousElementDef;
lastElementNumber = pcdataNumber + 1;
return et;
} else if (handleError(IParserError.UNKNOWN_ELEMENT, et)) {
return null;
}
}
switch (tokenizer.nextToken()) {
case STAGO:
ret = stag();
if (ret == null) { // Unknown Element. Skip it.
ret = getNode();
}
break;
case ETAGO:
ret = etag();
if (ret == null) { // Unknown Element. Skip it.
ret = node();
}
break;
case WHITESPACE:
currentNode = ret = doc.createTextNode(tokenizer.sval);
if (ret instanceof SGMLText) {
((SGMLText) ret).setIsWhitespaceInElementContent(true);
}
if (docHandler != null) {
if (saxch != null) {
docHandler.characters(saxch, begin, len);
saxch = null;
}
}
break;
case PCDATA:
currentNode = ret = doc.createTextNode(tokenizer.sval);
if (docHandler != null) {
if (saxch != null) {
docHandler.characters(saxch, begin, len);
saxch = null;
}
}
break;
case COMMENT:
currentNode = ret = doc.createComment(tokenizer.sval);
if (lexHandler != null) {
lexHandler.comment(saxch, begin, len);
saxch = null;
}
break;
case PI:
currentNode = ret = doc.createProcessingInstruction(null,
tokenizer.sval);
if (docHandler != null) {
docHandler.processingInstruction(null, tokenizer.sval);
}
break;
case EOF:
ret = null;
break;
case MDO:
error(IParserError.ILLEGAL_DOCTYPE,
"Illegal Declaration. Discarding to next '>'");
if (tokenizer.nextToken() != '>') {
// consume '>'
tokenizer.consumeUntil('>');
tokenizer.switchTo(DEFAULT);
}
return node();
default:
error(IParserError.MISC_ERR,
"Internal Parser Error: character encoding may be wrong.");
return node();
}
return ret;
}
/**
* Set up syntax information described by DTD.
*
* @param publicID
* DTD's public id that specifies which to set up.
*/
public void setupDTD(String publicID) throws ParseException, IOException {
publicID = makeUnique(publicID);
SGMLDocTypeDef ret = SGMLDocTypeDef.getPublic(publicID);
if (ret != null) {
setDTD(ret);
lastDef = ret.getElementDefinition(getDefaultTopElement());
} else {
synchronized (publicID) {
ret = SGMLDocTypeDef.getPublic(publicID);
if (ret == null) {
ret = SGMLDocTypeDef.createPublic(publicID, this);
setDTD(ret);
Reader dr = getResource((String) pubEntityMap.get(publicID));
DTDTokenizer tok = new DTDTokenizer(dr);
new DTDParser(tok, ret).readDTD();
dr.close();
SGMLDocTypeDef.putPublic(publicID, ret);
ret.rehash();
} else {
setDTD(ret);
}
}
}
lastDef = dtd.getElementDefinition(getDefaultTopElement());
pcdataNumber = dtd.getElementCount();
for (int i = depth - 1; i >= 0; i--) {
ancesterElementDefs[i] = dtd
.getElementDefinition(ancesterElementDefs[i].getName());
if (ancesterElementDefs[i] == null) {
error(IParserError.UNKNOWN_ELEMENT, ancesters[i].getNodeName()
+ " is not defined in " + publicID);
ancesterElementDefs[i] = anonymousElementDef;
}
}
anonymousElementDef.rehash(pcdataNumber + 2);
anonymousElementDef.number = pcdataNumber + 1;
}
public int pcdataNumber;
/**
* Parses SGML a document and return its top element. SGML documents are
* consists of three parts. 1. SGML declaration. 2. Dcument type definition.
* 3. SGML instances. If a document misses 1. and 2., this parser try to
* read the default declaration specified by
* {@link #setDefaultDTD(java.lang.String)}.
*
* @param reader
* parse to read.
* @return document.
* @exception PaserException
* If unrecoverable syntax or token error occured, throwed
* @exception IOException
*/
public Node parse(Reader reader) throws ParseException, IOException,
SAXException {
if (domImpl == null && doc == null) {
throw new ParseException("No factory instance.");
}
init();
this.tokenizer = new InsTokenizer(reader, this);
if (docHandler != null) {
docHandler.setDocumentLocator(this.tokenizer);
docHandler.startDocument();
}
setDTD(SGMLDocTypeDef.createAnonymous(this));
DocumentType docType = readDocType();
if (docType == null) {
error(IParserError.DOCTYPE_MISSED, "<!DOCTYPE ...> is missing. Try to use \""
+ defaultDTD + "\" as document type");
setupDTD(defaultDTD);
}
tokenizer.extractNumEntity(extractNum);
tokenizer.extractCharEntity(extractChar);
tokenizer.setPreserveWhitespace(preserveWhitespace);
if (doc == null) {
doc = createDocument(docType);
if (doc instanceof SGMLDocument
&& ((SGMLDocument) doc).getDTD() == null) {
((SGMLDocument) doc).setDTD(this.dtd);
}
while (!commentsBeforeDoctype.isEmpty()) {
CATB catb = (CATB) commentsBeforeDoctype.lastElement();
Node node = catb.comment ? (Node) doc.createComment(catb.str)
: doc.createProcessingInstruction(null, catb.str);
doc.insertBefore(node, doc.getFirstChild());
commentsBeforeDoctype.removeElement(catb);
}
}
// //041026 handle comments after doctype
// comment_loop: while (true) {
// switch (tokenizer.nextToken()) {
// case COMMENT:
// doc.appendChild(doc.createComment(tokenizer.sval));
// if (lexHandler != null) {
// lexHandler.comment(saxch, begin, len);
// }
// break;
// case PI:
// doc.appendChild(doc.createComment(tokenizer.sval));
// if (docHandler != null) {
// docHandler.processingInstruction(null, tokenizer.sval);
// }
// break;
// case MDO:
// break comment_loop;
// default:
// tokenizer.pushBack();
// break comment_loop;
// }
// }
// dummy
context = doc.createElement("dummy0");
seqArray = new IModelGroup[dtd.maxSeqLength];
Node ret = readInstances();
if (docHandler != null)
docHandler.endDocument();
return ret;
}
public IModelGroup seqArray[];
public IModelGroup[] getSeqArray() {
return this.seqArray;
}
protected Document createDocument(DocumentType docType) {
Document ret = domImpl.createDocument("dummy1", "dummy1",
docType);
if (ret.getDocumentElement() != null) {
ret.removeChild(ret.getDocumentElement());
}
return ret;
}
/**
* Context element.
*/
public Element context = null;
/**
* Gets a current context element.
*
* @return context element
* @see #setContext(org.w3c.dom.Element)
*/
public Element getContext() {
return this.context;
}
public Element forwardPath[] = new Element[BUF_SIZ];
public ElementDefinition ancesterElementDefs[] = new ElementDefinition[BUF_SIZ];
public Element ancesters[] = new Element[BUF_SIZ];
public int depth = 0;
public AttributeListImpl nullAttributeList = createAttributeList();
private void setContextForward(Element element) throws SAXException {
ElementDefinition ed;
if (eHandleLogical && docHandler != null) {
for (Node down = context.getLastChild(); down instanceof Element; down = down
.getLastChild()) {
if (down == element) {
ancesters[depth] = element;
ancesterElementDefs[depth] = lastElementDef;
depth++;
docHandler.startElement(element.getNodeName(), attrlist);
attrlist = null;
this.context = element;
return;
} else {
ancesters[depth] = (Element) down;
ancesterElementDefs[depth] = ed = dtd
.getElementDefinition(down.getNodeName());
if (ed == null) {
ancesterElementDefs[depth] = anonymousElementDef;
}
docHandler.startElement(down.getNodeName(),
nullAttributeList);
depth++;
}
}
} else {
for (Node down = context.getLastChild(); down instanceof Element; down = down
.getLastChild()) {
if (down == element) {
ancesters[depth] = element;
ancesterElementDefs[depth] = lastElementDef;
depth++;
attrlist = null;
this.context = element;
return;
} else {
ancesters[depth] = (Element) down;
ancesterElementDefs[depth] = ed = dtd
.getElementDefinition(down.getNodeName());
if (ed == null) {
ancesterElementDefs[depth] = anonymousElementDef;
}
}
depth++;
}
}
throw new RuntimeException("Internal Parser Error.");
}
private void setContextBackward(int newDepth) throws SAXException {
if (eHandleLogical && docHandler != null) {
for (int i = depth - 1; i >= newDepth; i--) {
docHandler.endElement(ancesters[i].getNodeName());
}
}
depth = newDepth;
this.context = ancesters[newDepth - 1];
return;
}
/**
* Sets current context element node.
*
* @param element
* new context.
* @see #getContext()
*/
public final void setContext(Element element) throws SAXException {
for (int i = depth - 1; i >= 0; i--) {
Element up = ancesters[i];
if (up == element) {
if (eHandleLogical && docHandler != null) {
for (int j = depth - 1; j > i; j--) {
docHandler.endElement(ancesters[j].getNodeName());
}
}
depth = i + 1;
this.context = element;
return;
}
int forwardPathLen = 0;
for (Node down = up.getLastChild(); down instanceof Element; down = down
.getLastChild()) {
if (down == element) {
if (eHandleLogical && docHandler != null) {
for (int j = depth - 1; j > i; j--) {
docHandler.endElement(ancesters[j].getNodeName());
}
for (int j = 0; j < forwardPathLen; j++) {
docHandler.startElement(forwardPath[j]
.getNodeName(), nullAttributeList);
ancesters[i + j + 1] = forwardPath[j];
ancesterElementDefs[i + j + 1] = dtd
.getElementDefinition(forwardPath[j]
.getNodeName());
if (ancesterElementDefs[i + j + 1] == null) {
ancesterElementDefs[i + j + 1] = anonymousElementDef;
}
}
docHandler.startElement(down.getNodeName(), attrlist);
} else {
for (int j = 0; j < forwardPathLen; j++) {
ancesters[i + j + 1] = forwardPath[j];
ancesterElementDefs[i + j + 1] = dtd
.getElementDefinition(forwardPath[j]
.getNodeName());
if (ancesterElementDefs[i + j + 1] == null) {
ancesterElementDefs[i + j + 1] = anonymousElementDef;
}
}
}
depth = i + forwardPathLen + 1;
ancesters[depth] = element;
ancesterElementDefs[depth] = lastElementDef;
depth++;
attrlist = null;
this.context = element;
return;
} else {
forwardPath[forwardPathLen++] = (Element) down;
}
}
}
// creates backward context.
int newDepth = 0;
for (Node up = element; up instanceof Element; up = up.getParentNode()) {
newDepth++;
}
Element newAncesters[] = new Element[newDepth];
int i = 1;
for (Node up = element; up instanceof Element; up = up.getParentNode()) {
newAncesters[newDepth - i] = (Element) up;
i++;
}
for (i = 0; i < newDepth; i++) {
if (ancesters[i] != newAncesters[i]) {
if (eHandleLogical && docHandler != null) {
for (int j = depth - 1; j >= i; j--) {
docHandler.endElement(ancesters[j].getNodeName());
}
for (int j = i; j < newDepth - 1; j++) {
docHandler.startElement(newAncesters[j].getNodeName(),
nullAttributeList);
}
docHandler.startElement(element.getNodeName(), attrlist);
}
while (i < newDepth - 1) {
ancesters[i] = newAncesters[i];
ancesterElementDefs[i] = dtd
.getElementDefinition(newAncesters[i].getNodeName());
}
ancesters[newDepth - 1] = element;
ancesterElementDefs[newDepth - 1] = lastElementDef;
depth = newDepth;
attrlist = null;
this.context = element;
return;
}
}
}
public Vector<Element> nodesWithEndtag = new Vector<Element>();
/**
* Checks if a specified element has its end tag or not.
*
* @param element
* element to be checked.
* @return <code>true</code> if <code>element</code> has its end tag.
* Otherwise, false.
*/
public boolean hasEndTag(Element element) {
return nodesWithEndtag.contains(element);
}
/**
* Determines that a specified element has its end tag.
*/
public void setHasEndTag(Element element) {
nodesWithEndtag.addElement(element);
}
private void setTopElement(Element element) throws SAXException {
Element prev = doc.getDocumentElement();
if (prev != null) {
doc.replaceChild(element, prev);
} else {
doc.appendChild(element);
}
context = element;
if (docHandler != null && eHandleLogical) {
AttributeListImpl al = attrlist != null ? attrlist
: nullAttributeList;
docHandler.startElement(element.getNodeName(), al);
attrlist = null;
}
ancesterElementDefs[0] = lastDef;
ancesters[0] = element;
depth++;
}
private Vector<EndTag> missedEndtags = new Vector<EndTag>();
private Node readInstances() throws ParseException, IOException,
SAXException {
Node node = getNode();
if (node == null)
return doc;
while (node.getNodeType() == Node.COMMENT_NODE
|| node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
if (keepComment)
doc.appendChild(node);
node = getNode();
}
switch (node.getNodeType()) {
case ENDTAG:
if (!handleError(IParserError.FLOATING_ENDTAG, node)) {
if (eHandleLogical && docHandler != null) {
docHandler.ignorableWhitespace(saxch, begin, len);
}
error(IParserError.FLOATING_ENDTAG, "Illegal end tag: " + node
+ ". Ignore it.");
}
return readInstances();
case Node.ELEMENT_NODE:
if (lastDef.instance(node)) {
setTopElement((Element) node);
} else {
AttributeListImpl attrlisttmp = attrlist;
attrlist = null;
setTopElement(doc.createElement(changeDefaultTagCase(lastDef
.getName())));
attrlist = attrlisttmp;
addAutoGenerated(context);
if (lastDef.getContentModel().match(this, context, node)) {
if (!lastDef.startTagOmittable()) {
error(IParserError.ILLEGAL_TOP_ELEMENT, node
+ " can't be a top element.");
}
setContextForward((Element) node);
} else if (!handleError(IParserError.ILLEGAL_CHILD, node)) {
addErrorNode(context);
error(IParserError.ILLEGAL_CHILD, node
+ " is not allowed as a child of " + context);
context.appendChild(node);
setContextForward((Element) node);
}
}
postElement((Element) node);
break;
case Node.TEXT_NODE:
if (preserveWhitespace && whitespaceText((Text) node)) {
return readInstances();
}
error(IParserError.ILLEGAL_TOP_ELEMENT, "#text can't be a top element");
setTopElement(doc.createElement(changeDefaultTagCase(lastDef
.getName())));
addAutoGenerated(context);
context.appendChild(node);
break;
default:
throw new ParseException(tokenizer.getCurrentLine()
+ ": Internal Parser Error " + node);
}
return readInstances2();
}
private Node readInstances2() throws ParseException, IOException,
SAXException {
ElementDefinition ed;
Node node;
outer: for (node = getNode(); node != null; node = getNode()) {
// System.out.println(node.getNodeType()+" : "+node);
// System.out.println(node.toString());
node_sel: switch (node.getNodeType()) {
case Node.COMMENT_NODE:
case Node.PROCESSING_INSTRUCTION_NODE:
if (keepComment)
context.appendChild(node);
break;
case ENDTAG:
missedEndtags.removeAllElements();
if (_DEBUG
&& node.getNodeName().equalsIgnoreCase(
System.getProperty("DEBUG_ENDTAG"))) {
System.err.println("DEBUG: " + node);
}
for (int i = depth - 1; i >= 0; i--) {
if (ancesterElementDefs[i].number == lastElementNumber
&& (lastElementNumber != pcdataNumber + 1 || ancesters[i]
.getNodeName().equalsIgnoreCase(
node.getNodeName()))) {
if (!missedEndtags.isEmpty()) {
extraErrInfo = missedEndtags;
if (handleError(IParserError.SUDDEN_ENDTAG, node)) {
extraErrInfo = null;
break node_sel;
}
extraErrInfo = null;
error(IParserError.SUDDEN_ENDTAG, missedEndtags
+ " have been forced to be inserted by "
+ node);
}
/*
* if (ancesterElementDefs[depth - 1].number ==
* lastElementNumber) {
* nodesWithEndtag.addElement(context); }
*/
((EndTag) node).setElement(ancesters[i]);
nodesWithEndtag.addElement(ancesters[i]);
if (i > 0) {
setContextBackward(i);
} else {
break outer;
}
break node_sel;
} else {
if (!ancesterElementDefs[i].endTagOmittable()) {
missedEndtags.insertElementAt(new EndTag(
ancesters[i].getNodeName()), 0);
}
}
}
if (!handleError(IParserError.FLOATING_ENDTAG, node)) {
if (eHandleLogical && docHandler != null) {
docHandler.ignorableWhitespace(saxch, begin, len);
}
error(IParserError.FLOATING_ENDTAG, "Illegal end tag: " + node
+ ". Ignore it");
}
break;
case Node.ELEMENT_NODE:
Element element = (Element) node;
Element exParent = null;
if (_DEBUG
&& element.getTagName().equalsIgnoreCase(
System.getProperty("DEBUG_STARTTAG"))) {
System.err.println("DEBUG: " + element);
}
// System.out.println("element");
for (int i = depth - 1; i >= 0; i--) {
ed = ancesterElementDefs[i];
if (!ed.endTagOmittable() && exParent == null) {
exParent = ancesters[i];
}
// System.out.println(ed.toString());
if (ed.exclusion(lastElementNumber)) {
if (exParent != null) {
if (!handleError(IParserError.ILLEGAL_CHILD, node)) {
addErrorNode(context);
error(IParserError.ILLEGAL_CHILD, node
+ " is an exception uner "
+ ancesters[i]);
} else {
if (context != node && eHandleLogical
&& docHandler != null) {
docHandler.startElement(node.getNodeName(),
attrlist);
}
postElement(element);
break node_sel;
}
} else if (ancesters[i - 1] != null) {
setContextBackward(i);
}
break;
}
// System.out.println("elenent: mid");
if (ed.inclusion(lastElementNumber)) {
context.appendChild(node);
setContextForward(element);
postElement(element);
break node_sel;
}
}
ed = ancesterElementDefs[depth - 1];
IModelGroup contentModel = ed.getContentModel();
// TODO correct this
if (contentModel.match(lastElementNumber)
&& contentModel.match(this, context, node)) {
// System.out.println("model: fow");
setContextForward(element);
} else if (ed.endTagOmittable()) {
// System.out.println("model: omit");
boolean found = false;
for (int i = depth - 2; i >= 0; i--) {
ed = ancesterElementDefs[i];
contentModel = ed.getContentModel();
if (contentModel.match(lastElementNumber)
&& (found = contentModel.match(this,
ancesters[i], node))
|| !ed.endTagOmittable()) {
break;
}
}
// System.out.println("model: omit2 "+found);
if (found) {
// System.out.println("model: ok");
setContext(element);
} else if (!handleError(IParserError.ILLEGAL_CHILD, node)) {
// System.out.println("model: child");
context.appendChild(element);
addErrorNode(context);
error(IParserError.ILLEGAL_CHILD, node
+ " is not allowed as a child of " + context);
setContextForward(element);
} else if (element.getParentNode() != null) {
// unless the error handlers ignore the node
// System.out.println("model: post");
postElement(element);
break;
}
// System.out.println("model: omit3");
} else if (!handleError(IParserError.ILLEGAL_CHILD, node)) {
// System.out.println("model: error");
// TODO ???
context.appendChild(element);
addErrorNode(context);
error(IParserError.ILLEGAL_CHILD, node
+ " is not allowed as a child of " + context);
// System.out.println(node.getNodeName()+context);
setContextForward(element);
}
// System.out.println("model: end");
if (context != node && eHandleLogical && docHandler != null) {
docHandler.startElement(node.getNodeName(), attrlist);
}
postElement(element);
break;
case Node.TEXT_NODE:
if (preserveWhitespace && whitespaceText((Text) node)) {
getContext().appendChild(node);
break;
}
ed = ancesterElementDefs[depth - 1];
contentModel = ed.getContentModel();
if (contentModel.match(pcdataNumber)
&& contentModel.match(this, context, node)) {
break;
} else if (ed.endTagOmittable()) {
for (int i = depth - 2; i >= 0; i--) {
ed = ancesterElementDefs[i];
contentModel = ed.getContentModel();
if (contentModel.match(pcdataNumber)
&& contentModel.match(this, ancesters[i], node)) {
break node_sel;
} else if (!ed.endTagOmittable()) {
break;
}
}
}
if (handleError(IParserError.ILLEGAL_CHILD, node))
break node_sel;
addErrorNode(context);
error(IParserError.ILLEGAL_CHILD, "#text(" + node
+ ") is not allowed as a child of " + context);
context.appendChild(node);
break;
default:
throw new ParseException(tokenizer.getCurrentLine()
+ ": Internal parser error " + node);
}
}
// System.out.println("SGMLParser: a");
if (docHandler != null && eHandleLogical) {
Element top = doc.getDocumentElement();
if (getContext() != top) {
setContext(top);
}
if (top != null)
docHandler.endElement(top.getNodeName());
}
if ((node = getNode()) != null) {
this.context = doc.getDocumentElement();
if (docHandler != null && eHandleLogical) {
docHandler.startElement(this.context.getNodeName(),
nullAttributeList);
}
// System.out.println("SGMLParser: pushback");
pushBackNode(node);
readInstances2();
}
// System.out.println("SGMLParser: end");
return doc;
}
private boolean whitespaceText(Text text) {
if (text instanceof SGMLText) {
return ((SGMLText) text).getIsWhitespaceInElementContent();
}
char str[] = text.getData().toCharArray();
for (int i = str.length - 1; i >= 0; i--) {
if (!Character.isWhitespace(str[i]))
return false;
}
return true;
}
private void postElement(Element element) throws ParseException,
IOException, SAXException {
IModelGroup mg = lastElementDef.getContentModel();
if (mg == cdata) {
String tagName = element.getNodeName();
Node cdata = readCDATA(tagName);
currentNode = new EndTag(tagName);
((EndTag) currentNode).setElement(element);
nodesWithEndtag.addElement(element);
if (docHandler != null && !eHandleLogical) {
docHandler.endElement(tagName);
}
element.appendChild(cdata);
if (context == element)
setContextBackward(depth - 1);
} else if (mg == empty && lastElementDef.endTagOmittable()) { // must
// be
// omitted
// System.out.println(element.toString()+lastElementDef.toString());
if (context == element) {
setContextBackward(depth - 1);
} else if (eHandleLogical && docHandler != null) {
docHandler.endElement(element.getNodeName());
}
} else if (isXHTML && isEndWithSlash) {
// System.out.println(element.toString()+lastElementDef.toString());
}
}
private CDATASection readCDATA(String arg) throws ParseException,
IOException, SAXException {
if (lexHandler != null) {
lexHandler.startCDATA();
}
String str = tokenizer.rawText(arg);
CDATASection ret = doc.createCDATASection(str);
currentNode = ret;
if (lexHandler != null) {
lexHandler.endCDATA();
}
tokenizer.eatCDATAEndTag();
return ret;
}
private Vector<CATB> commentsBeforeDoctype = new Vector<CATB>();
private Vector commentsAftereDoctype = new Vector();
/**
* comment at the beginning
*/
class CATB {
/**
* If comment, true. Otherwise, false.
*/
boolean comment = true;
String str;
}
private DocumentType readDocType() throws ParseException, IOException,
SAXException {
if (lastDef != null) {
throw new ParseException("Already read DOCTYPE declaration");
}
comment_loop: while (true) {
switch (tokenizer.nextToken()) {
case COMMENT:
if (doc == null) {
CATB catb = new CATB();
catb.str = tokenizer.sval;
commentsBeforeDoctype.addElement(catb);
} else {
currentNode = doc.createComment(tokenizer.sval);
doc.appendChild(currentNode);
}
if (lexHandler != null) {
lexHandler.comment(saxch, begin, len);
}
break;
case PI:
if (doc == null) {
CATB catb = new CATB();
catb.comment = false;
catb.str = tokenizer.sval;
commentsBeforeDoctype.addElement(catb);
} else {
currentNode = doc.createProcessingInstruction(null,
tokenizer.sval);
doc.appendChild(currentNode);
}
if (docHandler != null) {
docHandler.processingInstruction(null, tokenizer.sval);
}
break;
case MDO:
break comment_loop;
default:
tokenizer.pushBack();
return null;
}
}
if (tokenizer.nextToken() != NAME_CHAR
&& !tokenizer.sval.equals("DOCTYPE")) {
throw new ParseException("Unknown declaration at "
+ tokenizer.getCurrentLine());
}
/*
* Only supports initially setted public entity. For example, <!DOCTYPE
* HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
*/
if (tokenizer.nextToken() == NAME_CHAR) {
String docTypeName = tokenizer.sval;
this.defaultTopElement = docTypeName;
if (tokenizer.nextToken() == NAME_CHAR
&& tokenizer.sval.equalsIgnoreCase("PUBLIC")) {
if (tokenizer.nextToken() == '"') {
String publicID = tokenizer.eatUntil('"');
if (enforcedDoctype != null) {
publicID = enforcedDoctype;
}
if (lexHandler != null) {
lexHandler.startDTD(docTypeName, publicID, null);
}
String entityFileName = (String) pubEntityMap.get(publicID);
if (entityFileName == null) {
if (defaultDTD != null) {
error(IParserError.ILLEGAL_DOCTYPE, "Instead of \"" + publicID
+ "\" use \"" + defaultDTD + "\" as a DTD.");
entityFileName = (String) pubEntityMap
.get(defaultDTD);
}
if (entityFileName == null) {
throw new ParseException(tokenizer.getCurrentLine()
+ ": this parser does not support "
+ publicID);
}
publicID = defaultDTD;
}
setupDTD(publicID);
if (domImpl == null) {
domImpl = doc.getImplementation();
}
DocumentType ret = null;
if (domImpl != null) {
currentNode = ret = createDocType(domImpl, docTypeName,
publicID);
}
// consume '>'
tokenizer.consumeUntil('>');
tokenizer.switchTo(DEFAULT);
lastDef = dtd.getElementDefinition(docTypeName);
if (lastDef == null) {
String topElementName = getDefaultTopElement();
error(IParserError.ILLEGAL_DOCTYPE, docTypeName
+ " is not defined as a root element. Use "
+ topElementName + '.');
lastDef = dtd.getElementDefinition(topElementName);
}
if (lexHandler != null) {
lexHandler.endDTD();
}
return ret;
}
}
}
tokenizer.consumeUntil('>');
error(IParserError.ILLEGAL_DOCTYPE, "Invalid DOCTYPE declaration. Use " + defaultDTD);
setupDTD(defaultDTD);
lastDef = dtd.getElementDefinition(getDefaultTopElement());
return null;
}
private DocumentType createDocType(DOMImplementation domImpl,
String docTypeName, String publicID) {
/*
* For compatibility to DOM level 1
*/
Class domImplClass = domImpl.getClass();
Class stringClass = docTypeName.getClass();
Class parameterTypes[] = { stringClass, stringClass, stringClass };
java.lang.reflect.Method method;
try {
method = domImplClass.getMethod("createDocumentType",
parameterTypes);
} catch (NoSuchMethodException e) {
return null;
}
String args[] = { docTypeName, publicID, "" };
try {
return (DocumentType) method.invoke(domImpl, (Object[])args);
} catch (IllegalAccessException e) {
return null;
} catch (java.lang.reflect.InvocationTargetException e) {
return null;
} catch (DOMException e) {
return null;
}
/*
* For DOM level 2 try { return domImpl.createDocumentType(docTypeName,
* publicID, ""); } catch (DOMException e) { return null; }
*/
}
private Node nodesWithIllegalChildren[] = new Node[BUF_SIZ];
private int nodeWithIllegalChildNum = 0;
private void expandNodesWithIllegalChildren() {
Node newNodes[] = new Node[nodeWithIllegalChildNum * 2];
System.arraycopy(nodesWithIllegalChildren, 0, newNodes, 0,
nodeWithIllegalChildNum);
nodesWithIllegalChildren = newNodes;
}
private void addErrorNode(Node node) {
for (int i = nodeWithIllegalChildNum - 1; i >= 0; i--) {
if (nodesWithIllegalChildren[i] == node)
return;
}
if (nodesWithIllegalChildren.length == nodeWithIllegalChildNum) {
expandNodesWithIllegalChildren();
}
nodesWithIllegalChildren[nodeWithIllegalChildNum++] = node;
}
/**
* Checks if a specified node has an error or not.
*
* @param node
* node to be ckecked.
* @return <code>true</code> if <code>node</code> is an error node.
* Otherwise <code>false</code>
*/
public boolean isErrorNode(Node node) {
for (int i = nodeWithIllegalChildNum - 1; i >= 0; i--) {
if (nodesWithIllegalChildren[i] == node)
return true;
}
return false;
}
/**
* Initialized this parser.
*/
protected void init() {
lastDef = null;
context = null;
nodeWithIllegalChildNum = 0;
depth = 0;
andMap.clear();
seqMap.clear();
plusMap.clear();
commentsBeforeDoctype.removeAllElements();
commentsAftereDoctype.removeAllElements();
if (getDOMImplementation() != null) {
setDOMImplementation(getDOMImplementation());
} else {
for (Node child = doc.getFirstChild(); child != null; child = doc
.getFirstChild()) {
doc.removeChild(child);
}
}
}
int getCharEntity(String entity) throws IOException, ParseException,
SAXException {
SGMLEntityReference er = null;
try {
er = dtd.getEntityReference(entity);
} catch (ParseException e) {
return -1;
}
SGMLEntityDeclaration ed = er.getEntityDeclaration();
int ch = ed.getReplacementChar();
if (ch != -1) {
return ch;
}
InsTokenizer tokenizer2 = new InsTokenizer(ed.getReplacementReader(),
this);
if (tokenizer2.nextToken() == PCDATA && tokenizer2.sval.length() == 1) {
char ret = tokenizer2.sval.charAt(0);
ed.setReplacementChar(ret);
return ret;
} else {
throw new ParseException("Internal Parser Error: " + entity
+ " not defined.");
}
}
/**
* Sets a <i>Document </i> instance that will be a factory of nodes in DOM
* tree. This parser can use any implementation of W3C's DOM. By default,
* Parsers use {@link SGMLDocument}. This method make the parser instance
* ignore DOMImplementation by side effects.
*
* @param doc
* new Document instance.
* @see #getDocument()
* @deprecated See {@link #setDOMImplementation(DOMImplementation)}
*/
public void setDocument(Document doc) {
this.doc = doc;
domImpl = null;
}
public AttributeListImpl attrlist = null;
public int lastElementNumber;
public ElementDefinition lastElementDef;
private Node stag() throws IOException, ParseException, SAXException {
Element ret;
isEmptyElement = true;
isEndWithSlash = false;
if (tokenizer.nextToken() != NAME_CHAR) {
if (handleError(IParserError.TAG_NAME, tokenizer.sval)) {
return stag();
}
error(IParserError.STARTTAG_SYNTAX_ERR,
"Perhaps character encoding may not be correct.");
while (tokenizer.nextToken() != NAME_CHAR) {
if (tokenizer.ttype == EOF || tokenizer.ttype == TAGC) {
return null;
}
}
}
ElementDefinition ed = this.dtd.getElementDefinition(tokenizer.sval);
if (ed != null) {
lastElementNumber = ed.number;
lastElementDef = ed;
isEmptyElement = ed.getContentModel().toString().equalsIgnoreCase(
"EMPTY");
} else if (keepUnknowns) {
lastElementNumber = pcdataNumber + 1;
ed = lastElementDef = anonymousElementDef;
}
currentNode = ret = doc.createElement(changeTagCase(tokenizer.sval));
/*
* boolean syntaxError = false;
*/
Attr attr = null;
if (docHandler != null)
attrlist = createAttributeList();
isEndWithSlash = false;
while (tokenizer.nextToken() != TAGC) {
if (tokenizer.ttype == '/') {
isEndWithSlash = true;
} else {
isEndWithSlash = false;
}
if (tokenizer.ttype == STAGO || tokenizer.ttype == ETAGO) {
if (handleError(IParserError.BEFORE_ATTRNAME, tokenizer.sval)) {
continue;
}
error(IParserError.STARTTAG_SYNTAX_ERR, "requires an attribute in " + ret);
tokenizer.pushBack();
break;
} else if (tokenizer.ttype == EOF) {
break;
}
tokenizer.pushBack();
attr = attribute(ed, attrlist);
if (attr != null) {
ret.setAttributeNode(attr);
/*
* } else if (!syntaxError) { syntaxError = true;
* error(STARTTAG_SYNTAX_ERR, "requires an attribute after " +
* ret);
*/
}
}
if (docHandler != null && !eHandleLogical) {
// if(dtd.toString().indexOf("XHTML") < 0 || !endWithSlash){
docHandler.startElement(ret.getNodeName(), attrlist);
// }
}
if (ed == null) {
if (!handleError(IParserError.UNKNOWN_ELEMENT, ret)) {
error(IParserError.UNKNOWN_ELEMENT, "Unknown Element: " + ret.getTagName()
+ ". Ignore it.");
}
return null;
} else if (ed == anonymousElementDef) {
if (handleError(IParserError.UNKNOWN_ELEMENT, ret)) {
return null;
} else {
error(IParserError.UNKNOWN_ELEMENT, "Unknown Element: " + ret.getTagName()
+ ". Define its definition as <!ELEMENT "
+ ret.getNodeName().toUpperCase() + " - - ANY>");
}
}
currentTagName = ret.getNodeName();
// if (isXHTML && isEndWithSlash) {
// System.out.println("slash end tag");
// if (docHandler != null && !eHandleLogical) {
// docHandler.endElement(ret.getNodeName());
// }
// System.out.println(docHandler);
// }
return ret;
}
public String makeUnique(String id) {
for (Enumeration e = pubEntityMap.elements(); e.hasMoreElements();) {
String ret = (String) e.nextElement();
if (id.equals(ret))
return ret;
}
return id;
}
/**
* Changes default dtd. If &lt;!DOCTYPE ... &gt; statement is missing in a
* top of a document, a parser reads it by specifed dtd.
*
* @param dtd
* dtd's public id for default like
* <code>"-//W3C//DTD HTML 4.0 Transitional//EN"</code>
*/
public void setDefaultDTD(String dtd) {
defaultDTD = makeUnique(dtd);
}
/**
* This instance variable holds a default DTD's public ID. If a parser meets
* a strange DOCTYPE declaration, it reads a document with this default DTD.
*/
protected String defaultDTD = null;
/**
* Gets top element's name. If you want the parser to read document without
* DOCTYPE declaration at the top, you must override this method to return
* some element's name. For HTML documents, it returns "HTML".
*
* @return top element's name
* @exception ParseException
* always thrown.
*/
protected String getDefaultTopElement() throws ParseException {
if (defaultTopElement != null) {
return defaultTopElement;
}
throw new ParseException(
"doesn't know which element must be at the top.");
}
private String defaultTopElement = null;
public int defaultTagCase = IParser.UPPER_CASE;
/**
* Specifies element names' case whose start tags are omitted. Default
* behavier makes them uppercased.
*
* @param tagCase
* this must be {@link IParser#UPPER_CASE}or {@link IParser#LOWER_CASE}. If
* otherwise, ignore.
*/
public void setDefaultTagCase(int tagCase) {
if (tagCase == 0 || tagCase == IParser.LOWER_CASE) {
this.defaultTagCase = tagCase;
}
}
/**
* Change a specified string to specified cased.
*
* @see #setDefaultTagCase(int)
*/
public String changeDefaultTagCase(String tag) {
switch (this.defaultTagCase) {
case IParser.UPPER_CASE:
return tag.toUpperCase();
case IParser.LOWER_CASE:
return tag.toLowerCase();
default:
throw new RuntimeException("Internal Parser Error");
}
}
public int tagCase = IParser.ORIGINAL_CASE;
/**
* Specifies element names' case. Default behavier makes them original
* cased.
*
* @param tagCase
* this must be {@link IParser#UPPER_CASE},{@link IParser#LOWER_CASE}or
* {@link IParser#ORIGINAL_CASE}. If otherwise, ignore.
*/
public void setTagCase(int tagCase) {
if (tagCase == IParser.UPPER_CASE || tagCase == IParser.LOWER_CASE
|| tagCase == IParser.ORIGINAL_CASE) {
this.tagCase = tagCase;
}
}
final String changeTagCase(String tag) {
switch (this.tagCase) {
case IParser.UPPER_CASE:
return tag.toUpperCase();
case IParser.LOWER_CASE:
return tag.toLowerCase();
case IParser.ORIGINAL_CASE:
return tag;
default:
throw new RuntimeException("Internal Parser Error");
}
}
public int attrCase = IParser.ORIGINAL_CASE;
/**
* Specifies attribute names' case. Default behavier makes them original
* cased.
*
* @param attrCase
* this must be {@link IParser#UPPER_CASE},{@link IParser#LOWER_CASE}or
* {@link IParser#ORIGINAL_CASE}. If otherwise, ignore.
*/
public void setAttrNameCase(int attrCase) {
if (attrCase == IParser.UPPER_CASE || attrCase == IParser.LOWER_CASE
|| attrCase == IParser.ORIGINAL_CASE) {
this.attrCase = attrCase;
}
}
final String changeAttrNameCase(String attr) {
switch (this.attrCase) {
case IParser.UPPER_CASE:
return attr.toUpperCase();
case IParser.LOWER_CASE:
return attr.toLowerCase();
case IParser.ORIGINAL_CASE:
return attr;
default:
throw new RuntimeException("Internal Parser Error");
}
}
private String enforcedDoctype = null;
/**
* Ignores a declaration at the top of the document and enforces document
* type specified by <code>publicId</code>
*/
public void enforceDoctype(String publicId) {
enforcedDoctype = makeUnique(publicId);
}
/**
* Closes input stream.
*/
public void close() throws IOException {
tokenizer.close();
tokenizer = null;
}
public Object extraErrInfo = null;
/**
* Gets extra error information. A parser passes two error information (e.g.
* error code and error node) to node-level error handlers. However, it is
* now enough for some kind of error handlers to recover the error. If
* {@link IParserError#SUDDEN_ENDTAG}error occurs, parser set missed end
* tags between error node and future context to extra error information.
*/
public Object getExtraErrInfo() {
return extraErrInfo;
}
private boolean handleError(int code, Node node) throws ParseException,
IOException, SAXException {
for (int i = errorHandlerNum - 1; i >= 0; i--) {
if (errorHandlers[i].handleError(code, this, node)) {
return true;
}
}
return false;
}
boolean handleError(int code, String errorStr) throws ParseException,
IOException {
for (int i = tokenErrorHandlerNum - 1; i >= 0; i--) {
if (tokenErrorHandlers[i].handleError(code, this, errorStr)) {
return true;
}
}
return false;
}
void putCharNumEntity(Character C, String ent) {
Document doc = getDocument();
if (doc instanceof SGMLDocument) {
((SGMLDocument) doc).putCharNumEntity(C, ent);
}
}
public DocumentHandler docHandler = null;
/**
* Gets a <i>DocumentHandler </i> instance included in this parser.
*
* @see #setDocumentHandler(org.xml.sax.DocumentHandler)
*/
public DocumentHandler getDocumentHandler() {
return this.docHandler;
}
/**
* Sets a <i>DocumentHandler </i> instace for this parser.
*
* @see #getDocumentHandler()
*/
public void setDocumentHandler(DocumentHandler handler) {
this.docHandler = handler;
}
public boolean preserveWhitespace = false;
/**
* Checks if parser preserve whitespaces or not.
*
* @return <code>true</code> if this parser preserve whitespaces.
* Otherwise <code>false</code>
*/
public boolean getPreserveWhitespace() {
return preserveWhitespace;
}
/**
* Determines if this parser preserve whitespaces or not. If preserve, the
* parser create text node for whitespace between tags and does not ignore
* carriage return and line feed after start tags and before end tags. By
* default a parser ignores whitespaces.
*
* @see #getPreserveWhitespace()
*/
public void setPreserveWhitespace(boolean preserv) {
if (tokenizer != null) {
tokenizer.setPreserveWhitespace(preserv);
}
this.preserveWhitespace = preserv;
}
/**
* Determines which this parser invokes {@link
* org.xml.sax.DocumentHandler#startElement(java.lang.String,
* org.xml.sax.AttributeList)} and
* {@link org.xml.sax.DocumentHandler#endElement(java.lang.String)}
* <code>logically</code> or <code>physically</code>.<code>
* Logical</code>
* means that if a start or end tag of a element is omitted, a parser
* invokes each method. <code>Physical</code> means that parsers invokes
* each method if and only if their tag appearently exist. If
* <code>physical</code>, a parser does not care if the tag is illegal or
* not. Default is <code>physical</code>
*
* @param logical
* if true, deal with tags as logical. Otherwise, as physical
*/
public void elementHandle(boolean logical) {
this.eHandleLogical = logical;
}
public boolean eHandleLogical = true;
public Vector<Element> autoGenerated = new Vector<Element>();
/**
* Checks if the specified element is automatically generated by this parser
* or not. For example, <code>TBODY</code> under <code>TABLE</code> is
* automatically generated in following document.
*
* <PRE>
*
* &lt;TABLE&gt; &lt;TR&gt;&lt;TD&gt;&lt;TD&gt; &lt;/TABLE&gt;
*
* <PRE>
*
* @param an
* element node to be checked
* @return <code>true</code> if <code>element</code> is automatically
* generated by this. Otherwise false.
*/
public boolean autoGenerated(Element element) {
return autoGenerated.contains(element);
}
public final void addAutoGenerated(Element element) {
autoGenerated.addElement(element);
}
/**
* Inserts a string to current position in read stream. This method is
* usually invoked by token-level error handlers.
*
* @param str
* String to insert
*/
public void insert(String str) throws IOException {
tokenizer.unread(str);
}
private AttributeListImpl createAttributeList() {
return new AttributeListImpl();
}
private boolean keepUnknowns;
/* (non-Javadoc)
* @see org.eclipse.actf.model.dom.sgml.impl.ISGMLParser#keepUnknownElements(boolean)
*/
public void keepUnknownElements(boolean keep) {
this.keepUnknowns = keep;
}
/**
* Gets current line number of source document.
*
* @return line number
* @deprecated Use {@link org.xml.sax.Locator#getLineNumber()}
*/
public int getCurrentLineNumber() {
return tokenizer.getCurrentLine();
}
/**
* Gets current column number of source document.
*
* @return column number
* @deprecated Use {@link org.xml.sax.Locator#getColumnNumber()}
*/
public int getCurrentColumnNumber() {
return tokenizer.getCurrentCol();
}
/**
* Gets current reading node
*/
public Node getCurrentNode() {
return this.currentNode;
}
public Node currentNode;
public void setCurrentNode(Node node) {
this.currentNode = node;
}
private boolean keepComment = true;
/**
* Determines if this parser keeps comments and processing instructions in
* the tree or not. By default, it keeps.
*
* @param <code>true</code> if it keeps, Otherwise <code>false</code>
*/
public void setKeepComment(boolean keep) {
this.keepComment = keep;
}
private char saxch[];
private int begin, len;
void setCharacter(char ch[], int begin, int len) {
this.saxch = ch;
this.begin = begin;
this.len = len;
}
private LexicalHandler lexHandler;
/**
* Note: does not support
* {@link org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)}and
* {@link org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)}.
*/
public void setLexicalHandler(LexicalHandler lexHandler) {
this.lexHandler = lexHandler;
}
LexicalHandler getLexicalHandler() {
return this.lexHandler;
}
void incrementDepth(int i) {
this.depth += i;
}
/**
* @param i
* depth
*/
public void reopenContext(int i) throws SAXException {
this.depth += i;
this.context = ancesters[depth - 1];
if (!eHandleLogical || docHandler == null)
return;
for (int j = depth; j < i; j++) {
docHandler.startElement(ancesters[j].getNodeName(),
nullAttributeList);
}
}
public Element[] getContextElements() {
Element ret[] = new Element[depth];
System.arraycopy(ancesters, 0, ret, 0, depth);
return ret;
}
/**
* Parses a fragment under specified context.
*
*/
public void parseFragment(Element parent, Reader reader)
throws IOException, ParseException, SAXException {
if (dtd == null) {
throw new ParseException("Can't parse without DTD");
} else if (doc == null) {
throw new ParseException("Can't parse without a Document");
}
this.ancesterElementDefs[0] = dtd.getElementDefinition(parent
.getNodeName());
if (this.ancesterElementDefs[0] == null) {
this.ancesterElementDefs[0] = anonymousElementDef;
}
this.ancesters[0] = parent;
this.context = parent;
depth = 1;
this.tokenizer = new InsTokenizer(reader, this);
if (docHandler != null) {
docHandler.setDocumentLocator(this.tokenizer);
}
tokenizer.extractNumEntity(extractNum);
tokenizer.extractCharEntity(extractChar);
tokenizer.setPreserveWhitespace(preserveWhitespace);
readInstances2();
}
/**
* Parses a fragment. As a side effect, wastes a element node.
*
*/
public DocumentFragment parseFragment(Reader reader) throws IOException,
ParseException, SAXException {
Element dummy = doc != null ? doc.createElement("dummy") : null;
parseFragment(dummy, reader);
DocumentFragment ret = doc.createDocumentFragment();
for (Node child = dummy.getFirstChild(); child != null; child = dummy
.getFirstChild()) {
dummy.removeChild(child);
ret.appendChild(child);
}
return ret;
}
public int getPushbackBufferSize() {
return BUF_SIZ;
}
}