| /******************************************************************************* |
| * Copyright (c) 2008 IBM Corporation and Others |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * Goh KONDOH - initial API and implementation |
| * Kentarou FUKUDA - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.actf.model.dom.html; |
| |
| import java.io.IOException; |
| |
| import org.eclipse.actf.model.internal.dom.sgml.impl.EndTag; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.Node; |
| import org.xml.sax.SAXException; |
| |
| /** |
| * Base interface for parser implementations. |
| * <p> |
| * Besides giving access to the parsed {@link Document}, this interface |
| * declares registration of node-level error handlers ({@link IErrorHandler}) |
| * and error log listeners ({@link IErrorLogListener}), access to the current |
| * context element, node-level tokenizing ({@link #getNode()} and |
| * {@link #pushBackNode(Node)}) and switches for the case of tag and |
| * attribute names. |
| */ |
| public interface IParser { |
| |
| /** |
| * Case constant: upper case. |
| * |
| * @see #setAttrNameCase(int) |
| * @see #setDefaultTagCase(int) |
| * @see #setTagCase(int) |
| */ |
| public static final int UPPER_CASE = 0; |
| |
| /** |
| * Case constant: lower case. |
| * |
| * @see #setAttrNameCase(int) |
| * @see #setDefaultTagCase(int) |
| * @see #setTagCase(int) |
| */ |
| public static final int LOWER_CASE = 1; |
| |
| /** |
| * Case constant: original case (the case is left as it is). |
| * |
| * @see #setAttrNameCase(int) |
| * @see #setTagCase(int) |
| */ |
| public static final int ORIGINAL_CASE = 2; |
| |
| /** |
| * Adds an {@link IErrorHandler} instance. Handlers are invoked in reverse |
| * order of registration: a handler added later is invoked by this parser |
| * earlier than handlers added before it. If one handler handles the error |
| * (e.g. returns <code>true</code>), no more handlers are invoked. |
| * |
| * @param errorHandler |
| * error handler instance to be added to this parser |
| */ |
| public void addErrorHandler(IErrorHandler errorHandler); |
| |
| /** |
| * Adds an error log listener. The listener is invoked when an error |
| * occurs. |
| * |
| * @param listener |
| * target listener |
| */ |
| public void addErrorLogListener(IErrorLogListener listener); |
| |
| /** |
| * Checks whether the specified element was automatically generated by |
| * this parser. For example, <code>TBODY</code> under <code>TABLE</code> |
| * is automatically generated in the following document. |
| * |
| * <PRE> |
| * &lt;TABLE&gt; &lt;TR&gt;&lt;TD&gt;&lt;TD&gt; &lt;/TABLE&gt; |
| * </PRE> |
| * |
| * @param element |
| * element node to be checked |
| * @return <code>true</code> if the {@link Element} was automatically |
| * generated by this parser. Otherwise <code>false</code>. |
| */ |
| public boolean autoGenerated(Element element); |
| |
| /** |
| * Changes the case of the specified string (a tag name). The target case |
| * appears to be the default tag case of this parser (see |
| * {@link #setDefaultTagCase(int)}); confirm against the implementation. |
| * |
| * @param tag |
| * string whose case is to be changed |
| * @return the string after the case change |
| * @see #setDefaultTagCase(int) |
| * @see #UPPER_CASE |
| * @see #LOWER_CASE |
| * @see #ORIGINAL_CASE |
| */ |
| public String changeDefaultTagCase(String tag); |
| |
| /** |
| * Determines whether this parser invokes {@link |
| * org.xml.sax.DocumentHandler#startElement(java.lang.String, |
| * org.xml.sax.AttributeList)} and |
| * {@link org.xml.sax.DocumentHandler#endElement(java.lang.String)} |
| * <code>logically</code> or <code>physically</code>. |
| * <code>Logical</code> means that the parser invokes each method even if |
| * the start or end tag of an element is omitted. <code>Physical</code> |
| * means that the parser invokes each method if and only if the tag |
| * explicitly exists; in this mode the parser does not care whether the |
| * tag is illegal or not. Default is <code>physical</code>. |
| * |
| * @param logical |
| * if <code>true</code>, deal with tags as logical. Otherwise, as |
| * physical |
| */ |
| public void elementHandle(boolean logical); |
| |
| /** |
| * Records an error. Does nothing by default. If this parser has |
| * {@link IErrorLogListener} instances, calls their |
| * {@link IErrorLogListener#errorLog(int,String)} method. |
| * |
| * @param code |
| * error code. |
| * @param msg |
| * message of the error. |
| */ |
| public void error(int code, String msg); |
| |
| /** |
| * Gets the current context element. |
| * |
| * @return context element |
| * @see #setContext(org.w3c.dom.Element) |
| */ |
| public Element getContext(); |
| |
| /** |
| * Gets the context elements. |
| * |
| * @return array of context elements |
| */ |
| public Element[] getContextElements(); |
| |
| /** |
| * Gets the document parsed by this instance. |
| * |
| * @return parsed document instance. |
| * @see #setDocument(org.w3c.dom.Document) |
| */ |
| public Document getDocument(); |
| |
| /** |
| * Gets node-level error handlers included in this parser. |
| * |
| * @return error handlers |
| */ |
| public IErrorHandler[] getErrorHandlers(); |
| |
| /** |
| * Gets extra error information. A parser passes two pieces of error |
| * information (e.g. error code and error node) to node-level error |
| * handlers. However, that is not enough for some kinds of error handlers |
| * to recover from the error. If an {@link IParserError#SUDDEN_ENDTAG} |
| * error occurs, the parser sets the missed end tags between the error |
| * node and the future context as the extra error information. |
| * |
| * @return extra error information |
| */ |
| public Object getExtraErrInfo(); |
| |
| /** |
| * Gets a Node or {@link EndTag} from the stream currently being read, as |
| * a result of tokenizing. |
| * |
| * @return {@link org.w3c.dom.Node Node} or {@link EndTag} |
| * @exception ParseException |
| * conditions are not specified by this interface |
| * @exception IOException |
| * conditions are not specified by this interface |
| * @exception SAXException |
| * conditions are not specified by this interface |
| * @see #pushBackNode(org.w3c.dom.Node) |
| */ |
| public Node getNode() throws ParseException, IOException, SAXException; |
| |
| /** |
| * Gets the pushback buffer size of this parser. |
| * |
| * @return pushback buffer size |
| */ |
| public int getPushbackBufferSize(); |
| |
| /** |
| * Checks whether the specified element has its end tag. |
| * |
| * @param element |
| * element to be checked. |
| * @return <code>true</code> if the {@link Element} has its end tag. |
| * Otherwise, <code>false</code>. |
| * @see #setHasEndTag(Element) |
| */ |
| public boolean hasEndTag(Element element); |
| |
| /** |
| * Determines whether unknown elements are kept or not. The definition of |
| * such an element is |
| * <code>&lt;!ELEMENT <i>anonymous</i> - - ANY&gt;</code>, where |
| * <i>anonymous</i> is an unknown element. |
| * |
| * @param keep |
| * <code>true</code> if the parser keeps them. Otherwise |
| * <code>false</code> |
| */ |
| public void keepUnknownElements(boolean keep); |
| |
| /** |
| * Pushes back a node to this parser. |
| * |
| * @param node |
| * node to be pushed back. |
| * @see #getNode() |
| */ |
| public void pushBackNode(Node node); |
| |
| /** |
| * Reopens a context. The exact semantics are not documented in this |
| * interface; see the implementation. |
| * |
| * @param i |
| * depth |
| * @exception SAXException |
| * conditions are not specified by this interface |
| */ |
| public void reopenContext(int i) throws SAXException; |
| |
| /** |
| * Specifies attribute names' case. Default behavior keeps them in their |
| * original case. |
| * |
| * @param attrCase |
| * this must be {@link IParser#UPPER_CASE}, |
| * {@link IParser#LOWER_CASE} or {@link IParser#ORIGINAL_CASE}. |
| * Any other value is ignored. |
| */ |
| public void setAttrNameCase(int attrCase); |
| |
| /** |
| * Sets the current context element node. |
| * |
| * @param element |
| * new context. |
| * @exception SAXException |
| * conditions are not specified by this interface |
| * @see #getContext() |
| */ |
| public void setContext(Element element) throws SAXException; |
| |
| /** |
| * Sets the current node. |
| * |
| * @param node |
| * new current node. |
| */ |
| public void setCurrentNode(Node node); |
| |
| /** |
| * Specifies the case of element names whose start tags are omitted. |
| * Default behavior makes them upper-cased. |
| * |
| * @param tagCase |
| * this must be {@link IParser#UPPER_CASE} or |
| * {@link IParser#LOWER_CASE}. Any other value is ignored. |
| */ |
| public void setDefaultTagCase(int tagCase); |
| |
| /** |
| * Declares that the specified element has its end tag. |
| * |
| * @param element |
| * element regarded as having its end tag |
| * @see #hasEndTag(Element) |
| */ |
| public void setHasEndTag(Element element); |
| |
| /** |
| * Specifies element names' case. Default behavior keeps them in their |
| * original case. |
| * |
| * @param tagCase |
| * this must be {@link IParser#UPPER_CASE}, |
| * {@link IParser#LOWER_CASE} or {@link IParser#ORIGINAL_CASE}. |
| * Any other value is ignored. |
| */ |
| public void setTagCase(int tagCase); |
| |
| } |