Bug 489096 - HTML5 support
diff --git a/plugins/org.eclipse.actf.core/src/org/eclipse/actf/util/dom/DomPrintUtil.java b/plugins/org.eclipse.actf.core/src/org/eclipse/actf/util/dom/DomPrintUtil.java
index cdcaccb..9cd80c1 100644
--- a/plugins/org.eclipse.actf.core/src/org/eclipse/actf/util/dom/DomPrintUtil.java
+++ b/plugins/org.eclipse.actf.core/src/org/eclipse/actf/util/dom/DomPrintUtil.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2008 IBM Corporation and Others
+ * Copyright (c) 2008, 2016 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -56,6 +56,7 @@
private boolean indent = true;
private boolean escapeTagBracket = false;
+ private boolean isHTML5 = false;
private AttributeFilter attrFilter = null;
@@ -88,8 +89,7 @@
}
private String getXMLString(String targetS) {
- return targetS.replaceAll(AMP, ESC_AMP).replaceAll(LT, ESC_LT)
- .replaceAll(GT, ESC_GT);
+ return targetS.replaceAll(AMP, ESC_AMP).replaceAll(LT, ESC_LT).replaceAll(GT, ESC_GT);
}
private String getAttributeString(Element element, Node attr) {
@@ -123,8 +123,7 @@
public String toXMLString() {
StringBuffer tmpSB = new StringBuffer(8192);
- TreeWalkerImpl treeWalker = new TreeWalkerImpl(document, whatToShow,
- nodeFilter, entityReferenceExpansion);
+ TreeWalkerImpl treeWalker = new TreeWalkerImpl(document, whatToShow, nodeFilter, entityReferenceExpansion);
String lt = escapeTagBracket ? ESC_LT : LT;
String gt = escapeTagBracket ? ESC_GT : GT;
@@ -133,9 +132,19 @@
Node tmpN = treeWalker.nextNode();
boolean prevIsText = false;
+ boolean isFirst = true;
+
String indentS = EMPTY_STR;
while (tmpN != null) {
short type = tmpN.getNodeType();
+
+ // HTML5 output: if the very first node is neither a PI nor a DOCTYPE, synthesize the DOCTYPE
+ // up front; lt/gt are used (not literal brackets) so escapeTagBracket is honored like other tags.
+ if (isFirst && isHTML5 && type != Node.PROCESSING_INSTRUCTION_NODE && type != Node.DOCUMENT_TYPE_NODE) {
+ tmpSB.append(lt + "!DOCTYPE html SYSTEM \"about:legacy-compat\"" + gt + line_sep);
+ }
+ isFirst = false;
+
switch (type) {
case Node.ELEMENT_NODE:
if (prevIsText) {
@@ -176,58 +185,54 @@
} else {
comment = tmpN.getNodeValue();
}
- tmpSB.append(line_sep + indentS + lt + "!--" + comment + "--"
- + gt + line_sep);
+ tmpSB.append(line_sep + indentS + lt + "!--" + comment + "--" + gt + line_sep);
prevIsText = false;
break;
case Node.CDATA_SECTION_NODE:
- tmpSB.append(line_sep + indentS + lt + "!CDATA["
- + tmpN.getNodeValue() + "]]" + line_sep);
+ tmpSB.append(line_sep + indentS + lt + "![CDATA[" + tmpN.getNodeValue() + "]]" + gt + line_sep); // well-formed CDATA delimiters: <![CDATA[ ... ]]>
break;
case Node.DOCUMENT_TYPE_NODE:
- if (tmpN instanceof DocumentType) {
+ if (isHTML5) { // HTML5: print the legacy-compat DOCTYPE instead of the node's own; lt/gt honor escapeTagBracket
+ tmpSB.append(lt + "!DOCTYPE html SYSTEM \"about:legacy-compat\"" + gt + line_sep);
+ } else if (tmpN instanceof DocumentType) {
DocumentType docType = (DocumentType) tmpN;
+
String pubId = docType.getPublicId();
String sysId = docType.getSystemId();
if (null != pubId && pubId.length() > 0) {
if (null != sysId && sysId.length() > 0) {
- tmpSB.append(lt + "!DOCTYPE " + docType.getName()
- + " PUBLIC \"" + pubId + " \"" + sysId
+ // close the public ID's quote before opening the system ID's: PUBLIC "pubId" "sysId"
+ tmpSB.append(lt + "!DOCTYPE " + docType.getName() + " PUBLIC \"" + pubId + "\" \"" + sysId + "\">" + line_sep);
} else {
- tmpSB.append(lt + "!DOCTYPE " + docType.getName()
- + " PUBLIC \"" + pubId + "\">" + line_sep);
+ tmpSB.append(
+ lt + "!DOCTYPE " + docType.getName() + " PUBLIC \"" + pubId + "\">" + line_sep);
}
} else {
- tmpSB.append(lt + "!DOCTYPE " + docType.getName()
- + " SYSTEM \"" + docType.getSystemId() + "\">"
+ tmpSB.append(lt + "!DOCTYPE " + docType.getName() + " SYSTEM \"" + docType.getSystemId() + "\">"
+ line_sep);
}
} else {
- System.out
- .println("Document Type node does not implement DocumentType: "
- + tmpN);
+ System.out.println("Document Type node does not implement DocumentType: " + tmpN);
}
break;
case Node.PROCESSING_INSTRUCTION_NODE:
- if(tmpN instanceof ProcessingInstruction){
+ if (tmpN instanceof ProcessingInstruction) {
String tmpS = ((ProcessingInstruction) tmpN).getData();
- if(tmpS.startsWith("?")){
+ if (tmpS.startsWith("?")) {
tmpSB.append(lt);
- }else{
- tmpSB.append(lt+"?");
+ } else {
+ tmpSB.append(lt + "?");
}
- if(tmpS.endsWith("?")){
- tmpSB.append(tmpS+">"+LINE_SEP);
- }else{
- tmpSB.append(tmpS+"?>"+LINE_SEP);
+ if (tmpS.endsWith("?")) {
+ tmpSB.append(tmpS + ">" + LINE_SEP);
+ } else {
+ tmpSB.append(tmpS + "?>" + LINE_SEP);
}
}
break;
default:
- System.out.println(tmpN.getNodeType() + " : "
- + tmpN.getNodeName());
+ System.out.println(tmpN.getNodeType() + " : " + tmpN.getNodeName());
}
Node next = treeWalker.firstChild();
@@ -258,12 +263,10 @@
if (indentS.length() > 0) {
indentS = indentS.substring(1);
} else {
- System.err.println("indent: " + next.getNodeName()
- + " " + next);
+ System.err.println("indent: " + next.getNodeName() + " " + next);
}
}
- tmpSB.append(line_sep + indentS + lt + "/"
- + next.getNodeName() + gt + line_sep);
+ tmpSB.append(line_sep + indentS + lt + "/" + next.getNodeName() + gt + line_sep);
prevIsText = false;
}
next = treeWalker.nextSibling();
@@ -339,7 +342,8 @@
* want to print out DOM into <pre> section of HTML.
*
* @param escapeTagBracket
- * if true, print Tag bracket as escaped format ({@literal '<',
+ * if true, print Tag bracket as escaped format (
+ * {@literal '<',
* '>'})
*
*/
@@ -359,6 +363,15 @@
}
/**
+ * Sets whether the document is printed as HTML5. When true, toXMLString() writes the HTML5 "about:legacy-compat" DOCTYPE in place of the document's own DOCTYPE.
+ *
+ * @param isHTML5 true if the target document is HTML5
+ */
+ public void setHTML5(boolean isHTML5) {
+ this.isHTML5 = isHTML5;
+ }
+
+ /**
* Print out the target Document.
*
* @param filePath
@@ -403,8 +416,7 @@
* @throws IOException
*/
public void writeToFile(File file, String encode) throws IOException {
- PrintWriter tmpPW = new PrintWriter(new OutputStreamWriter(
- new FileOutputStream(file), encode));
+ PrintWriter tmpPW = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), encode));
tmpPW.println(toXMLString());
tmpPW.flush();
tmpPW.close();
diff --git a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/dom/html/DocumentTypeUtil.java b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/dom/html/DocumentTypeUtil.java
index 4d8d1d0..10669cb 100644
--- a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/dom/html/DocumentTypeUtil.java
+++ b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/dom/html/DocumentTypeUtil.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2010 IBM Corporation and Others
+ * Copyright (c) 2010, 2016 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -14,23 +14,62 @@
import org.w3c.dom.DocumentType;
public class DocumentTypeUtil {
-
+
/**
* @param docType
- * @return original ID if HTML Parser overrides it. If not, it returns usual public ID from {@link DocumentType}.
+ * @return original ID if HTML Parser overrides it. If not, it returns usual
+ * public/system ID from {@link DocumentType}. If docType is null,
+ * then returns empty string.
*/
- public static String getOriginalID(DocumentType docType){
- if(docType==null){
+ public static String getOriginalID(DocumentType docType) {
+ if (docType == null) {
return "";
}
String id = docType.getPublicId();
- if(docType instanceof SGMLDocType){
- String tmpS = ((SGMLDocType)docType).getOrgId();
- if(tmpS != null){
+ if (docType instanceof SGMLDocType) {
+ String tmpS = ((SGMLDocType) docType).getOrgId();
+ if (tmpS != null) {
return tmpS;
}
}
return id;
}
+ /**
+ * Checks whether the original doctype ID (see {@link #getOriginalID(DocumentType)}) names an XHTML DTD.
+ *
+ * @param docType
+ * target document type (may be null)
+ * @return true if original doctype is a kind of XHTML; false if docType is
+ * null or it has no ID
+ */
+ public static boolean isOriginalXHTML(DocumentType docType) {
+ if (docType == null) {
+ return false;
+ }
+ // getOriginalID() falls back to getPublicId(), which may be null when no public ID is declared
+ String orgID = getOriginalID(docType);
+ return orgID != null && orgID.contains("XHTML");
+ }
+
+ /**
+ * Checks whether the original doctype ID (see {@link #getOriginalID(DocumentType)})
+ * indicates HTML5, i.e. it is absent/empty or equals "about:legacy-compat".
+ *
+ * @param docType
+ * target document type (may be null)
+ * @return true if original doctype is a kind of html5 (
+ * {@literal <!DOCTYPE html> or <!DOCTYPE html SYSTEM "about:legacy-compat">}
+ * ); false if docType is null
+ */
+ public static boolean isOriginalHTML5(DocumentType docType) {
+ if (docType == null) {
+ return false;
+ }
+
+ String orgID = getOriginalID(docType);
+ // a doctype without any ID may be reported as null instead of "" (getPublicId() fallback); treated alike here - TODO confirm
+ return orgID == null || orgID.isEmpty() || orgID.equalsIgnoreCase("about:legacy-compat");
+ }
+
}
diff --git a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/dom/html/IParserError.java b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/dom/html/IParserError.java
index 7c1037f..2265dbf 100644
--- a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/dom/html/IParserError.java
+++ b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/dom/html/IParserError.java
@@ -1,131 +1,131 @@
-/*******************************************************************************
- * Copyright (c) 2008, 2012 IBM Corporation and Others
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * Kentarou FUKUDA - initial API and implementation
- *******************************************************************************/
-package org.eclipse.actf.model.dom.html;
-
-/**
- * Interface for parser error constants
- */
-public interface IParserError {
-
- /**
- * Error code for missing DOCTYPE declaration. This kind of error is not
- * dealt with error handlers
- */
- public static final int DOCTYPE_MISSED = 1;
- /**
- * Error code for syntax error of DOCTYPE declaration. This kind of error is
- * not dealt with error handlers
- */
- public static final int ILLEGAL_DOCTYPE = 2;
- /**
- * Error code for an illegal top element. For HTML example, if an HTML
- * document starts with <LI> as follows, this is an illegal top
- * element. Because of the doctype declaration the document must start with
- * <HTML>
- *
- * <pre>
- * <!DOCTYPE
- * <em>
- * HTML
- * </em>
- * PUBLIC "-//W3C//DTD HTML 4.0//EN">
- * <em>
- * <LI>
- * </em>
- * </pre>
- *
- * This kind of error is not dealt with error handlers
- */
- public static final int ILLEGAL_TOP_ELEMENT = 3;
- /**
- * Error code for an illegal attribute. If an element has an unknown
- * attribute, that is an illegal attribute.
- */
- public static final int ILLEGAL_ATTRIBUTE = 4;
- /**
- * Error code for a floating endtag. Endtags whose corresponding start tag
- * is missing are defined as <em>floating</em>. For HTML example,
- * following </P> is floating because the P element is closed before
- * HR.
- *
- * <pre>
- * <P>
- * ...
- * <!-- Here is an omitted end tag of P becase following HR is now allowed
- * as a child of P -->
- * <HR>
- * ...
- * </P>
- *
- */
- public static final int FLOATING_ENDTAG = 5;
- /**
- * Error code for sudden endtag. A strange endtag that appears in some
- * context is defined as <em>sudden</em>. For HTML example, following
- * <em>
- * </I></em> is sudden.
- *
- * <pre>
- * <I> ... <B> ...
- * <em>
- * </I>
- * </em>
- * ... </B>
- * </pre>
- */
- public static final int SUDDEN_ENDTAG = 6;
- /**
- * Error code for illegal child. A node that is not allowed as a child of
- * context's element is defined as an illegal child. For HTML example,
- * following <em>P</em> element is an illegal child of HEAD
- *
- * <pre>
- * <HTML>
- * <HEAD>
- * <em>
- * <P> Illegal </P>
- * </em>
- * <TITLE>
- * ...
- * </pre>
- */
- public static final int ILLEGAL_CHILD = 7;
- /**
- * Error code for unknown elements (not defined in pre-read DTD)
- */
- public static final int UNKNOWN_ELEMENT = 8;
- /**
- * Error code for syntax error of start tag text.
- */
- public static final int STARTTAG_SYNTAX_ERR = 9;
- /**
- * Error code for miscellenious
- */
- public static final int MISC_ERR = 10;
- /**
- * Error code for token-level error of attribute value.
- */
- public static final int ATTR_VALUE = 11;
- /**
- * Error code for token-level error before attribute's name.
- */
- public static final int BEFORE_ATTRNAME = 12;
- /**
- * Error code for token-level error of tag name.
- */
- public static final int TAG_NAME = 13;
-
- /*
- * Error code for Byte-Order Mark (BOM) found in UTF-8 HTML file
- */
- public static final int BOM = 14;
-
-}
+/*******************************************************************************
+ * Copyright (c) 2008, 2016 IBM Corporation and Others
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * Kentarou FUKUDA - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.actf.model.dom.html;
+
+/**
+ * Interface for parser error constants
+ */
+public interface IParserError {
+
+ /**
+ * Error code for missing DOCTYPE declaration. This kind of error is not
+ * dealt with error handlers
+ */
+ public static final int DOCTYPE_MISSED = 1;
+ /**
+ * Error code for syntax error of DOCTYPE declaration. This kind of error is
+ * not dealt with error handlers
+ */
+ public static final int ILLEGAL_DOCTYPE = 2;
+ /**
+ * Error code for an illegal top element. For HTML example, if an HTML
+ * document starts with <LI> as follows, this is an illegal top
+ * element. Because of the doctype declaration the document must start with
+ * <HTML>
+ *
+ * <pre>
+ * <!DOCTYPE
+ * <em>
+ * HTML
+ * </em>
+ * PUBLIC "-//W3C//DTD HTML 4.0//EN">
+ * <em>
+ * <LI>
+ * </em>
+ * </pre>
+ *
+ * This kind of error is not dealt with error handlers
+ */
+ public static final int ILLEGAL_TOP_ELEMENT = 3;
+ /**
+ * Error code for an illegal attribute. If an element has an unknown
+ * attribute, that is an illegal attribute.
+ */
+ public static final int ILLEGAL_ATTRIBUTE = 4;
+ /**
+ * Error code for a floating endtag. Endtags whose corresponding start tag
+ * is missing are defined as <em>floating</em>. For HTML example,
+ * following </P> is floating because the P element is closed before
+ * HR.
+ *
+ * <pre>
+ * <P>
+ * ...
+ * <!-- Here is an omitted end tag of P because following HR is not allowed
+ * as a child of P -->
+ * <HR>
+ * ...
+ * </P>
+ * </pre>
+ */
+ public static final int FLOATING_ENDTAG = 5;
+ /**
+ * Error code for sudden endtag. A strange endtag that appears in some
+ * context is defined as <em>sudden</em>. For HTML example, following
+ * <em>
+ * </I></em> is sudden.
+ *
+ * <pre>
+ * <I> ... <B> ...
+ * <em>
+ * </I>
+ * </em>
+ * ... </B>
+ * </pre>
+ */
+ public static final int SUDDEN_ENDTAG = 6;
+ /**
+ * Error code for illegal child. A node that is not allowed as a child of
+ * context's element is defined as an illegal child. For HTML example,
+ * following <em>P</em> element is an illegal child of HEAD
+ *
+ * <pre>
+ * <HTML>
+ * <HEAD>
+ * <em>
+ * <P> Illegal </P>
+ * </em>
+ * <TITLE>
+ * ...
+ * </pre>
+ */
+ public static final int ILLEGAL_CHILD = 7;
+ /**
+ * Error code for unknown elements (not defined in pre-read DTD)
+ */
+ public static final int UNKNOWN_ELEMENT = 8;
+ /**
+ * Error code for syntax error of start tag text.
+ */
+ public static final int STARTTAG_SYNTAX_ERR = 9;
+ /**
+ * Error code for miscellaneous errors
+ */
+ public static final int MISC_ERR = 10;
+ /**
+ * Error code for token-level error of attribute value.
+ */
+ public static final int ATTR_VALUE = 11;
+ /**
+ * Error code for token-level error before attribute's name.
+ */
+ public static final int BEFORE_ATTRNAME = 12;
+ /**
+ * Error code for token-level error of tag name.
+ */
+ public static final int TAG_NAME = 13;
+
+ /**
+ * Error code for Byte-Order Mark (BOM) found in UTF-8 HTML4 file
+ */
+ public static final int BOM = 14;
+
+}
diff --git a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/html/impl/SHTableCellElement.java b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/html/impl/SHTableCellElement.java
index 70fa1e8..69164c9 100644
--- a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/html/impl/SHTableCellElement.java
+++ b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/html/impl/SHTableCellElement.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 1998, 2008 IBM Corporation and Others
+ * Copyright (c) 1998, 2016 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -17,8 +17,7 @@
import org.w3c.dom.html.HTMLTableRowElement;
@SuppressWarnings("nls")
-public class SHTableCellElement extends SHElement implements
- HTMLTableCellElement {
+public class SHTableCellElement extends SHElement implements HTMLTableCellElement {
/**
*
*/
@@ -32,15 +31,13 @@
Node parent = getParentNode();
if (parent instanceof HTMLTableRowElement) {
int ret = 0;
- for (Node prev = getPreviousSibling(); prev != null; prev = prev
- .getPreviousSibling())
+ for (Node prev = getPreviousSibling(); prev != null; prev = prev.getPreviousSibling())
ret++;
return ret;
} else { // error.
int ret = 0;
for (Node prev = getPreviousSibling(); prev != null
- && !(prev instanceof HTMLTableRowElement); prev = prev
- .getPreviousSibling())
+ && !(prev instanceof HTMLTableRowElement); prev = prev.getPreviousSibling())
ret++;
return ret;
}
@@ -60,12 +57,11 @@
}
} else { // error.
Node leftMost = this;
- int currentIndex = 0;
+ // int currentIndex = 0;
for (Node prev = getPreviousSibling(); prev != null
- && !(prev instanceof HTMLTableRowElement); prev = prev
- .getPreviousSibling()) {
+ && !(prev instanceof HTMLTableRowElement); prev = prev.getPreviousSibling()) {
leftMost = prev;
- currentIndex++;
+ // currentIndex++;
}
parent.removeChild(this);
Node before = leftMost;
diff --git a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/html/parser/HTMLParser.java b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/html/parser/HTMLParser.java
index fde1d9a..fc0f197 100644
--- a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/html/parser/HTMLParser.java
+++ b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/html/parser/HTMLParser.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 1998, 2012 IBM Corporation and Others
+ * Copyright (c) 1998, 2016 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -24,6 +24,7 @@
import java.util.Enumeration;
import java.util.Properties;
+import org.eclipse.actf.model.dom.html.DocumentTypeUtil;
import org.eclipse.actf.model.dom.html.IErrorLogListener;
import org.eclipse.actf.model.dom.html.IHTMLParser;
import org.eclipse.actf.model.dom.html.IParser;
@@ -82,7 +83,7 @@
try {
map.load(is);
// pubEntityMap.putAll(map);
- Enumeration keys = map.keys(); // CRS
+ Enumeration<Object> keys = map.keys(); // CRS
while (keys.hasMoreElements()) { // CRS
String aKey = (String) keys.nextElement(); // CRS
String replaceKey = aKey.replace('@', ' '); // CRS
@@ -104,12 +105,13 @@
/**
* Constructs HTMLParser instance whose defaultDTD is
- * <code>"-//W3C//DTD HTML 4.0 Transitional//EN"</code>. And also,
+ * <code>"-//W3C//DTD HTML 4.01 Transitional//EN"</code>. And also,
* {@link HTMLErrorHandler} and {@link FramesetErrorHandler} instances are
* added.
*/
+ @SuppressWarnings("deprecation")
public HTMLParser() {
- defaultDTD = "-//W3C//DTD HTML 4.0 Transitional//EN"; //$NON-NLS-1$
+ defaultDTD = "-//W3C//DTD HTML 4.01 Transitional//EN"; //$NON-NLS-1$
addErrorHandler(new FramesetErrorHandler());
addErrorHandler(new HTMLErrorHandler());
setDocumentHandler(new PREHandler(this));
@@ -129,26 +131,25 @@
}
}
-/**
+ /**
* Reads files and print their top elements. This method is just for test.
* usage: java org.eclipse.actf.model.dom.html.HTMLParser [options] files...
* <br>
* options:
* <DL>
- * <DT> -e encoding
- * <DD> specify character encoding to <code>encoding</code>
- * <DT> -c
- * <DD> If it meets
+ * <DT>-e encoding
+ * <DD>specify character encoding to <code>encoding</code>
+ * <DT>-c
+ * <DD>If it meets
* <code> <META http-equiv="Content-Type" content="text/html;
- * charset=xxx"></code>
- * tag, change encoding to <code>xxx</code>
- * <DT> -d
- * <DD> Dump results.
- * <DT> -o output file
- * <DT> -x [dtd]
- * <DD> Dump as xml format.
- * <DT> -w?
- * <DD> warning
+ * charset=xxx"></code> tag, change encoding to <code>xxx</code>
+ * <DT>-d
+ * <DD>Dump results.
+ * <DT>-o output file
+ * <DT>-x [dtd]
+ * <DD>Dump as xml format.
+ * <DT>-w?
+ * <DD>warning
*
* @param args
* command line argument.
@@ -261,6 +262,10 @@
System.out.println("Encoding: " + parser.getEncoding());
} else {
parser.parse(is, encoding);
+ System.err.println("Doctype:" + parser.getDocument().getDoctype());
+ System.err.println(
+ "Org Doctype:" + DocumentTypeUtil.getOriginalID(parser.getDocument().getDoctype()));
+ System.err.println();
}
} catch (ParseException e) {
e.printStackTrace();
@@ -273,21 +278,17 @@
OutputStream os = targetFileName == null ? (OutputStream) System.out
: new FileOutputStream(targetFileName);
if (encoding != null) {
- pw = new PrintWriter(new OutputStreamWriter(os,
- encoding));
+ pw = new PrintWriter(new OutputStreamWriter(os, encoding));
} else {
pw = new PrintWriter(os);
}
if (!xml) {
- ((SHDocument) parser.getDocument()).printAsSGML(pw,
- indent);
+ ((SHDocument) parser.getDocument()).printAsSGML(pw, indent);
} else {
- ((SHDocument) parser.getDocument()).printAsXHTML(pw,
- indent, encoding);
+ ((SHDocument) parser.getDocument()).printAsXHTML(pw, indent, encoding);
}
} else if (list != null) {
- NodeList nodeList = parser.getDocument()
- .getElementsByTagName(list);
+ NodeList nodeList = parser.getDocument().getElementsByTagName(list);
for (i = 0; i < nodeList.getLength(); i++) {
System.out.println(nodeList.item(i));
}
@@ -308,14 +309,14 @@
* If unrecoverable syntax or token error occurred, thrown
* @exception IOException
*/
- public Node parse(InputStream is) throws ParseException, IOException,
- SAXException {
+ public Node parse(InputStream is) throws ParseException, IOException, SAXException {
JapaneseEncodingDetector JED = new JapaneseEncodingDetector(is);
try {
encoding = JED.detect();
- if(JED.hasBOM()){
- error(IParserError.BOM,"Byte-Order Mark (BOM) found in UTF-8 HTML file.");
+ // TODO check if html5 or not
+ if (JED.hasBOM()) {
+ error(IParserError.BOM, "Byte-Order Mark (BOM) found in UTF-8 HTML file.");
}
} catch (IOException e) {
throw (e);
@@ -349,8 +350,7 @@
* If unrecoverable syntax or token error occurred, throwed
* @exception IOException
*/
- public Node parse(InputStream is, String charEncoding) throws SAXException,
- ParseException, IOException {
+ public Node parse(InputStream is, String charEncoding) throws SAXException, ParseException, IOException {
if (charEncoding == null) {
isReader = new InputStreamReader(is);
} else {
@@ -379,8 +379,7 @@
* If unrecoverable syntax or token error occurred, throwed
* @exception IOException
*/
- public Node parseSwitchEnc(InputStream is) throws ParseException,
- IOException, SAXException {
+ public Node parseSwitchEnc(InputStream is) throws ParseException, IOException, SAXException {
return parseSwitchEnc(is, null);
}
@@ -440,8 +439,7 @@
} catch (Error e) {
return super.createDocument(docType);
}
- Document ret = ((HTMLDOMImplementation) domImpl)
- .createHTMLDocument("dummy"); //$NON-NLS-1$
+ Document ret = ((HTMLDOMImplementation) domImpl).createHTMLDocument("dummy"); //$NON-NLS-1$
if (ret.getDocumentElement() != null) {
ret.removeChild(ret.getDocumentElement());
}
@@ -470,8 +468,8 @@
}
private static void usage() {
- System.out
- .println("usage java org.eclipse.actf.model.dom.html.parser.HTMLParser [-w[#]] [-d] [-e encoding] [-c] files..."); //$NON-NLS-1$
+ System.out.println(
+ "usage java org.eclipse.actf.model.dom.html.parser.HTMLParser [-w[#]] [-d] [-ku] [-e encoding] [-c] files..."); //$NON-NLS-1$
System.exit(1);
}
diff --git a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/ElementDefinition.java b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/ElementDefinition.java
index 18bcdea..50c4918 100644
--- a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/ElementDefinition.java
+++ b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/ElementDefinition.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 1998, 2008 IBM Corporation and Others
+ * Copyright (c) 1998, 2016 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -58,8 +58,7 @@
attributeDefHashes = newHashes;
}
attributeDefs[attrNum] = def;
- attributeDefHashes[attrNum++] = hashCode(def.getName()
- .toCharArray());
+ attributeDefHashes[attrNum++] = hashCode(def.getName().toCharArray());
}
}
@@ -73,20 +72,17 @@
}
/**
- * Matches <code>child</code> to <code>parent</code> by
- * <code>parser</code>.
+ * Matches <code>child</code> to <code>parent</code> by <code>parser</code>.
*
* @return <code>true</code> if succeeded. Otherwise, <code>false</code>
*/
public boolean match(ISGMLParser parser, Node parent, Node child) {
- if (child instanceof Element
- && child.getNodeName().equalsIgnoreCase(name)) {
+ if (child instanceof Element && child.getNodeName().equalsIgnoreCase(name)) {
parent.appendChild(child);
return true;
}
if (start) {
- Element tmp = parser.getDocument().createElement(
- parser.changeDefaultTagCase(name));
+ Element tmp = parser.getDocument().createElement(parser.changeDefaultTagCase(name));
// check excepiton
if (child instanceof Element) {
Element ec = (Element) child;
@@ -119,9 +115,9 @@
}
/**
- * Matches <code>child</code> to <code>parent</code> by
- * <code>parser</code> as a conent. Suppose this instance is a definition
- * derived from <code><!ELEMENT TBODY O O (TR)+>,
+ * Matches <code>child</code> to <code>parent</code> by <code>parser</code>
+ * as a content. Suppose this instance is a definition derived from
+ * <code><!ELEMENT TBODY O O (TR)+>,
* <TR> doesn't match <TBODY> in this method but in the {@link
* #match(ISGMLParser,org.w3c.dom.Node,org.w3c.dom.Node)} method.
* @return <code>true</code> if succeeded. Otherwise, <code>false</code>
@@ -183,8 +179,7 @@
}
final boolean instance(Node node) {
- return node instanceof Element
- && node.getNodeName().equalsIgnoreCase(name);
+ return node instanceof Element && node.getNodeName().equalsIgnoreCase(name);
}
/**
@@ -234,8 +229,7 @@
public AttributeDefinition getAttributeDef(String attrName) {
int hash = hashCode(attrName.toCharArray());
for (int i = attrNum - 1; i >= 0; i--) {
- if (attributeDefHashes[i] == hash
- && attributeDefs[i].getName().equalsIgnoreCase(attrName)) {
+ if (attributeDefHashes[i] == hash && attributeDefs[i].getName().equalsIgnoreCase(attrName)) {
return attributeDefs[i];
}
}
@@ -257,7 +251,7 @@
return false;
}
- private int refercount = 0;
+ // private int refercount = 0;
/**
* Sets reference flag.
@@ -270,7 +264,7 @@
if (infinite) {
this.infinite = infinite;
} else {
- refercount++;
+ //refercount++;
}
if (inclusions != null) {
for (int i = 0; i < inclusions.length; i++) {
@@ -305,11 +299,11 @@
}
/**
- * Just checks a node with <code>number</code> as Magic Number can match
- * to a parent defined by this instance. Rather than {@link
- * #match(SGMLParser,org.w3c.dom.Node,org.w3c.dom.Node)} or {@link
- * #contentMatch(SGMLParser,org.w3c.dom.Node,org.w3c.dom.Node)}, this has no
- * side effects.
+ * Just checks a node with <code>number</code> as Magic Number can match to
+ * a parent defined by this instance. Rather than
+ * {@link #match(SGMLParser,org.w3c.dom.Node,org.w3c.dom.Node)} or
+ * {@link #contentMatch(SGMLParser,org.w3c.dom.Node,org.w3c.dom.Node)}, this
+ * has no side effects.
*/
public boolean match(int number) {
return hash[number];
@@ -365,8 +359,7 @@
contentModelHash = new boolean[totalSize];
}
for (int i = totalSize - 1; i >= 0; i--) {
- hash[i] = (inclusionVector[i] | contentModelHash[i])
- & !exclusionVector[i];
+ hash[i] = (inclusionVector[i] | contentModelHash[i]) & !exclusionVector[i];
}
}
hash[this.number] = true;
diff --git a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLDocument.java b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLDocument.java
index acd3ac1..af8906a 100644
--- a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLDocument.java
+++ b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLDocument.java
@@ -19,6 +19,7 @@
import java.util.Hashtable;
import java.util.List;
+import org.eclipse.actf.model.dom.html.DocumentTypeUtil;
import org.eclipse.actf.model.internal.dom.sgml.ISGMLDocument;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
@@ -73,13 +74,13 @@
void check(Node node) throws DOMException {
if (node.getOwnerDocument() != this && !(node instanceof SGMLDocType)) {
- throw new DOMException(DOMException.WRONG_DOCUMENT_ERR, node
- + " created from " + node.getOwnerDocument() + " this.") {
+ throw new DOMException(DOMException.WRONG_DOCUMENT_ERR,
+ node + " created from " + node.getOwnerDocument() + " this.") {
- /**
- *
- */
- private static final long serialVersionUID = -6949628537817157229L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = -6949628537817157229L;
};
}
switch (node.getNodeType()) {
@@ -87,13 +88,12 @@
if (documentElement == null) {
documentElement = (Element) node;
} else {
- throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR,
- " document cannot have roots.") {
+ throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, " document cannot have roots.") {
- /**
- *
- */
- private static final long serialVersionUID = -8867384684522317782L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = -8867384684522317782L;
};
}
break;
@@ -105,13 +105,12 @@
((SGMLNode) node).ownerDocument = this;
break;
default:
- throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, node
- + " is not allowed as a child of " + this) {
+ throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, node + " is not allowed as a child of " + this) {
- /**
- *
- */
- private static final long serialVersionUID = 7083250644035192730L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = 7083250644035192730L;
};
}
}
@@ -119,8 +118,7 @@
public Node cloneNode(boolean deep) {
SGMLDocument ret = (SGMLDocument) super.cloneNode(true);
setOwnerDocument(ret, ret);
- for (Node child = ret.firstChild; child != null; child = child
- .getNextSibling()) {
+ for (Node child = ret.firstChild; child != null; child = child.getNextSibling()) {
if (child instanceof Element) {
ret.documentElement = (Element) child;
} else if (child instanceof DocumentType) {
@@ -130,17 +128,17 @@
processNodeDeepForOptimization(ret.documentElement);
return ret;
}
-
+
private void processNodeDeepForOptimization(Element element) {
Node f = element.getFirstChild();
- while(f != null) {
+ while (f != null) {
if (f instanceof Element) {
processNodeDeepForOptimization((Element) f);
}
f = f.getNextSibling();
}
if (element instanceof SGMLElement) {
- ((SGMLElement)element).processNodeForOptimization(element);
+ ((SGMLElement) element).processNodeForOptimization(element);
}
}
@@ -174,18 +172,16 @@
}
public EntityReference createEntityReference(String a) throws DOMException {
- throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
- "cannot create Entity Ref.") {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "cannot create Entity Ref.") {
- /**
- *
- */
- private static final long serialVersionUID = -4581301359508117945L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = -4581301359508117945L;
};
}
- public ProcessingInstruction createProcessingInstruction(String target,
- String data) {
+ public ProcessingInstruction createProcessingInstruction(String target, String data) {
return new SGMLPI(target, data, this);
}
@@ -210,77 +206,76 @@
// replaced for performance reason @2009/06/25 by dsato@jp.ibm.com
return documentElement.getElementsByTagName(tagname);
}
-
- /* very slow
- public NodeList getElementsByTagName(String tagname) {
- final boolean all = tagname.equals("*");
- final String targetName = tagname;
- return new NodeList() {
- public int getLength() {
- int ret = 0;
- Node tmp1, tmp2;
- tmp1 = documentElement;
- outer: while (tmp1 != null) {
- if (tmp1 instanceof Element
- && (all || targetName.equalsIgnoreCase(tmp1
- .getNodeName()))) {
- ret++;
- }
- if ((tmp2 = tmp1.getFirstChild()) == null) {
- if (tmp1 == documentElement) {
- break outer;
- } else {
- tmp2 = tmp1.getNextSibling();
- }
- }
- while (tmp2 == null && tmp1 != null) {
- tmp1 = tmp2 = tmp1.getParentNode();
- if (tmp1 != documentElement) {
- tmp2 = tmp1.getNextSibling();
- } else {
- break outer;
- }
- }
- tmp1 = tmp2;
- }
- return ret;
- }
- public Node item(int index) {
- Node tmp1, tmp2;
- tmp1 = documentElement;
- outer: while (tmp1 != null) {
- if (tmp1 instanceof Element
- && (all || targetName.equalsIgnoreCase(tmp1
- .getNodeName()))) {
- if (index == 0) {
- return tmp1;
- } else {
- index--;
- }
- }
- if ((tmp2 = tmp1.getFirstChild()) == null) {
- if (tmp1 == documentElement) {
- break outer;
- } else {
- tmp2 = tmp1.getNextSibling();
- }
- }
- while (tmp2 == null && tmp1 != null) {
- tmp1 = tmp2 = tmp1.getParentNode();
- if (tmp1 != documentElement) {
- tmp2 = tmp1.getNextSibling();
- } else {
- break outer;
- }
- }
- tmp1 = tmp2;
- }
- return null;
- }
- };
- }
- */
+ // very slow
+ // public NodeList getElementsByTagName(String tagname) {
+ // final boolean all = tagname.equals("*");
+ // final String targetName = tagname;
+ // return new NodeList() {
+ // public int getLength() {
+ // int ret = 0;
+ // Node tmp1, tmp2;
+ // tmp1 = documentElement;
+ // outer: while (tmp1 != null) {
+ // if (tmp1 instanceof Element
+ // && (all || targetName.equalsIgnoreCase(tmp1
+ // .getNodeName()))) {
+ // ret++;
+ // }
+ // if ((tmp2 = tmp1.getFirstChild()) == null) {
+ // if (tmp1 == documentElement) {
+ // break outer;
+ // } else {
+ // tmp2 = tmp1.getNextSibling();
+ // }
+ // }
+ // while (tmp2 == null && tmp1 != null) {
+ // tmp1 = tmp2 = tmp1.getParentNode();
+ // if (tmp1 != documentElement) {
+ // tmp2 = tmp1.getNextSibling();
+ // } else {
+ // break outer;
+ // }
+ // }
+ // tmp1 = tmp2;
+ // }
+ // return ret;
+ // }
+ //
+ // public Node item(int index) {
+ // Node tmp1, tmp2;
+ // tmp1 = documentElement;
+ // outer: while (tmp1 != null) {
+ // if (tmp1 instanceof Element
+ // && (all || targetName.equalsIgnoreCase(tmp1
+ // .getNodeName()))) {
+ // if (index == 0) {
+ // return tmp1;
+ // } else {
+ // index--;
+ // }
+ // }
+ // if ((tmp2 = tmp1.getFirstChild()) == null) {
+ // if (tmp1 == documentElement) {
+ // break outer;
+ // } else {
+ // tmp2 = tmp1.getNextSibling();
+ // }
+ // }
+ // while (tmp2 == null && tmp1 != null) {
+ // tmp1 = tmp2 = tmp1.getParentNode();
+ // if (tmp1 != documentElement) {
+ // tmp2 = tmp1.getNextSibling();
+ // } else {
+ // break outer;
+ // }
+ // }
+ // tmp1 = tmp2;
+ // }
+ // return null;
+ // }
+ // };
+ // }
public DOMImplementation getImplementation() {
return this.domImpl;
@@ -337,18 +332,16 @@
}
public void setNodeValue(String nodeValue) throws DOMException {
- throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "#document is always null") {
+ throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "#document is always null") {
- /**
- *
- */
- private static final long serialVersionUID = 6689389325290139309L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = 6689389325290139309L;
};
}
- public void printAsXML(String publicID, URL location, PrintWriter pw,
- boolean indent) throws IOException {
+ public void printAsXML(String publicID, URL location, PrintWriter pw, boolean indent) throws IOException {
printAsXML(publicID, location, pw, indent, null);
}
@@ -365,16 +358,15 @@
* indent if true. Otherwise, not indent.
* @param enc
*/
- public void printAsXML(String publicID, URL location, PrintWriter pw,
- boolean indent, String enc) throws IOException {
+ public void printAsXML(String publicID, URL location, PrintWriter pw, boolean indent, String enc)
+ throws IOException {
if (enc == null) {
pw.println("<?xml version=\"1.0\"?>");
} else {
pw.println("<?xml version=\"1.0\" encoding=\"" + enc + "\"?>");
}
if (publicID != null) {
- pw.print("<!DOCTYPE " + documentElement.getTagName() + " PUBLIC \""
- + publicID + '"');
+ pw.print("<!DOCTYPE " + documentElement.getTagName() + " PUBLIC \"" + publicID + '"');
if (location != null) {
pw.println(" \"" + location + "\">");
} else {
@@ -387,15 +379,14 @@
// charEntities4Xml = null;
}
- public void printAsXML(PrintWriter pw, boolean indent, String enc)
- throws IOException {
+ public void printAsXML(PrintWriter pw, boolean indent, String enc) throws IOException {
printAsXML(null, null, pw, indent, enc);
}
/**
* Print Document as SGML. starttag's string in this document are from the
- * original document (see {@link SGMLElement#toString()}. Even if
- * attributes in elements are modified, starttag's string does not change.
+ * original document (see {@link SGMLElement#toString()}). Even if attributes
+ * in elements are modified, starttag's string does not change.
*
* @param DTD's
* location
@@ -404,7 +395,14 @@
*/
public void printAsSGML(PrintWriter pw, boolean indent) throws IOException {
if (doctype != null) {
- pw.println(doctype.toString());
+ String orgDoctype = DocumentTypeUtil.getOriginalID(doctype);
+ if (orgDoctype.isEmpty()) {
+ pw.println("<!DOCTYPE html>");
+ } else if (orgDoctype.equalsIgnoreCase("about:legacy-compat")) {
+ pw.println("<!DOCTYPE html SYSTEM \"about:legacy-compat\">");
+ } else {
+ pw.println(doctype.toString());
+ }
}
if (documentElement != null)
((SGMLElement) documentElement).printAsSGML(pw, 0, indent);
@@ -415,11 +413,11 @@
/**
* @serial
*/
-// private Hashtable charEntities4Xml;
-//
-// String getEntityOrigin4Xml(String entity) {
-// return (String) charEntities4Xml.get(entity);
-// }
+ // private Hashtable charEntities4Xml;
+ //
+ // String getEntityOrigin4Xml(String entity) {
+ // return (String) charEntities4Xml.get(entity);
+ // }
private transient SGMLDocTypeDef dtd;
@@ -483,61 +481,55 @@
ret = createProcessingInstruction(pi.getTarget(), pi.getData());
break;
case Node.ENTITY_REFERENCE_NODE:
- throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
- "ENTITY_REFERENCE: " + importedNode) {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "ENTITY_REFERENCE: " + importedNode) {
- /**
- *
- */
- private static final long serialVersionUID = 1578579363722608207L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1578579363722608207L;
};
case Node.ENTITY_NODE:
- throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "ENTITY: "
- + importedNode) {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "ENTITY: " + importedNode) {
- /**
- *
- */
- private static final long serialVersionUID = -450181572985174561L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = -450181572985174561L;
};
case Node.DOCUMENT_NODE:
- throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOCUMENT: "
- + importedNode) {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOCUMENT: " + importedNode) {
- /**
- *
- */
- private static final long serialVersionUID = 8785157203267073381L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = 8785157203267073381L;
};
case Node.DOCUMENT_TYPE_NODE:
- throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
- "DOCUMENT_TYPE: " + importedNode) {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOCUMENT_TYPE: " + importedNode) {
- /**
- *
- */
- private static final long serialVersionUID = 5629745929750604049L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = 5629745929750604049L;
};
case Node.DOCUMENT_FRAGMENT_NODE:
ret = createDocumentFragment();
break;
case Node.NOTATION_NODE:
- throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "NOTATION: "
- + importedNode) {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "NOTATION: " + importedNode) {
- /**
- *
- */
- private static final long serialVersionUID = 1787799543281735366L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1787799543281735366L;
};
default:
- throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
- "Unknown node type: " + importedNode.getNodeType()) {
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "Unknown node type: " + importedNode.getNodeType()) {
- /**
- *
- */
- private static final long serialVersionUID = -9119985548894040858L;
+ /**
+ *
+ */
+ private static final long serialVersionUID = -9119985548894040858L;
};
}
if (deep) {
@@ -554,8 +546,7 @@
* always ignored.
* @return same as <code>createElement(qualifiedName)</code>
*/
- public Element createElementNS(String namespaceURI, String qualifiedName)
- throws DOMException {
+ public Element createElementNS(String namespaceURI, String qualifiedName) throws DOMException {
return createElement(qualifiedName);
}
@@ -564,8 +555,7 @@
* always ignored.
* @return same as <code>createAttribute(qualifiedName)</code>
*/
- public Attr createAttributeNS(String namespaceURI, String qualifiedName)
- throws DOMException {
+ public Attr createAttributeNS(String namespaceURI, String qualifiedName) throws DOMException {
return createAttribute(qualifiedName);
}
@@ -580,9 +570,8 @@
/**
* Returns the <code>Element</code> whose <code>ID</code> is given by
- * <code>elementId</code>. If more than one element has this
- * <code>ID</code> The first element in the depth-first and pre-order
- * traversal is returned.
+ * <code>elementId</code>. If more than one element has this <code>ID</code>,
+ * the first element in the depth-first and pre-order traversal is returned.
*/
public Element getElementById(String elementID) {
// replaced for performance reason @2009/06/25 by dsato@jp.ibm.com
@@ -599,23 +588,13 @@
// return getElementById(documentElement, elementID);
}
/*
- private Element getElementById(Element el, String elementID) {
- if (el.getAttribute("id").equals(elementID)) {
- return el;
- } else {
- for (Node child = el.getFirstChild(); child != null; child = child
- .getNextSibling()) {
- if (child instanceof Element) {
- Element ret = getElementById((Element) child, elementID);
- if (ret != null) {
- return ret;
- }
- }
- }
- return null;
- }
- }
- */
+ * private Element getElementById(Element el, String elementID) { if
+ * (el.getAttribute("id").equals(elementID)) { return el; } else { for (Node
+ * child = el.getFirstChild(); child != null; child = child
+ * .getNextSibling()) { if (child instanceof Element) { Element ret =
+ * getElementById((Element) child, elementID); if (ret != null) { return
+ * ret; } } } return null; } }
+ */
/**
* DOM Level 3
@@ -666,8 +645,7 @@
}
- public Node renameNode(Node n, String namespaceURI, String qualifiedName)
- throws DOMException {
+ public Node renameNode(Node n, String namespaceURI, String qualifiedName) throws DOMException {
// TODO Auto-generated method stub
return null;
}
diff --git a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLPI.java b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLPI.java
index 700ea5b..b50e828 100644
--- a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLPI.java
+++ b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLPI.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 1998, 2008 IBM Corporation and Others
+ * Copyright (c) 1998, 2016 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -87,7 +87,10 @@
/**
* @serial
*/
- private String target, data;
+ @SuppressWarnings("unused")
+ private String target;
+
+ private String data;
/**
* Constructs ProcessingInstruction instance whose target value is null.
diff --git a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLParser.java b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLParser.java
index 15ecfea..9fddda2 100644
--- a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLParser.java
+++ b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/impl/SGMLParser.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 1998, 2008 IBM Corporation and Others
+ * Copyright (c) 1998, 2016 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -7,6 +7,7 @@
*
* Contributors:
* Goh KONDOH - initial API and implementation
+ * Kentarou FUKUDA - html5 support
*******************************************************************************/
package org.eclipse.actf.model.internal.dom.sgml.impl;
@@ -60,17 +61,16 @@
private DOMImplementation domImpl;
- private static Class createDocumentMethodParamTypes[] = { String.class,
- String.class, DocumentType.class };
+ private static Class<?> createDocumentMethodParamTypes[] = { String.class, String.class, DocumentType.class };
/**
* @return <code>null</code> if failed.
*/
public DOMImplementation setDOMImplementation(DOMImplementation domImpl) {
- Class domImpleInterface = DOMImplementation.class;
+ Class<DOMImplementation> domImpleInterface = DOMImplementation.class;
try {
- java.lang.reflect.Method createDocumentMethod = domImpleInterface
- .getMethod("createDocument", createDocumentMethodParamTypes);
+ java.lang.reflect.Method createDocumentMethod = domImpleInterface.getMethod("createDocument",
+ createDocumentMethodParamTypes);
if (createDocumentMethod != null) {
this.domImpl = domImpl;
doc = null;
@@ -124,8 +124,7 @@
* Adds an ErrorHandler instance. An errorHandler added later is invoked
* earlier by this parser instance than errorHandlers added earlier. If one
* errorHandler handles error (eg. returns <code>
- * true</code>), no more
- * errorHandlers are invoked.
+ * true</code>), no more errorHandlers are invoked.
*
* @param errorHandler
* errorHandler instance to be added to this parser
@@ -145,8 +144,7 @@
* Adds an ErrorHandler instance. An errorHandler added later is invoked
* earlier by this parser instance than errorHandlers added earlier. If one
* errorHandler handles error (eg. returns <code>
- * true</code>), no more
- * errorHandlers are invoked.
+ * true</code>), no more errorHandlers are invoked.
*
* @param errorHandler
* errorHandler instance to be added to this parser
@@ -488,8 +486,7 @@
}
} else {
String attName = changeAttrNameCase(tokenizer.sval);
- AttributeDefinition ad = ed != null ? ed.getAttributeDef(attName)
- : null;
+ AttributeDefinition ad = ed != null ? ed.getAttributeDef(attName) : null;
String attValue = attName;
if (tokenizer.nextToken() == EQ) {
attValue = tokenizer.readAttributeValue(ad, ed);
@@ -508,13 +505,11 @@
ret = attribute(ed, attrlist);
} else {
error(IParserError.ILLEGAL_ATTRIBUTE,
- "Illegal attribute '" + attName + "' for "
- + ed.getName());
+ "Illegal attribute '" + attName + "' for " + ed.getName());
}
}
} else if (attrlist != null) {
- attrlist.addAttribute(attName, ad.getDeclaredTypeStr(),
- attValue);
+ attrlist.addAttribute(attName, ad.getDeclaredTypeStr(), attValue);
}
}
return ret;
@@ -533,8 +528,7 @@
public final void error(int code, String msg) {
for (int i = 0; i < errorLogListenerNum; i++) {
if (tokenizer != null) {
- errorLogListeners[i].errorLog(code, tokenizer.getCurrentLine()
- + ": " + msg);
+ errorLogListeners[i].errorLog(code, tokenizer.getCurrentLine() + ": " + msg);
} else {
errorLogListeners[i].errorLog(code, msg);
}
@@ -738,8 +732,7 @@
}
break;
case PI:
- currentNode = ret = doc.createProcessingInstruction(null,
- tokenizer.sval);
+ currentNode = ret = doc.createProcessingInstruction(null, tokenizer.sval);
if (docHandler != null) {
docHandler.processingInstruction(null, tokenizer.sval);
}
@@ -748,8 +741,7 @@
ret = null;
break;
case MDO:
- error(IParserError.ILLEGAL_DOCTYPE,
- "Illegal Declaration. Discarding to next '>'");
+ error(IParserError.ILLEGAL_DOCTYPE, "Illegal Declaration. Discarding to next '>'");
if (tokenizer.nextToken() != '>') {
// consume '>'
tokenizer.consumeUntil('>');
@@ -757,8 +749,7 @@
}
return node();
default:
- error(IParserError.MISC_ERR,
- "Internal Parser Error: character encoding may be wrong.");
+ error(IParserError.MISC_ERR, "Internal Parser Error: character encoding may be wrong.");
return node();
}
return ret;
@@ -796,11 +787,9 @@
lastDef = dtd.getElementDefinition(getDefaultTopElement());
pcdataNumber = dtd.getElementCount();
for (int i = depth - 1; i >= 0; i--) {
- ancesterElementDefs[i] = dtd
- .getElementDefinition(ancesterElementDefs[i].getName());
+ ancesterElementDefs[i] = dtd.getElementDefinition(ancesterElementDefs[i].getName());
if (ancesterElementDefs[i] == null) {
- error(IParserError.UNKNOWN_ELEMENT, ancesters[i].getNodeName()
- + " is not defined in " + publicID);
+ error(IParserError.UNKNOWN_ELEMENT, ancesters[i].getNodeName() + " is not defined in " + publicID);
ancesterElementDefs[i] = anonymousElementDef;
}
}
@@ -824,8 +813,7 @@
* If unrecoverable syntax or token error occured, throwed
* @exception IOException
*/
- public Node parse(Reader reader) throws ParseException, IOException,
- SAXException {
+ public Node parse(Reader reader) throws ParseException, IOException, SAXException {
if (domImpl == null && doc == null) {
throw new ParseException("No factory instance.");
}
@@ -840,8 +828,7 @@
DocumentType docType = readDocType();
if (docType == null) {
error(IParserError.DOCTYPE_MISSED,
- "<!DOCTYPE ...> is missing. Try to use \"" + defaultDTD
- + "\" as document type");
+ "<!DOCTYPE ...> is missing. Try to use \"" + defaultDTD + "\" as document type");
setupDTD(defaultDTD);
}
tokenizer.extractNumEntity(extractNum);
@@ -849,8 +836,7 @@
tokenizer.setPreserveWhitespace(preserveWhitespace);
if (doc == null) {
doc = createDocument(docType);
- if (doc instanceof SGMLDocument
- && ((SGMLDocument) doc).getDTD() == null) {
+ if (doc instanceof SGMLDocument && ((SGMLDocument) doc).getDTD() == null) {
((SGMLDocument) doc).setDTD(this.dtd);
}
while (!commentsBeforeDoctype.isEmpty()) {
@@ -946,8 +932,7 @@
bufSize = bufSize * 2;
}
if (eHandleLogical && docHandler != null) {
- for (Node down = context.getLastChild(); down instanceof Element; down = down
- .getLastChild()) {
+ for (Node down = context.getLastChild(); down instanceof Element; down = down.getLastChild()) {
if (down == element) {
ancesters[depth] = element;
ancesterElementDefs[depth] = lastElementDef;
@@ -958,19 +943,16 @@
return;
} else {
ancesters[depth] = (Element) down;
- ancesterElementDefs[depth] = ed = dtd
- .getElementDefinition(down.getNodeName());
+ ancesterElementDefs[depth] = ed = dtd.getElementDefinition(down.getNodeName());
if (ed == null) {
ancesterElementDefs[depth] = anonymousElementDef;
}
- docHandler.startElement(down.getNodeName(),
- nullAttributeList);
+ docHandler.startElement(down.getNodeName(), nullAttributeList);
depth++;
}
}
} else {
- for (Node down = context.getLastChild(); down instanceof Element; down = down
- .getLastChild()) {
+ for (Node down = context.getLastChild(); down instanceof Element; down = down.getLastChild()) {
if (down == element) {
ancesters[depth] = element;
ancesterElementDefs[depth] = lastElementDef;
@@ -980,8 +962,7 @@
return;
} else {
ancesters[depth] = (Element) down;
- ancesterElementDefs[depth] = ed = dtd
- .getElementDefinition(down.getNodeName());
+ ancesterElementDefs[depth] = ed = dtd.getElementDefinition(down.getNodeName());
if (ed == null) {
ancesterElementDefs[depth] = anonymousElementDef;
}
@@ -1024,20 +1005,16 @@
return;
}
int forwardPathLen = 0;
- for (Node down = up.getLastChild(); down instanceof Element; down = down
- .getLastChild()) {
+ for (Node down = up.getLastChild(); down instanceof Element; down = down.getLastChild()) {
if (down == element) {
if (eHandleLogical && docHandler != null) {
for (int j = depth - 1; j > i; j--) {
docHandler.endElement(ancesters[j].getNodeName());
}
for (int j = 0; j < forwardPathLen; j++) {
- docHandler.startElement(forwardPath[j]
- .getNodeName(), nullAttributeList);
+ docHandler.startElement(forwardPath[j].getNodeName(), nullAttributeList);
ancesters[i + j + 1] = forwardPath[j];
- ancesterElementDefs[i + j + 1] = dtd
- .getElementDefinition(forwardPath[j]
- .getNodeName());
+ ancesterElementDefs[i + j + 1] = dtd.getElementDefinition(forwardPath[j].getNodeName());
if (ancesterElementDefs[i + j + 1] == null) {
ancesterElementDefs[i + j + 1] = anonymousElementDef;
}
@@ -1046,9 +1023,7 @@
} else {
for (int j = 0; j < forwardPathLen; j++) {
ancesters[i + j + 1] = forwardPath[j];
- ancesterElementDefs[i + j + 1] = dtd
- .getElementDefinition(forwardPath[j]
- .getNodeName());
+ ancesterElementDefs[i + j + 1] = dtd.getElementDefinition(forwardPath[j].getNodeName());
if (ancesterElementDefs[i + j + 1] == null) {
ancesterElementDefs[i + j + 1] = anonymousElementDef;
}
@@ -1084,15 +1059,13 @@
docHandler.endElement(ancesters[j].getNodeName());
}
for (int j = i; j < newDepth - 1; j++) {
- docHandler.startElement(newAncesters[j].getNodeName(),
- nullAttributeList);
+ docHandler.startElement(newAncesters[j].getNodeName(), nullAttributeList);
}
docHandler.startElement(element.getNodeName(), attrlist);
}
while (i < newDepth - 1) {
ancesters[i] = newAncesters[i];
- ancesterElementDefs[i] = dtd
- .getElementDefinition(newAncesters[i].getNodeName());
+ ancesterElementDefs[i] = dtd.getElementDefinition(newAncesters[i].getNodeName());
}
ancesters[newDepth - 1] = element;
ancesterElementDefs[newDepth - 1] = lastElementDef;
@@ -1134,8 +1107,7 @@
}
context = element;
if (docHandler != null && eHandleLogical) {
- AttributeListImpl al = attrlist != null ? attrlist
- : nullAttributeList;
+ AttributeListImpl al = attrlist != null ? attrlist : nullAttributeList;
docHandler.startElement(element.getNodeName(), al);
attrlist = null;
}
@@ -1146,13 +1118,11 @@
private Vector<EndTag> missedEndtags = new Vector<EndTag>();
- private Node readInstances() throws ParseException, IOException,
- SAXException {
+ private Node readInstances() throws ParseException, IOException, SAXException {
Node node = getNode();
if (node == null)
return doc;
- while (node.getNodeType() == Node.COMMENT_NODE
- || node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
+ while (node.getNodeType() == Node.COMMENT_NODE || node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
if (keepComment)
doc.appendChild(node);
node = getNode();
@@ -1165,8 +1135,7 @@
if (eHandleLogical && docHandler != null) {
docHandler.ignorableWhitespace(saxch, begin, len);
}
- error(IParserError.FLOATING_ENDTAG, "Illegal end tag: " + node
- + ". Ignore it.");
+ error(IParserError.FLOATING_ENDTAG, "Illegal end tag: " + node + ". Ignore it.");
}
return readInstances();
case Node.ELEMENT_NODE:
@@ -1175,20 +1144,17 @@
} else {
AttributeListImpl attrlisttmp = attrlist;
attrlist = null;
- setTopElement(doc.createElement(changeDefaultTagCase(lastDef
- .getName())));
+ setTopElement(doc.createElement(changeDefaultTagCase(lastDef.getName())));
attrlist = attrlisttmp;
addAutoGenerated(context);
if (lastDef.getContentModel().match(this, context, node)) {
if (!lastDef.startTagOmittable()) {
- error(IParserError.ILLEGAL_TOP_ELEMENT, node
- + " can't be a top element.");
+ error(IParserError.ILLEGAL_TOP_ELEMENT, node + " can't be a top element.");
}
setContextForward((Element) node);
} else if (!handleError(IParserError.ILLEGAL_CHILD, node)) {
addErrorNode(context);
- error(IParserError.ILLEGAL_CHILD, node
- + " is not allowed as a child of " + context);
+ error(IParserError.ILLEGAL_CHILD, node + " is not allowed as a child of " + context);
context.appendChild(node);
setContextForward((Element) node);
}
@@ -1199,22 +1165,19 @@
if (preserveWhitespace && whitespaceText((Text) node)) {
return readInstances();
}
- error(IParserError.ILLEGAL_TOP_ELEMENT,
- "#text can't be a top element");
- setTopElement(doc.createElement(changeDefaultTagCase(lastDef
- .getName())));
+ error(IParserError.ILLEGAL_TOP_ELEMENT, "#text can't be a top element");
+ setTopElement(doc.createElement(changeDefaultTagCase(lastDef.getName())));
addAutoGenerated(context);
context.appendChild(node);
break;
default:
- throw new ParseException(tokenizer.getCurrentLine()
- + ": Internal Parser Error " + node);
+ throw new ParseException(tokenizer.getCurrentLine() + ": Internal Parser Error " + node);
}
return readInstances2();
}
- private Node readInstances2() throws ParseException, IOException,
- SAXException {
+ @SuppressWarnings("unused")
+ private Node readInstances2() throws ParseException, IOException, SAXException {
ElementDefinition ed;
Node node;
outer: for (node = getNode(); node != null; node = getNode()) {
@@ -1231,16 +1194,12 @@
case ENDTAG:
missedEndtags.removeAllElements();
- if (_DEBUG
- && node.getNodeName().equalsIgnoreCase(
- System.getProperty("DEBUG_ENDTAG"))) {
+ if (_DEBUG && node.getNodeName().equalsIgnoreCase(System.getProperty("DEBUG_ENDTAG"))) {
System.err.println("DEBUG: " + node);
}
for (int i = depth - 1; i >= 0; i--) {
- if (ancesterElementDefs[i].number == lastElementNumber
- && (lastElementNumber != pcdataNumber + 1 || ancesters[i]
- .getNodeName().equalsIgnoreCase(
- node.getNodeName()))) {
+ if (ancesterElementDefs[i].number == lastElementNumber && (lastElementNumber != pcdataNumber + 1
+ || ancesters[i].getNodeName().equalsIgnoreCase(node.getNodeName()))) {
if (!missedEndtags.isEmpty()) {
@@ -1250,9 +1209,8 @@
break node_sel;
}
extraErrInfo = null;
- error(IParserError.SUDDEN_ENDTAG, missedEndtags
- + " have been forced to be inserted by "
- + node);
+ error(IParserError.SUDDEN_ENDTAG,
+ missedEndtags + " have been forced to be inserted by " + node);
}
/*
* if (ancesterElementDefs[depth - 1].number ==
@@ -1270,8 +1228,7 @@
break node_sel;
} else {
if (!ancesterElementDefs[i].endTagOmittable()) {
- missedEndtags.insertElementAt(new EndTag(
- ancesters[i].getNodeName()), 0);
+ missedEndtags.insertElementAt(new EndTag(ancesters[i].getNodeName()), 0);
}
}
@@ -1280,16 +1237,13 @@
if (eHandleLogical && docHandler != null) {
docHandler.ignorableWhitespace(saxch, begin, len);
}
- error(IParserError.FLOATING_ENDTAG, "Illegal end tag: "
- + node + ". Ignore it");
+ error(IParserError.FLOATING_ENDTAG, "Illegal end tag: " + node + ". Ignore it");
}
break;
case Node.ELEMENT_NODE:
Element element = (Element) node;
Element exParent = null;
- if (_DEBUG
- && element.getTagName().equalsIgnoreCase(
- System.getProperty("DEBUG_STARTTAG"))) {
+ if (_DEBUG && element.getTagName().equalsIgnoreCase(System.getProperty("DEBUG_STARTTAG"))) {
System.err.println("DEBUG: " + element);
}
@@ -1307,14 +1261,10 @@
if (exParent != null) {
if (!handleError(IParserError.ILLEGAL_CHILD, node)) {
addErrorNode(context);
- error(IParserError.ILLEGAL_CHILD, node
- + " is an exception uner "
- + ancesters[i]);
+ error(IParserError.ILLEGAL_CHILD, node + " is an exception uner " + ancesters[i]);
} else {
- if (context != node && eHandleLogical
- && docHandler != null) {
- docHandler.startElement(node.getNodeName(),
- attrlist);
+ if (context != node && eHandleLogical && docHandler != null) {
+ docHandler.startElement(node.getNodeName(), attrlist);
}
postElement(element);
break node_sel;
@@ -1338,8 +1288,7 @@
IModelGroup contentModel = ed.getContentModel();
// TODO correct this
- if (contentModel.match(lastElementNumber)
- && contentModel.match(this, context, node)) {
+ if (contentModel.match(lastElementNumber) && contentModel.match(this, context, node)) {
// System.out.println("model: fow");
@@ -1353,9 +1302,7 @@
ed = ancesterElementDefs[i];
contentModel = ed.getContentModel();
if (contentModel.match(lastElementNumber)
- && (found = contentModel.match(this,
- ancesters[i], node))
- || !ed.endTagOmittable()) {
+ && (found = contentModel.match(this, ancesters[i], node)) || !ed.endTagOmittable()) {
break;
}
}
@@ -1370,8 +1317,7 @@
context.appendChild(element);
addErrorNode(context);
- error(IParserError.ILLEGAL_CHILD, node
- + " is not allowed as a child of " + context);
+ error(IParserError.ILLEGAL_CHILD, node + " is not allowed as a child of " + context);
setContextForward(element);
} else if (element.getParentNode() != null) {
@@ -1391,8 +1337,7 @@
// TODO ???
context.appendChild(element);
addErrorNode(context);
- error(IParserError.ILLEGAL_CHILD, node
- + " is not allowed as a child of " + context);
+ error(IParserError.ILLEGAL_CHILD, node + " is not allowed as a child of " + context);
// System.out.println(node.getNodeName()+context);
setContextForward(element);
}
@@ -1411,15 +1356,13 @@
}
ed = ancesterElementDefs[depth - 1];
contentModel = ed.getContentModel();
- if (contentModel.match(pcdataNumber)
- && contentModel.match(this, context, node)) {
+ if (contentModel.match(pcdataNumber) && contentModel.match(this, context, node)) {
break;
} else if (ed.endTagOmittable()) {
for (int i = depth - 2; i >= 0; i--) {
ed = ancesterElementDefs[i];
contentModel = ed.getContentModel();
- if (contentModel.match(pcdataNumber)
- && contentModel.match(this, ancesters[i], node)) {
+ if (contentModel.match(pcdataNumber) && contentModel.match(this, ancesters[i], node)) {
break node_sel;
} else if (!ed.endTagOmittable()) {
break;
@@ -1429,13 +1372,11 @@
if (handleError(IParserError.ILLEGAL_CHILD, node))
break node_sel;
addErrorNode(context);
- error(IParserError.ILLEGAL_CHILD, "#text(" + node
- + ") is not allowed as a child of " + context);
+ error(IParserError.ILLEGAL_CHILD, "#text(" + node + ") is not allowed as a child of " + context);
context.appendChild(node);
break;
default:
- throw new ParseException(tokenizer.getCurrentLine()
- + ": Internal parser error " + node);
+ throw new ParseException(tokenizer.getCurrentLine() + ": Internal parser error " + node);
}
}
@@ -1452,8 +1393,7 @@
if ((node = getNode()) != null) {
this.context = doc.getDocumentElement();
if (docHandler != null && eHandleLogical) {
- docHandler.startElement(this.context.getNodeName(),
- nullAttributeList);
+ docHandler.startElement(this.context.getNodeName(), nullAttributeList);
}
// System.out.println("SGMLParser: pushback");
@@ -1476,8 +1416,7 @@
return true;
}
- private void postElement(Element element) throws ParseException,
- IOException, SAXException {
+ private void postElement(Element element) throws ParseException, IOException, SAXException {
IModelGroup mg = lastElementDef.getContentModel();
if (mg == cdata) {
@@ -1508,8 +1447,7 @@
}
}
- private CDATASection readCDATA(String arg) throws ParseException,
- IOException, SAXException {
+ private CDATASection readCDATA(String arg) throws ParseException, IOException, SAXException {
if (lexHandler != null) {
lexHandler.startCDATA();
}
@@ -1539,8 +1477,7 @@
String str;
}
- private DocumentType readDocType() throws ParseException, IOException,
- SAXException {
+ private DocumentType readDocType() throws ParseException, IOException, SAXException {
if (lastDef != null) {
throw new ParseException("Already read DOCTYPE declaration");
}
@@ -1567,8 +1504,7 @@
catb.str = tokenizer.sval;
commentsBeforeDoctype.addElement(catb);
} else {
- currentNode = doc.createProcessingInstruction(null,
- tokenizer.sval);
+ currentNode = doc.createProcessingInstruction(null, tokenizer.sval);
doc.appendChild(currentNode);
}
if (docHandler != null) {
@@ -1583,94 +1519,165 @@
}
}
- if (tokenizer.nextToken() != NAME_CHAR
- && !tokenizer.sval.equals("DOCTYPE")) {
- throw new ParseException("Unknown declaration at "
- + tokenizer.getCurrentLine());
+ if (tokenizer.nextToken() != NAME_CHAR && !tokenizer.sval.equals("DOCTYPE")) {
+ throw new ParseException("Unknown declaration at " + tokenizer.getCurrentLine());
}
+
+ String docTypeName = "";
/*
* Only supports initially setted public entity. For example, <!DOCTYPE
* HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+ *
+ * 2016: added support for html5 doctype
*/
if (tokenizer.nextToken() == NAME_CHAR) {
- String docTypeName = tokenizer.sval;
+ docTypeName = tokenizer.sval;
this.defaultTopElement = docTypeName;
- if (tokenizer.nextToken() == NAME_CHAR
- && tokenizer.sval.equalsIgnoreCase("PUBLIC")) {
- if (tokenizer.nextToken() == '"') {
- String publicID = tokenizer.eatUntil('"');
- String orgId = publicID;
- if (enforcedDoctype != null) {
- publicID = enforcedDoctype;
- }
- if (lexHandler != null) {
- lexHandler.startDTD(docTypeName, publicID, null);
- }
- String entityFileName = pubEntityMap.get(publicID);
- if (entityFileName == null) {
- if (defaultDTD != null) {
- error(IParserError.ILLEGAL_DOCTYPE, "Instead of \""
- + publicID + "\" use \"" + defaultDTD
- + "\" as a DTD.");
- entityFileName = pubEntityMap.get(defaultDTD);
+ if (tokenizer.nextToken() == NAME_CHAR) {
+ String type = tokenizer.sval;
+ if (type.equalsIgnoreCase("PUBLIC") || type.equalsIgnoreCase("SYSTEM")) {
+ // to cover html5
+ // (<!DOCTYPE html SYSTEM "about:legacy-compat">)
+
+ if (tokenizer.nextToken() == '"') {
+ String id = tokenizer.eatUntil('"');
+ String orgId = id;
+ if (enforcedDoctype != null) {
+ id = enforcedDoctype;
}
+ if (lexHandler != null) {
+ lexHandler.startDTD(docTypeName, id, null);
+ }
+
+ // TODO consider to use pseudo DTD for html5
+ String entityFileName = pubEntityMap.get(id);
if (entityFileName == null) {
- throw new ParseException(tokenizer.getCurrentLine()
- + ": this parser does not support "
- + publicID);
+ if (defaultDTD != null) {
+ if (type.equalsIgnoreCase("PUBLIC")) {
+ error(IParserError.ILLEGAL_DOCTYPE,
+ "Instead of \"" + id + "\" use \"" + defaultDTD + "\" as a DTD.");
+ } else {
+ // for SYSTEM
+
+ if (orgId.equalsIgnoreCase("about:legacy-compat")) {
+ error(IParserError.ILLEGAL_DOCTYPE, "Instead of SYSTEM \"" + id
+ + "\" use PUBLIC \"" + defaultDTD + "\" as a DTD.");
+ } else {
+ error(IParserError.ILLEGAL_DOCTYPE,
+ "Invalid DOCTYPE declaration. Use " + defaultDTD);
+ }
+ }
+
+ entityFileName = pubEntityMap.get(defaultDTD);
+ }
+ if (entityFileName == null) {
+ throw new ParseException(
+ tokenizer.getCurrentLine() + ": this parser does not support " + id);
+ }
+ id = defaultDTD;
}
- publicID = defaultDTD;
- }
- setupDTD(publicID);
- if (domImpl == null) {
- domImpl = doc.getImplementation();
- }
- DocumentType ret = null;
- if (domImpl != null) {
- currentNode = ret = createDocType(domImpl, docTypeName,
- publicID);
- if(!publicID.equals(orgId) && ret instanceof SGMLDocType){
- ((SGMLDocType)ret).setOrgId(orgId);
+ setupDTD(id);
+ if (domImpl == null) {
+ domImpl = doc.getImplementation();
}
+ DocumentType ret = null;
+ if (domImpl != null) {
+ currentNode = ret = createDocType(domImpl, docTypeName, id);
+ if (!id.equals(orgId) && ret instanceof SGMLDocType) {
+ ((SGMLDocType) ret).setOrgId(orgId);
+ }
+ }
+ // consume '>'
+ tokenizer.consumeUntil('>');
+ tokenizer.switchTo(DEFAULT);
+ lastDef = dtd.getElementDefinition(docTypeName);
+ if (lastDef == null) {
+ String topElementName = getDefaultTopElement();
+ error(IParserError.ILLEGAL_DOCTYPE,
+ docTypeName + " is not defined as a root element. Use " + topElementName + '.');
+ lastDef = dtd.getElementDefinition(topElementName);
+ }
+ if (lexHandler != null) {
+ lexHandler.endDTD();
+ }
+ return ret;
}
- // consume '>'
- tokenizer.consumeUntil('>');
- tokenizer.switchTo(DEFAULT);
- lastDef = dtd.getElementDefinition(docTypeName);
- if (lastDef == null) {
- String topElementName = getDefaultTopElement();
- error(IParserError.ILLEGAL_DOCTYPE, docTypeName
- + " is not defined as a root element. Use "
- + topElementName + '.');
- lastDef = dtd.getElementDefinition(topElementName);
- }
- if (lexHandler != null) {
- lexHandler.endDTD();
- }
- return ret;
}
+
}
}
+
+ // for html5
+ if (docTypeName.equalsIgnoreCase("html")) {
+
+ // TODO consider to use pseudo DTD for html5
+ String systemID = "about:legacy-compat";// temp for html5
+ if (enforcedDoctype != null) {
+ systemID = enforcedDoctype;
+ }
+ // if (lexHandler != null) {
+ // lexHandler.startDTD(docTypeName, systemID, null);
+ // }
+ String entityFileName = pubEntityMap.get(systemID);
+ if (entityFileName == null) {
+ if (defaultDTD != null) {
+ error(IParserError.ILLEGAL_DOCTYPE,
+ "For html5 document use PUBLIC \"" + defaultDTD + "\" as a DTD.");
+ entityFileName = pubEntityMap.get(defaultDTD);
+ }
+ if (entityFileName == null) {
+ throw new ParseException(tokenizer.getCurrentLine() + ": this parser does not support " + systemID);
+ }
+ systemID = defaultDTD;
+ }
+ setupDTD(systemID);
+ if (domImpl == null) {
+ domImpl = doc.getImplementation();
+ }
+ DocumentType ret = null;
+ if (domImpl != null) {
+ currentNode = ret = createDocType(domImpl, docTypeName, systemID);
+ ((SGMLDocType) ret).setOrgId(""); // use empty string
+ }
+
+ // if current sval is not '>' then consume '>'
+ if (!tokenizer.sval.equals(">")) {
+ tokenizer.consumeUntil('>');
+ error(IParserError.ILLEGAL_DOCTYPE, "Invalid DOCTYPE declaration. Use " + defaultDTD);
+ }
+ tokenizer.switchTo(DEFAULT);
+ lastDef = dtd.getElementDefinition(docTypeName);
+ if (lastDef == null) {
+ String topElementName = getDefaultTopElement();
+ error(IParserError.ILLEGAL_DOCTYPE,
+ docTypeName + " is not defined as a root element. Use " + topElementName + '.');
+ lastDef = dtd.getElementDefinition(topElementName);
+ }
+ // if (lexHandler != null) {
+ // lexHandler.endDTD();
+ // }
+ return ret;
+ }
+
tokenizer.consumeUntil('>');
- error(IParserError.ILLEGAL_DOCTYPE, "Invalid DOCTYPE declaration. Use "
- + defaultDTD);
+
+ // others
+ error(IParserError.ILLEGAL_DOCTYPE, "Invalid DOCTYPE declaration. Use " + defaultDTD);
setupDTD(defaultDTD);
lastDef = dtd.getElementDefinition(getDefaultTopElement());
return null;
}
- private DocumentType createDocType(DOMImplementation domImpl,
- String docTypeName, String publicID) {
+ private DocumentType createDocType(DOMImplementation domImpl, String docTypeName, String publicID) {
/*
* For compatibility to DOM level 1
*/
- Class domImplClass = domImpl.getClass();
- Class stringClass = docTypeName.getClass();
- Class parameterTypes[] = { stringClass, stringClass, stringClass };
+ Class<? extends DOMImplementation> domImplClass = domImpl.getClass();
+ Class<? extends String> stringClass = docTypeName.getClass();
+ Class<?> parameterTypes[] = { stringClass, stringClass, stringClass };
java.lang.reflect.Method method;
try {
- method = domImplClass.getMethod("createDocumentType",
- parameterTypes);
+ method = domImplClass.getMethod("createDocumentType", parameterTypes);
} catch (NoSuchMethodException e) {
return null;
}
@@ -1696,8 +1703,7 @@
private void expandNodesWithIllegalChildren() {
Node newNodes[] = new Node[nodeWithIllegalChildNum * 2];
- System.arraycopy(nodesWithIllegalChildren, 0, newNodes, 0,
- nodeWithIllegalChildNum);
+ System.arraycopy(nodesWithIllegalChildren, 0, newNodes, 0, nodeWithIllegalChildNum);
nodesWithIllegalChildren = newNodes;
}
@@ -1744,15 +1750,13 @@
if (getDOMImplementation() != null) {
setDOMImplementation(getDOMImplementation());
} else {
- for (Node child = doc.getFirstChild(); child != null; child = doc
- .getFirstChild()) {
+ for (Node child = doc.getFirstChild(); child != null; child = doc.getFirstChild()) {
doc.removeChild(child);
}
}
}
- int getCharEntity(String entity) throws IOException, ParseException,
- SAXException {
+ int getCharEntity(String entity) throws IOException, ParseException, SAXException {
SGMLEntityReference er = null;
try {
er = dtd.getEntityReference(entity);
@@ -1764,15 +1768,13 @@
if (ch != -1) {
return ch;
}
- InsTokenizer tokenizer2 = new InsTokenizer(ed.getReplacementReader(),
- this);
+ InsTokenizer tokenizer2 = new InsTokenizer(ed.getReplacementReader(), this);
if (tokenizer2.nextToken() == PCDATA && tokenizer2.sval.length() == 1) {
char ret = tokenizer2.sval.charAt(0);
ed.setReplacementChar(ret);
return ret;
} else {
- throw new ParseException("Internal Parser Error: " + entity
- + " not defined.");
+ throw new ParseException("Internal Parser Error: " + entity + " not defined.");
}
}
@@ -1806,8 +1808,7 @@
if (handleError(IParserError.TAG_NAME, tokenizer.sval)) {
return stag();
}
- error(IParserError.STARTTAG_SYNTAX_ERR,
- "Perhaps character encoding may not be correct.");
+ error(IParserError.STARTTAG_SYNTAX_ERR, "Perhaps character encoding may not be correct.");
while (tokenizer.nextToken() != NAME_CHAR) {
if (tokenizer.ttype == EOF || tokenizer.ttype == TAGC) {
return null;
@@ -1819,8 +1820,7 @@
if (ed != null) {
lastElementNumber = ed.number;
lastElementDef = ed;
- isEmptyElement = ed.getContentModel().toString().equalsIgnoreCase(
- "EMPTY");
+ isEmptyElement = ed.getContentModel().toString().equalsIgnoreCase("EMPTY");
} else if (keepUnknowns) {
lastElementNumber = pcdataNumber + 1;
ed = lastElementDef = anonymousElementDef;
@@ -1843,8 +1843,7 @@
if (handleError(IParserError.BEFORE_ATTRNAME, tokenizer.sval)) {
continue;
}
- error(IParserError.STARTTAG_SYNTAX_ERR,
- "requires an attribute in " + ret);
+ error(IParserError.STARTTAG_SYNTAX_ERR, "requires an attribute in " + ret);
tokenizer.pushBack();
break;
} else if (tokenizer.ttype == EOF) {
@@ -1869,11 +1868,12 @@
// }
}
+ // TODO html5 support
+
if (ed == null) {
if (!handleError(IParserError.UNKNOWN_ELEMENT, ret)) {
- error(IParserError.UNKNOWN_ELEMENT, "Unknown Element: "
- + ret.getTagName() + ". Ignore it.");
+ error(IParserError.UNKNOWN_ELEMENT, "Unknown Element: " + ret.getTagName() + ". Ignore it.");
}
return null;
} else if (ed == anonymousElementDef) {
@@ -1881,10 +1881,8 @@
if (handleError(IParserError.UNKNOWN_ELEMENT, ret)) {
return null;
} else {
- error(IParserError.UNKNOWN_ELEMENT, "Unknown Element: "
- + ret.getTagName()
- + ". Define its definition as <!ELEMENT "
- + ret.getNodeName().toUpperCase() + " - - ANY>");
+ error(IParserError.UNKNOWN_ELEMENT, "Unknown Element: " + ret.getTagName()
+ + ". Define its definition as <!ELEMENT " + ret.getNodeName().toUpperCase() + " - - ANY>");
}
}
@@ -1900,8 +1898,7 @@
}
public String makeUnique(String id) {
- for (Enumeration<String> e = pubEntityMap.elements(); e
- .hasMoreElements();) {
+ for (Enumeration<String> e = pubEntityMap.elements(); e.hasMoreElements();) {
String ret = e.nextElement();
if (id.equals(ret))
return ret;
@@ -1940,8 +1937,7 @@
if (defaultTopElement != null) {
return defaultTopElement;
}
- throw new ParseException(
- "doesn't know which element must be at the top.");
+ throw new ParseException("doesn't know which element must be at the top.");
}
private String defaultTopElement = null;
@@ -1990,8 +1986,7 @@
* otherwise, ignore.
*/
public void setTagCase(int tagCase) {
- if (tagCase == IParser.UPPER_CASE || tagCase == IParser.LOWER_CASE
- || tagCase == IParser.ORIGINAL_CASE) {
+ if (tagCase == IParser.UPPER_CASE || tagCase == IParser.LOWER_CASE || tagCase == IParser.ORIGINAL_CASE) {
this.tagCase = tagCase;
}
}
@@ -2021,8 +2016,7 @@
* otherwise, ignore.
*/
public void setAttrNameCase(int attrCase) {
- if (attrCase == IParser.UPPER_CASE || attrCase == IParser.LOWER_CASE
- || attrCase == IParser.ORIGINAL_CASE) {
+ if (attrCase == IParser.UPPER_CASE || attrCase == IParser.LOWER_CASE || attrCase == IParser.ORIGINAL_CASE) {
this.attrCase = attrCase;
}
}
@@ -2071,8 +2065,7 @@
return extraErrInfo;
}
- private boolean handleError(int code, Node node) throws ParseException,
- IOException, SAXException {
+ private boolean handleError(int code, Node node) throws ParseException, IOException, SAXException {
for (int i = errorHandlerNum - 1; i >= 0; i--) {
if (errorHandlers[i].handleError(code, this, node)) {
return true;
@@ -2081,8 +2074,7 @@
return false;
}
- boolean handleError(int code, String errorStr) throws ParseException,
- IOException {
+ boolean handleError(int code, String errorStr) throws ParseException, IOException {
for (int i = tokenErrorHandlerNum - 1; i >= 0; i--) {
if (tokenErrorHandlers[i].handleError(code, this, errorStr)) {
return true;
@@ -2150,12 +2142,11 @@
* {@link org.xml.sax.DocumentHandler#startElement(java.lang.String, org.xml.sax.AttributeList)}
* and {@link org.xml.sax.DocumentHandler#endElement(java.lang.String)}
* <code>logically</code> or <code>physically</code>.<code>
- * Logical</code> means
- * that if a start or end tag of a element is omitted, a parser invokes each
- * method. <code>Physical</code> means that parsers invokes each method if
- * and only if their tag appearently exist. If <code>physical</code>, a
- * parser does not care if the tag is illegal or not. Default is
- * <code>physical</code>
+ * Logical</code> means that if a start or end tag of an element is omitted,
+ * a parser invokes each method. <code>Physical</code> means that a parser
+ * invokes each method if and only if the tag apparently exists. If
+ * <code>physical</code>, a parser does not care if the tag is illegal or
+ * not. Default is <code>physical</code>.
*
* @param logical
* if true, deal with tags as logical. Otherwise, as physical
@@ -2179,10 +2170,9 @@
*
* <PRE>
*
- * @param an
- * element node to be checked
- * @return <code>true</code> if <code>element</code> is automatically
- * generated by this. Otherwise false.
+ * @param element
+ * an element node to be checked
+ * @return <code>true</code> if <code>element</code> is automatically generated by this. Otherwise false.
*/
public boolean autoGenerated(Element element) {
return autoGenerated.contains(element);
@@ -2207,6 +2197,7 @@
return new AttributeListImpl();
}
+ // TODO to track this boolean
private boolean keepUnknowns;
/*
@@ -2259,7 +2250,8 @@
* Determines if this parser keeps comments and processing instructions in
* the tree or not. By default, it keeps.
*
- * @param <code>true</code> if it keeps, Otherwise <code>false</code>
+ * @param keep
+ * <code>true</code> if it keeps, Otherwise <code>false</code>
*/
public void setKeepComment(boolean keep) {
this.keepComment = keep;
@@ -2304,8 +2296,7 @@
if (!eHandleLogical || docHandler == null)
return;
for (int j = depth; j < i; j++) {
- docHandler.startElement(ancesters[j].getNodeName(),
- nullAttributeList);
+ docHandler.startElement(ancesters[j].getNodeName(), nullAttributeList);
}
}
@@ -2319,15 +2310,13 @@
* Parses a fragment under specified context.
*
*/
- public void parseFragment(Element parent, Reader reader)
- throws IOException, ParseException, SAXException {
+ public void parseFragment(Element parent, Reader reader) throws IOException, ParseException, SAXException {
if (dtd == null) {
throw new ParseException("Can't parse without DTD");
} else if (doc == null) {
throw new ParseException("Can't parse without a Document");
}
- this.ancesterElementDefs[0] = dtd.getElementDefinition(parent
- .getNodeName());
+ this.ancesterElementDefs[0] = dtd.getElementDefinition(parent.getNodeName());
if (this.ancesterElementDefs[0] == null) {
this.ancesterElementDefs[0] = anonymousElementDef;
}
@@ -2349,13 +2338,11 @@
* Parses a fragment. As a side effect, wastes a element node.
*
*/
- public DocumentFragment parseFragment(Reader reader) throws IOException,
- ParseException, SAXException {
+ public DocumentFragment parseFragment(Reader reader) throws IOException, ParseException, SAXException {
Element dummy = doc != null ? doc.createElement("dummy") : null;
parseFragment(dummy, reader);
DocumentFragment ret = doc.createDocumentFragment();
- for (Node child = dummy.getFirstChild(); child != null; child = dummy
- .getFirstChild()) {
+ for (Node child = dummy.getFirstChild(); child != null; child = dummy.getFirstChild()) {
dummy.removeChild(child);
ret.appendChild(child);
}
diff --git a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/util/ErrorHandlerTableImpl.java b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/util/ErrorHandlerTableImpl.java
index afeb799..e85ea7f 100644
--- a/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/util/ErrorHandlerTableImpl.java
+++ b/plugins/org.eclipse.actf.model.dom.html/src/org/eclipse/actf/model/internal/dom/sgml/util/ErrorHandlerTableImpl.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 1998, 2008 IBM Corporation and Others
+ * Copyright (c) 1998, 2016 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -11,9 +11,11 @@
package org.eclipse.actf.model.internal.dom.sgml.util;
-import java.net.URL;
import java.net.MalformedURLException;
-import java.util.*;
+import java.net.URL;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Properties;
import org.eclipse.actf.model.dom.html.IErrorHandler;
@@ -39,7 +41,7 @@
} catch (Exception e) {
e.printStackTrace();
}
- for (Enumeration e = prop.keys(); e.hasMoreElements();) {
+ for (Enumeration<Object> e = prop.keys(); e.hasMoreElements();) {
String urlStr = (String) e.nextElement();
URL url;
try {
@@ -70,7 +72,7 @@
IErrorHandler ret[] = new IErrorHandler[errorHandlerNum];
for (int i = 0; i < errorHandlerNum; i++) {
try {
- Class errorHandlerClass = Class
+ Class<?> errorHandlerClass = Class
.forName(errorHandlerNameArray[i]);
ret[i] = (IErrorHandler) errorHandlerClass.newInstance();
} catch (Exception e) {