474041: improve support for HTML blocks in CommonMark

Change-Id: I5faa17074b572ed4bb6ecbbed0b6df606497205d
Task-Url: https://bugs.eclipse.org/bugs/show_bug.cgi?id=474041
diff --git a/org.eclipse.mylyn.wikitext.commonmark.tests/src/org/eclipse/mylyn/internal/wikitext/commonmark/spec/CommonMarkSpecTest.java b/org.eclipse.mylyn.wikitext.commonmark.tests/src/org/eclipse/mylyn/internal/wikitext/commonmark/spec/CommonMarkSpecTest.java
index 5f3c667..57bee45 100644
--- a/org.eclipse.mylyn.wikitext.commonmark.tests/src/org/eclipse/mylyn/internal/wikitext/commonmark/spec/CommonMarkSpecTest.java
+++ b/org.eclipse.mylyn.wikitext.commonmark.tests/src/org/eclipse/mylyn/internal/wikitext/commonmark/spec/CommonMarkSpecTest.java
@@ -58,11 +58,6 @@
 
 	private static final Set<Integer> LINE_EXCLUSIONS = ImmutableSet.of(//
 			281, // Tabs
-			1915, // HTML blocks
-			2016, // HTML blocks
-			2054, // HTML blocks
-			2070, // HTML blocks
-			2078, // HTML blocks
 			2399, // Link reference definitions
 			2478, // Link reference definitions
 			2515, // Link reference definitions
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/CommonMark.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/CommonMark.java
index 52a341b..12f3280 100644
--- a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/CommonMark.java
+++ b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/CommonMark.java
@@ -18,6 +18,10 @@
 import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HorizontalRuleBlock;
 import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HtmlBlock;
 import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HtmlType1Block;
+import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HtmlCommentBlock;
+import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HtmlProcessingInstructionBlock;
+import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HtmlDoctypeBlock;
+import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HtmlCdataBlock;
 import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HtmlType7Block;
 import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.IndentedCodeBlock;
 import org.eclipse.mylyn.internal.wikitext.commonmark.blocks.ListBlock;
@@ -28,7 +32,8 @@
 
 	private static final SourceBlocks SOURCE_BLOCKS = new SourceBlocks(new BlockQuoteBlock(), new AtxHeaderBlock(),
 			new HorizontalRuleBlock(), new ListBlock(), new SetextHeaderBlock(), new FencedCodeBlock(),
-			new IndentedCodeBlock(), new HtmlType1Block(), new HtmlBlock(), new HtmlType7Block(), new ParagraphBlock(),
+			new IndentedCodeBlock(), new HtmlType1Block(), new HtmlCommentBlock(), new HtmlProcessingInstructionBlock(),
+			new HtmlDoctypeBlock(), new HtmlCdataBlock(), new HtmlBlock(), new HtmlType7Block(), new ParagraphBlock(),
 			new EmptyBlock());
 
 	public static SourceBlocks sourceBlocks() {
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/AbstractHtmlBlock.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/AbstractHtmlBlock.java
new file mode 100644
index 0000000..ce03eb2
--- /dev/null
+++ b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/AbstractHtmlBlock.java
@@ -0,0 +1,53 @@
+/*******************************************************************************
+ * Copyright (c) 2015 David Green.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     David Green - initial API and implementation
+ *******************************************************************************/
+
+package org.eclipse.mylyn.internal.wikitext.commonmark.blocks;
+
+import java.util.regex.Pattern;
+
+import org.eclipse.mylyn.internal.wikitext.commonmark.Line;
+import org.eclipse.mylyn.internal.wikitext.commonmark.LineSequence;
+import org.eclipse.mylyn.internal.wikitext.commonmark.ProcessingContext;
+import org.eclipse.mylyn.internal.wikitext.commonmark.SourceBlock;
+import org.eclipse.mylyn.wikitext.core.parser.DocumentBuilder;
+
+abstract class AbstractHtmlBlock extends SourceBlock {
+
+	@Override
+	public void process(ProcessingContext context, DocumentBuilder builder, LineSequence lineSequence) {
+		Line line = lineSequence.getCurrentLine();
+		while (line != null) {
+			builder.charactersUnescaped(line.getText());
+			builder.charactersUnescaped("\n");
+
+			lineSequence.advance();
+
+			if (closePattern().matcher(line.getText()).find()) {
+				break;
+			}
+
+			line = lineSequence.getCurrentLine();
+		}
+	}
+
+	@Override
+	public boolean canStart(LineSequence lineSequence) {
+		Line line = lineSequence.getCurrentLine();
+		if (line != null) {
+			return startPattern().matcher(line.getText()).matches();
+		}
+		return false;
+	}
+
+	protected abstract Pattern closePattern();
+
+	protected abstract Pattern startPattern();
+}
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlBlock.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlBlock.java
index 1b8647c..3081b57 100644
--- a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlBlock.java
+++ b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlBlock.java
@@ -23,8 +23,9 @@
 
 	private static final String BLOCK_TAG_NAMES = "address|article|aside|base|basefont|blockquote|body|button|canvas|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|embed|fieldset|figcaption|figure|footer|form|frame|frameset|h1|h2|h3|h4|h5|h6|head|header|hgroup|hr|html|iframe|legend|li|link|main|map|menu|menuitem|meta|nav|noframes|object|ol|optgroup|option|output|p|param|progress|section|source|summary|table|tbody|td|textarea|tfoot|th|thead|title|tr|track|ul|video";
 
-	private final Pattern startPattern = Pattern.compile(
-			"\\s{0,3}((</?(?:" + BLOCK_TAG_NAMES + ")(\\s|/>|>)?)|(<!--)|(-->)|(<\\?)|(\\?>)|(<!\\[CDATA\\[)|(]]>)).*",
+	// The tag name must be terminated by whitespace, "/>", ">" or the end of the line; if the terminator
+	// were optional, a tag that merely begins with a block tag name (e.g. "<picture>" via "p") would match.
+	private final Pattern startPattern = Pattern.compile("\\s{0,3}((</?(?:" + BLOCK_TAG_NAMES + ")(\\s|/>|>|$))).*",
 			Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
 
 	@Override
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlCdataBlock.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlCdataBlock.java
new file mode 100644
index 0000000..83375ca
--- /dev/null
+++ b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlCdataBlock.java
@@ -0,0 +1,31 @@
+/*******************************************************************************
+ * Copyright (c) 2015 David Green.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     David Green - initial API and implementation
+ *******************************************************************************/
+
+package org.eclipse.mylyn.internal.wikitext.commonmark.blocks;
+
+import java.util.regex.Pattern;
+
+public class HtmlCdataBlock extends AbstractHtmlBlock {
+
+	private final Pattern startPattern = Pattern.compile("\\s{0,3}<!\\[CDATA\\[.*");
+
+	private final Pattern closePattern = Pattern.compile("]]>");
+
+	@Override
+	protected Pattern closePattern() {
+		return closePattern;
+	}
+
+	@Override
+	protected Pattern startPattern() {
+		return startPattern;
+	}
+}
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlCommentBlock.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlCommentBlock.java
new file mode 100644
index 0000000..c5ebb35
--- /dev/null
+++ b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlCommentBlock.java
@@ -0,0 +1,31 @@
+/*******************************************************************************
+ * Copyright (c) 2015 David Green.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     David Green - initial API and implementation
+ *******************************************************************************/
+
+package org.eclipse.mylyn.internal.wikitext.commonmark.blocks;
+
+import java.util.regex.Pattern;
+
+public class HtmlCommentBlock extends AbstractHtmlBlock {
+
+	private final Pattern startPattern = Pattern.compile("\\s{0,3}<!--.*");
+
+	private final Pattern closePattern = Pattern.compile("-->");
+
+	@Override
+	protected Pattern closePattern() {
+		return closePattern;
+	}
+
+	@Override
+	protected Pattern startPattern() {
+		return startPattern;
+	}
+}
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlConstants.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlConstants.java
deleted file mode 100644
index 9687a68..0000000
--- a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlConstants.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2015 David Green.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- *     David Green - initial API and implementation
- *******************************************************************************/
-
-package org.eclipse.mylyn.internal.wikitext.commonmark.blocks;
-
-class HtmlConstants {
-
-	private static final String ATTRIBUTE_VALUE_QUOTED = "\"[^<\"]*\"";
-
-	private static final String ATTRIBUTE_VALUE_SINGLEQUOTED = "'[^<']*'";
-
-	private static final String ATTRIBUTE_VALUE_UNQUOTED = "[^\"'<>=]+";
-
-	private static final String ATTRIBUTE_VALUE = "(?:" + ATTRIBUTE_VALUE_QUOTED + "|" + ATTRIBUTE_VALUE_SINGLEQUOTED
-			+ "|" + ATTRIBUTE_VALUE_UNQUOTED + ")";
-
-	private static final String ATTRIBUTE_NAME = "[a-zA-Z_][a-zA-Z0-9_:.-]*";
-
-	static final String ATTRIBUTE = "(?:" + ATTRIBUTE_NAME + "(?:\\s*=\\s*" + ATTRIBUTE_VALUE + ")?)";
-
-	static final String REPEATING_ATTRIBUTE = "(?:\\s+" + ATTRIBUTE + ")*";
-
-	static final String HTML_TAG_NAME = "([a-zA-Z_][a-zA-Z0-9_:-]*)";
-
-	private HtmlConstants() {
-		// prevent instantiation
-	}
-}
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlDoctypeBlock.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlDoctypeBlock.java
new file mode 100644
index 0000000..4630d3b
--- /dev/null
+++ b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlDoctypeBlock.java
@@ -0,0 +1,31 @@
+/*******************************************************************************
+ * Copyright (c) 2015 David Green.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     David Green - initial API and implementation
+ *******************************************************************************/
+
+package org.eclipse.mylyn.internal.wikitext.commonmark.blocks;
+
+import java.util.regex.Pattern;
+
+public class HtmlDoctypeBlock extends AbstractHtmlBlock {
+
+	private final Pattern startPattern = Pattern.compile("\\s{0,3}<![A-Z].*");
+
+	private final Pattern closePattern = Pattern.compile(">");
+
+	@Override
+	protected Pattern closePattern() {
+		return closePattern;
+	}
+
+	@Override
+	protected Pattern startPattern() {
+		return startPattern;
+	}
+}
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlProcessingInstructionBlock.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlProcessingInstructionBlock.java
new file mode 100644
index 0000000..ef69cc2
--- /dev/null
+++ b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlProcessingInstructionBlock.java
@@ -0,0 +1,31 @@
+/*******************************************************************************
+ * Copyright (c) 2015 David Green.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     David Green - initial API and implementation
+ *******************************************************************************/
+
+package org.eclipse.mylyn.internal.wikitext.commonmark.blocks;
+
+import java.util.regex.Pattern;
+
+public class HtmlProcessingInstructionBlock extends AbstractHtmlBlock {
+
+	private final Pattern startPattern = Pattern.compile("\\s{0,3}<\\?.*");
+
+	private final Pattern closePattern = Pattern.compile("\\?>");
+
+	@Override
+	protected Pattern closePattern() {
+		return closePattern;
+	}
+
+	@Override
+	protected Pattern startPattern() {
+		return startPattern;
+	}
+}
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlType1Block.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlType1Block.java
index dba25a2..1c7256e 100644
--- a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlType1Block.java
+++ b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlType1Block.java
@@ -13,13 +13,7 @@
 
 import java.util.regex.Pattern;
 
-import org.eclipse.mylyn.internal.wikitext.commonmark.Line;
-import org.eclipse.mylyn.internal.wikitext.commonmark.LineSequence;
-import org.eclipse.mylyn.internal.wikitext.commonmark.ProcessingContext;
-import org.eclipse.mylyn.internal.wikitext.commonmark.SourceBlock;
-import org.eclipse.mylyn.wikitext.core.parser.DocumentBuilder;
-
-public class HtmlType1Block extends SourceBlock {
+public class HtmlType1Block extends AbstractHtmlBlock {
 
 	private final Pattern startPattern = Pattern.compile("\\s{0,3}<(?:pre|script|style)(\\s|>|$).*",
 			Pattern.CASE_INSENSITIVE);
@@ -28,28 +22,12 @@
 			Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
 
 	@Override
-	public void process(ProcessingContext context, DocumentBuilder builder, LineSequence lineSequence) {
-		Line line = lineSequence.getCurrentLine();
-		while (line != null && !line.isEmpty()) {
-			builder.charactersUnescaped(line.getText());
-			builder.charactersUnescaped("\n");
-
-			lineSequence.advance();
-
-			if (closePattern.matcher(line.getText()).find()) {
-				break;
-			}
-
-			line = lineSequence.getCurrentLine();
-		}
+	protected Pattern closePattern() {
+		return closePattern;
 	}
 
 	@Override
-	public boolean canStart(LineSequence lineSequence) {
-		Line line = lineSequence.getCurrentLine();
-		if (line != null) {
-			return startPattern.matcher(line.getText()).matches();
-		}
-		return false;
+	protected Pattern startPattern() {
+		return startPattern;
 	}
 }
diff --git a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlType7Block.java b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlType7Block.java
index 48bfc0f..9240a12 100644
--- a/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlType7Block.java
+++ b/org.eclipse.mylyn.wikitext.commonmark/src/org/eclipse/mylyn/internal/wikitext/commonmark/blocks/HtmlType7Block.java
@@ -11,9 +11,6 @@
 
 package org.eclipse.mylyn.internal.wikitext.commonmark.blocks;
 
-import static org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HtmlConstants.HTML_TAG_NAME;
-import static org.eclipse.mylyn.internal.wikitext.commonmark.blocks.HtmlConstants.REPEATING_ATTRIBUTE;
-
 import java.util.regex.Pattern;
 
 import org.eclipse.mylyn.internal.wikitext.commonmark.Line;
@@ -24,6 +21,23 @@
 
 public class HtmlType7Block extends SourceBlock {
 
+	private static final String ATTRIBUTE_VALUE_QUOTED = "\"[^<\"]*\"";
+
+	private static final String ATTRIBUTE_VALUE_SINGLEQUOTED = "'[^<']*'";
+
+	private static final String ATTRIBUTE_VALUE_UNQUOTED = "[^\"'<>=]+";
+
+	private static final String ATTRIBUTE_VALUE = "(?:" + ATTRIBUTE_VALUE_QUOTED + "|" + ATTRIBUTE_VALUE_SINGLEQUOTED
+			+ "|" + ATTRIBUTE_VALUE_UNQUOTED + ")";
+
+	private static final String ATTRIBUTE_NAME = "[a-zA-Z_][a-zA-Z0-9_:.-]*";
+
+	private static final String ATTRIBUTE = "(?:" + ATTRIBUTE_NAME + "(?:\\s*=\\s*" + ATTRIBUTE_VALUE + ")?)";
+
+	private static final String REPEATING_ATTRIBUTE = "(?:\\s+" + ATTRIBUTE + ")*";
+
+	private static final String HTML_TAG_NAME = "([a-zA-Z_][a-zA-Z0-9_:-]*)";
+
 	private final Pattern startPattern = Pattern.compile(
 			"\\s{0,3}<" + HTML_TAG_NAME + REPEATING_ATTRIBUTE + "\\s*>?\\s*",
 			Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);