| /******************************************************************************* |
| * Copyright (c) 2011, 2013 Tasktop Technologies. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * David Green - initial API and implementation |
| *******************************************************************************/ |
| |
| package org.eclipse.mylyn.wikitext.internal.parser.html; |
| |
| import static com.google.common.base.Preconditions.checkNotNull; |
| |
| import java.io.BufferedInputStream; |
| import java.io.FileInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.Reader; |
| import java.io.StringWriter; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Stack; |
| |
| import org.eclipse.mylyn.wikitext.parser.DocumentBuilder; |
| import org.jsoup.Jsoup; |
| import org.jsoup.nodes.Attribute; |
| import org.jsoup.nodes.Document; |
| import org.jsoup.nodes.Element; |
| import org.jsoup.nodes.Node; |
| import org.jsoup.nodes.TextNode; |
| import org.xml.sax.Attributes; |
| import org.xml.sax.ContentHandler; |
| import org.xml.sax.InputSource; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.helpers.AttributesImpl; |
| |
| /** |
| * @author David Green |
| */ |
| public class HtmlParser extends AbstractSaxHtmlParser { |
| |
| private static abstract class NodeHandler { |
| |
| public abstract void process(Stack<NodeHandler> stack, ContentHandler contentHandler) throws SAXException; |
| |
| } |
| |
| private static class TextHandler extends NodeHandler { |
| |
| private final TextNode node; |
| |
| public TextHandler(TextNode node) { |
| this.node = node; |
| } |
| |
| @Override |
| public void process(java.util.Stack<NodeHandler> stack, ContentHandler contentHandler) throws SAXException { |
| String text = isPreserveWhitespace() ? node.getWholeText() : node.text(); |
| contentHandler.characters(text.toCharArray(), 0, text.length()); |
| } |
| |
| private boolean isPreserveWhitespace() { |
| Node parent = node.parent(); |
| while (parent != null) { |
| if (parent.nodeName().equals("pre")) { //$NON-NLS-1$ |
| return true; |
| } |
| parent = parent.parent(); |
| } |
| return false; |
| } |
| } |
| |
| private static class ElementHandler extends NodeHandler { |
| |
| private final Element element; |
| |
| public ElementHandler(Element element) { |
| this.element = element; |
| } |
| |
| @Override |
| public void process(Stack<NodeHandler> stack, ContentHandler contentHandler) throws SAXException { |
| contentHandler.startElement(null, element.nodeName(), element.nodeName(), computeAttributes()); |
| |
| stack.push(new EndElementHandler(element)); |
| List<Node> childNodes = element.childNodes(); |
| if (!childNodes.isEmpty()) { |
| for (int x = childNodes.size() - 1; x >= 0; --x) { |
| Node child = childNodes.get(x); |
| if (child instanceof Element) { |
| stack.push(new ElementHandler((Element) child)); |
| } else if (child instanceof TextNode) { |
| stack.push(new TextHandler((TextNode) child)); |
| } |
| } |
| } |
| } |
| |
| private Attributes computeAttributes() { |
| AttributesImpl attributes = new AttributesImpl(); |
| for (Attribute attr : element.attributes()) { |
| attributes.addAttribute(null, attr.getKey(), null, null, attr.getValue()); |
| } |
| return attributes; |
| } |
| |
| } |
| |
| private static class EndElementHandler extends NodeHandler { |
| |
| private final Element element; |
| |
| public EndElementHandler(Element element) { |
| this.element = element; |
| } |
| |
| @Override |
| public void process(Stack<NodeHandler> stack, ContentHandler contentHandler) throws SAXException { |
| contentHandler.endElement(null, element.nodeName(), element.nodeName()); |
| } |
| |
| } |
| |
| private static class DocumentHandler extends NodeHandler { |
| |
| private final Document document; |
| |
| public DocumentHandler(Document doc) { |
| this.document = doc; |
| } |
| |
| @Override |
| public void process(Stack<NodeHandler> stack, ContentHandler contentHandler) throws SAXException { |
| contentHandler.startDocument(); |
| |
| stack.push(new EndDocumentHandler()); |
| |
| stack.push(new ElementHandler(document.body())); |
| } |
| } |
| |
| private static class EndDocumentHandler extends NodeHandler { |
| |
| public EndDocumentHandler() { |
| } |
| |
| @Override |
| public void process(Stack<NodeHandler> stack, ContentHandler contentHandler) throws SAXException { |
| contentHandler.endDocument(); |
| |
| } |
| } |
| |
| private List<DocumentProcessor> processors = new ArrayList<DocumentProcessor>(); |
| |
| @Override |
| protected void parse(InputSource input, DocumentBuilder builder, ContentHandler contentHandler) throws IOException, |
| SAXException { |
| Document document = Jsoup.parse(readContent(input)); |
| |
| for (DocumentProcessor processor : processors) { |
| processor.process(document); |
| } |
| |
| Stack<NodeHandler> stack = new Stack<NodeHandler>(); |
| stack.push(new DocumentHandler(document)); |
| while (!stack.isEmpty()) { |
| NodeHandler handler = stack.pop(); |
| handler.process(stack, contentHandler); |
| } |
| } |
| |
| public List<DocumentProcessor> getProcessors() { |
| return processors; |
| } |
| |
| public void setProcessors(List<DocumentProcessor> processors) { |
| this.processors = checkNotNull(processors); |
| } |
| |
| private String readContent(InputSource input) throws IOException { |
| Reader reader = input.getCharacterStream(); |
| try { |
| if (reader == null) { |
| InputStream bytes = input.getByteStream(); |
| if (bytes == null) { |
| String systemId = input.getSystemId(); |
| if (systemId != null) { |
| bytes = new BufferedInputStream(new FileInputStream(systemId)); |
| } |
| if (bytes == null) { |
| throw new IllegalArgumentException(); |
| } |
| } |
| reader = new InputStreamReader(bytes, input.getEncoding() == null ? "utf-8" : input.getEncoding()); //$NON-NLS-1$ |
| } |
| StringWriter writer = new StringWriter(2048); |
| for (int i = reader.read(); i != -1; i = reader.read()) { |
| writer.write(i); |
| } |
| return writer.toString(); |
| } finally { |
| if (reader != null) { |
| reader.close(); |
| } |
| } |
| } |
| } |