blob: 812aa88382aa680c5528f956510ef6fa283b51eb [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2011, 2015 Tasktop Technologies.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* David Green - initial API and implementation
*******************************************************************************/
package org.eclipse.mylyn.wikitext.parser;
import static com.google.common.base.Preconditions.checkNotNull;
import java.io.IOException;
import org.eclipse.mylyn.wikitext.internal.parser.html.AbstractSaxHtmlParser;
import org.eclipse.mylyn.wikitext.internal.parser.html.HtmlCleaner;
import org.eclipse.mylyn.wikitext.internal.parser.html.XHtmlParser;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* A parser for HTML, driving {@link DocumentBuilder}. Depending on parsers available at runtime, input may need to be
* well-formed XHTML.
*
* @see DocumentBuilder
* @author David Green
* @see #instanceWithHtmlCleanupRules()
* @since 3.0
*/
public class HtmlParser {

    /** Fully qualified name of the class that {@link #isJsoupAvailable()} attempts to load. */
    private static final String JSOUP_CLASS_NAME = "org.jsoup.Jsoup"; //$NON-NLS-1$

    /** The SAX-based parser to which all parsing is forwarded; assigned once and never null. */
    private final AbstractSaxHtmlParser delegate;

    /**
     * Wraps an already-configured delegate.
     *
     * @param parser
     *            the parser that performs the actual work, must not be null
     */
    private HtmlParser(AbstractSaxHtmlParser parser) {
        this.delegate = checkNotNull(parser);
    }

    /**
     * Creates a parser whose delegate is chosen at construction time: the internal HTML parser when
     * {@link #isJsoupAvailable()} reports {@code true}, and {@link XHtmlParser} otherwise.
     */
    public HtmlParser() {
        // isJsoupAvailable() is an instance method, so a subclass override influences this choice.
        this.delegate = isJsoupAvailable()
                ? new org.eclipse.mylyn.wikitext.internal.parser.html.HtmlParser()
                : new XHtmlParser();
    }

    /**
     * Creates a parser that has been configured by an {@link HtmlCleaner}, whose cleanup rules make the result more
     * suitable for generating wiki markup. This factory always uses the internal HTML parser; it does not consult
     * {@link #isJsoupAvailable()}.
     *
     * @return a new parser instance with cleanup rules applied
     */
    public static HtmlParser instanceWithHtmlCleanupRules() {
        org.eclipse.mylyn.wikitext.internal.parser.html.HtmlParser cleaningParser =
                new org.eclipse.mylyn.wikitext.internal.parser.html.HtmlParser();
        new HtmlCleaner().configure(cleaningParser);
        return new HtmlParser(cleaningParser);
    }

    /**
     * Creates a new parser instance; equivalent to calling {@link #HtmlParser()}.
     *
     * @return a new parser instance
     */
    public static HtmlParser instance() {
        return new HtmlParser();
    }

    /**
     * Exposes the underlying parser to code in the same package.
     *
     * @return the delegate that performs the parsing
     */
    AbstractSaxHtmlParser getDelegate() {
        return delegate;
    }

    /**
     * Parses the given input and emits an approximation of the source document to the given document builder,
     * driving the builder as a document. Equivalent to {@code parse(input, builder, true)}.
     *
     * @param input
     *            the source input, must not be null
     * @param builder
     *            the builder to which output is provided, must not be null
     * @throws IOException
     *             if the delegate parser reports an I/O failure
     * @throws SAXException
     *             if the delegate parser reports a parse failure
     */
    public void parse(InputSource input, DocumentBuilder builder) throws IOException, SAXException {
        parse(input, builder, true);
    }

    /**
     * Parses the given input and emits an approximation of the source document to the given document builder.
     *
     * @param input
     *            the source input, must not be null
     * @param builder
     *            the builder to which output is provided, must not be null
     * @param asDocument
     *            indicates if the builder should be driven as a {@link DocumentBuilder#beginDocument() document}
     * @throws IOException
     *             if the delegate parser reports an I/O failure
     * @throws SAXException
     *             if the delegate parser reports a parse failure
     */
    public void parse(InputSource input, DocumentBuilder builder, boolean asDocument) throws IOException, SAXException {
        // Arguments are validated left to right before anything is handed to the delegate.
        delegate.parse(checkNotNull(input), checkNotNull(builder), asDocument);
    }

    /**
     * Probes for jsoup by loading and initializing {@code org.jsoup.Jsoup} through this class's class loader. Any
     * {@link Throwable} raised by the attempt is treated as "not available" rather than propagated.
     *
     * @return {@code true} if the class could be loaded, {@code false} otherwise
     */
    boolean isJsoupAvailable() {
        boolean available;
        try {
            Class.forName(JSOUP_CLASS_NAME, true, HtmlParser.class.getClassLoader());
            available = true;
        } catch (Throwable ignored) {
            // Best-effort probe: every failure mode simply means the fallback parser is used.
            available = false;
        }
        return available;
    }
}