/*******************************************************************************
* Copyright (c) 2012-2014 Torkild U. Resheim
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Torkild U. Resheim - initial API and implementation
*******************************************************************************/
package org.eclipse.mylyn.docs.epub.internal;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.eclipse.mylyn.docs.epub.core.ValidationMessage;
import org.eclipse.mylyn.docs.epub.core.ValidationMessage.Severity;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * A SAX content handler that reads an <i>Open Publication Structure</i> (OPS) document and, depending on the
 * {@link Mode}, either produces a copy in which elements and attributes that are not in the EPUB 2.0.1
 * <b>preferred</b> vocabulary are stripped, or leaves the markup as is and collects a warning for every such element
 * and attribute.
 * <p>
 * When an element is stripped only its start and end tags are dropped; the text and any preferred-vocabulary
 * elements it contains are still copied. Character data and attribute values are escaped again when they are
 * written, because the parser hands them over in unescaped form.
 * </p>
 * <p>
 * An instance accumulates the output and the messages of the document it is handed to, so use a new instance for
 * every document.
 * </p>
 *
 * @author Torkild U. Resheim
 * @see <a href="http://idpf.org/epub/20/spec/OPS_2.0.1_draft.htm">Open Publication Structure (OPS) 2.0.1</a>
 */
public class OPSValidator extends DefaultHandler {

    public enum Mode {
        /** Remove non-preferred elements and attributes */
        REMOVE,
        /** Issue warnings when non-preferred elements or attributes are found */
        WARN
    }

    /** Attribute names that are copied without a warning; compared case-insensitively. */
    @SuppressWarnings("nls")
    private static final String[] LEGAL_ATTRIBUTES = { "accesskey", "charset", "class", "coords", "dir", "href",
            "hreflang", "id", "rel", "rev", "shape", "style", "tabindex", "target", "title", "type", "xml:lang",
            /* Are these OK? */
            "xmlns", "src", "alt" };

    /**
     * A list of legal elements according to the EPUB 2.0.1 specification; compared case-insensitively.
     *
     * @see <a href="http://idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#Section1.3.4">OPS 2.0.1, section 1.3.4</a>
     * @see <a href="http://idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#Section2.2">OPS 2.0.1, section 2.2</a>
     */
    @SuppressWarnings("nls")
    private static final String[] LEGAL_ELEMENTS = { "body", "head", "html", "title", "abbr", "acronym", "address",
            "blockquote", "br", "cite", "code", "dfn", "div", "em", "h1", "h2", "h3", "h4", "h5", "h6", "kbd", "p",
            "pre", "q", "samp", "span", "strong", "var", "a", "dl", "dt", "dd", "ol", "ul", "li", "object", "param",
            "b", "big", "hr", "i", "small", "sub", "sup", "tt", "del", "ins", "bdo", "caption", "col", "colgroup",
            "table", "tbody", "td", "tfoot", "th", "thead", "tr", "img", "area", "map", "style", "link", "base" };

    /**
     * A list of elements that should be let through regardless of contents. Some publishers use this element to
     * handle features present in certain reading systems.
     */
    @SuppressWarnings("nls")
    private static final String[] PASSTHROUGH_ELEMENTS = { "meta" };

    /**
     * Parses the given document and returns its markup with all non-preferred elements and attributes removed.
     *
     * @param file
     *            the document to read
     * @param href
     *            the name of the document, only used when reporting problems
     * @return the cleaned markup, or <code>null</code> if the document could not be parsed; in that case the
     *         failure is reported on <code>System.err</code>
     * @throws ParserConfigurationException
     *             if the parser could not be created
     * @throws SAXException
     *             if the parser could not be configured or created
     * @throws IOException
     *             if the document could not be read
     */
    public static String clean(InputSource file, String href)
            throws ParserConfigurationException, SAXException, IOException {
        OPSValidator validator = new OPSValidator(href, Mode.REMOVE);
        return parse(file, href, validator) ? validator.getContents().toString() : null;
    }

    /**
     * Parses the given document and returns one warning for each element and attribute that is not in the preferred
     * vocabulary.
     *
     * @param file
     *            the document to read
     * @param href
     *            the name of the document, used in the warnings and when reporting problems
     * @return the warnings, or <code>null</code> if the document could not be parsed; in that case the failure is
     *         reported on <code>System.err</code>
     * @throws ParserConfigurationException
     *             if the parser could not be created
     * @throws SAXException
     *             if the parser could not be configured or created
     * @throws IOException
     *             if the document could not be read
     */
    public static List<ValidationMessage> validate(InputSource file, String href)
            throws ParserConfigurationException, SAXException, IOException {
        OPSValidator validator = new OPSValidator(href, Mode.WARN);
        return parse(file, href, validator) ? validator.getMessages() : null;
    }

    /**
     * Runs a non-validating parser, which does not load external DTDs, over the document. Problems creating the
     * parser are propagated; a {@link SAXException} raised while parsing is reported on <code>System.err</code>
     * and signalled through the return value.
     *
     * @return <code>true</code> if the whole document was parsed
     */
    private static boolean parse(InputSource file, String href, OPSValidator handler)
            throws ParserConfigurationException, SAXException, IOException {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setFeature("http://xml.org/sax/features/validation", false); //$NON-NLS-1$
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); //$NON-NLS-1$
        SAXParser parser = factory.newSAXParser();
        try {
            parser.parse(file, handler);
            return true;
        } catch (SAXException e) {
            System.err.println("Could not parse " + href); //$NON-NLS-1$
            e.printStackTrace();
            return false;
        }
    }

    /** Returns <code>true</code> if <code>name</code> equals one of <code>names</code>, ignoring case. */
    private static boolean containsIgnoreCase(String[] names, String name) {
        for (String candidate : names) {
            if (name.equalsIgnoreCase(candidate)) {
                return true;
            }
        }
        return false;
    }

    /** The serialized markup produced so far. */
    private final StringBuilder contents;

    /** Name of the document being handled, used in the warning messages. */
    private final String href;

    private final ArrayList<ValidationMessage> messages;

    private final Mode mode;

    /** Number of elements whose start tag has been written but whose end tag has not. */
    private int openElements = 0;

    /**
     * Creates a handler for a single document.
     *
     * @param href
     *            the name of the document, used in the warning messages
     * @param mode
     *            whether to strip non-preferred markup or to warn about it
     */
    public OPSValidator(String href, Mode mode) {
        super();
        this.href = href;
        this.mode = mode;
        contents = new StringBuilder();
        messages = new ArrayList<ValidationMessage>();
    }

    /**
     * Copies character data to the output, in document order, as long as it is inside an element that was written.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (openElements > 0) {
            for (int i = start; i < start + length; i++) {
                appendEscaped(ch[i], false);
            }
        }
    }

    /**
     * Writes the end tag for every element whose start tag was written by
     * {@link #startElement(String, String, String, Attributes)}, so that the output stays balanced.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (isWritten(qName)) {
            contents.append("</").append(qName).append('>'); //$NON-NLS-1$
            openElements--;
        }
    }

    /**
     * Returns the markup written so far. This is the builder the handler appends to, not a copy.
     *
     * @return the serialized markup
     */
    public StringBuilder getContents() {
        return contents;
    }

    /**
     * Returns the warnings collected so far. This is the list the handler appends to, not a copy.
     *
     * @return the warnings
     */
    public ArrayList<ValidationMessage> getMessages() {
        return messages;
    }

    /**
     * Returns <code>true</code> if the given attribute name is legal.
     *
     * @param name
     *            the qualified name of the attribute
     * @return whether or not the attribute is in the list of legal attributes
     */
    private boolean isLegalAttribute(String name) {
        return containsIgnoreCase(LEGAL_ATTRIBUTES, name);
    }

    /**
     * Returns <code>true</code> if the given element name is legal.
     *
     * @param name
     *            the qualified name of the element
     * @return whether or not the element is in the list of legal elements
     */
    private boolean isLegalElement(String name) {
        return containsIgnoreCase(LEGAL_ELEMENTS, name);
    }

    /**
     * Use to determine whether or not elements with this name should be let through the validator without any
     * change or warning.
     *
     * @param name
     *            the name of the element
     * @return whether or not to pass through
     */
    private boolean isPassthroughElement(String name) {
        return containsIgnoreCase(PASSTHROUGH_ELEMENTS, name);
    }

    /**
     * Decides whether the tags of the element are copied to the output. The answer only depends on the element name
     * and the mode, which lets {@link #endElement(String, String, String)} reach the same decision as
     * {@link #startElement(String, String, String, Attributes)} did for the matching start tag.
     */
    private boolean isWritten(String qName) {
        return isPassthroughElement(qName) || mode == Mode.WARN || isLegalElement(qName);
    }

    /**
     * Appends one character, replacing the characters that have a special meaning in markup with the corresponding
     * predefined entity. The quotation mark is only replaced inside attribute values.
     */
    @SuppressWarnings("nls")
    private void appendEscaped(char c, boolean inAttribute) {
        switch (c) {
        case '&':
            contents.append("&amp;");
            break;
        case '<':
            contents.append("&lt;");
            break;
        case '>':
            contents.append("&gt;");
            break;
        case '"':
            if (inAttribute) {
                contents.append("&quot;");
            } else {
                contents.append(c);
            }
            break;
        default:
            contents.append(c);
            break;
        }
    }

    /**
     * Writes the start tag of the element unless it is being stripped. Pass-through elements are copied with all
     * their attributes and never cause a warning. For all other elements non-preferred attributes are stripped in
     * {@link Mode#REMOVE} and reported in {@link Mode#WARN}; in the latter mode a non-preferred element is reported
     * as well.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (!isWritten(qName)) {
            // Only the tags are stripped; the content of the element is still handled
            return;
        }
        boolean passthrough = isPassthroughElement(qName);
        contents.append('<').append(qName);
        for (int i = 0; i < attributes.getLength(); i++) {
            String name = attributes.getQName(i);
            boolean legal = isLegalAttribute(name);
            if (passthrough || mode == Mode.WARN || legal) {
                contents.append(' ').append(name).append("=\""); //$NON-NLS-1$
                String value = attributes.getValue(i);
                for (int j = 0; j < value.length(); j++) {
                    appendEscaped(value.charAt(j), true);
                }
                contents.append('"');
                if (!passthrough && !legal) {
                    messages.add(new ValidationMessage(Severity.WARNING, MessageFormat.format(
                            "Attribute \"{0}\" in file \"{1}\" is not in OPS Preferred Vocabularies", name, href))); //$NON-NLS-1$
                }
            }
        }
        contents.append('>');
        openElements++;
        if (!passthrough && !isLegalElement(qName)) {
            messages.add(new ValidationMessage(Severity.WARNING, MessageFormat
                    .format("Element \"{0}\" in file \"{1}\" is not in OPS Preferred Vocabularies", qName, href))); //$NON-NLS-1$
        }
    }
}