blob: 01c0246f4b6a36734c1b1599d7be8c42a0f89775 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2011-2014 Torkild U. Resheim.
*
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the Eclipse Public License v1.0 which
* accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Torkild U. Resheim - initial API and implementation
*******************************************************************************/
package org.eclipse.mylyn.internal.docs.epub.core;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.eclipse.mylyn.docs.epub.opf.Item;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* This type is a SAX parser that will read a XHTML file and create a list of all images, content and CSS-files that are
* referenced either through an <i>img<i/> tag or a link.
*
* @author Torkild U. Resheim
*/
public class ReferenceScanner extends AbstractXHTMLScanner {
public static List<File> parse(Item item) throws ParserConfigurationException, SAXException, IOException {
FileReader fr = new FileReader(item.getFile());
InputSource file = new InputSource(fr);
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setFeature("http://xml.org/sax/features/validation", false); //$NON-NLS-1$
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); //$NON-NLS-1$
SAXParser parser = factory.newSAXParser();
String href = item.getHref();
ReferenceScanner scanner = new ReferenceScanner(item);
try {
parser.parse(file, scanner);
return scanner.files;
} catch (SAXException e) {
System.err.println("Could not parse " + href); //$NON-NLS-1$
e.printStackTrace();
}
return null;
}
Item currentItem;
ArrayList<File> files;
public ReferenceScanner(Item item) {
super();
files = new ArrayList<File>();
currentItem = item;
}
/**
* Case-insensitive method for obtaining an attribute.
*
* @param attributes
* SAX attributes
* @param name
* the attribute name
* @return the attribute value or <code>null</code>
*/
private String getAttribute(Attributes attributes, String name) {
for (int i = 0; i < attributes.getLength(); i++) {
String aname = attributes.getQName(i);
if (aname.equalsIgnoreCase(name)) {
return attributes.getValue(i);
}
}
return null;
}
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
// Handle inline image files
if (qName.equalsIgnoreCase("img")) { //$NON-NLS-1$
String ref = getAttribute(attributes, "src"); //$NON-NLS-1$
if (ref != null) {
String t = ref.toLowerCase();
if (t.startsWith("http://") || t.startsWith("https://")) { //$NON-NLS-1$ //$NON-NLS-2$
return;
}
// If the reference is absolute it will kept as it is, otherwise
// me must make it absolute using the source file as a
// reference.
String source = currentItem.getSourcePath();
if (source == null) {
source = currentItem.getFile();
}
File sourceFile = new File(source);
File file = new File(ref);
if (!file.isAbsolute()) {
file = new File(sourceFile.getParentFile().getAbsolutePath() + File.separator + ref);
}
files.add(file);
}
}
// Also handle links to files
if (qName.equalsIgnoreCase("a")) { //$NON-NLS-1$
String ref = getAttribute(attributes, "href"); //$NON-NLS-1$
if (ref != null) {
includeRef(ref);
}
}
// Also handle links to CSS style sheets
if (qName.equalsIgnoreCase("link")) { //$NON-NLS-1$
String ref = getAttribute(attributes, "href"); //$NON-NLS-1$
String rel = getAttribute(attributes, "rel"); //$NON-NLS-1$
if (ref != null && rel != null && rel.equals("stylesheet")) { //$NON-NLS-1$
includeRef(ref);
}
}
}
private void includeRef(String ref) {
String t = ref.toLowerCase();
if (t.startsWith("#") || t.startsWith("http://") || t.startsWith("https://")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
return;
}
// Handle links to anchors within resources (bug 375795).
if (ref.indexOf('#') > -1) {
ref = ref.substring(0, ref.indexOf('#'));
}
// If the item was generated for instance by WikiText we need to
// use the original path. Otherwise we use the item path.
String source = currentItem.getSourcePath();
if (source == null) {
source = currentItem.getFile();
}
File sourceFile = new File(source);
File file = new File(sourceFile.getParentFile().getAbsolutePath() + File.separator + ref);
if (!file.isDirectory()) {
files.add(file);
}
}
}