blob: 1f2517ef313a9f4a9c311767792012797e3eac33 [file] [log] [blame]
* Copyright (c) 2011-2014 Torkild U. Resheim.
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the Eclipse Public License v1.0 which
* accompanies this distribution, and is available at
* Contributors:
* Torkild U. Resheim - initial API and implementation
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.eclipse.emf.common.util.EList;
import org.eclipse.emf.common.util.URI;
import org.eclipse.emf.ecore.resource.Resource;
import org.eclipse.emf.ecore.resource.ResourceSet;
import org.eclipse.emf.ecore.resource.impl.ResourceSetImpl;
import org.eclipse.emf.ecore.xmi.XMLResource;
import org.eclipse.emf.ecore.xmi.impl.XMLResourceFactoryImpl;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.ext.DefaultHandler2;
* Represents one EPUB file. Currently <b>only</b> version 2.0.1 of the EPUB specification is supported. One or more
* publications can be added and will be a part of the distribution when packed. See the <a
* href="">OPS specification</a> for definitions of words and
* terms.
* <p>
* The simplest usage of this API may look like the following:
* </p>
* <pre>
* EPUB epub = new EPUB();
* OPSPublication oebps = new OPS2Publication();
* oebps.addItem(new File(&quot;chapter.xhtml&quot;));
* epub.add(oebps);
* epub.pack(new File(&quot;book.epub&quot;));
* </pre>
* <p>
* This will create a new EPUB instance and an OPS (which is the typical content of an EPUB) with one chapter. The OPS
* will have one chapter with contents from <b>chapter.xhtml</b> and the final result is an EPUB named <b>book.epub</b>.
* </p>
* @author Torkild U. Resheim
* @see
public class EPUB {
* SAX parser for detecting the version of an OEBPS contained within an EPUB.
// SAX handler that captures the "version" attribute of the OPF package element while a root file is parsed.
private class VersionDetector extends DefaultHandler2 {
// Value of the "version" attribute of the most recent package element seen; remains null if none was encountered.
private String versionString;
// Called for every opening tag; only the qualified name and the attribute list are inspected.
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
// The package element is accepted both with and without the "opf:" prefix.
if (qName.equals("opf:package") || qName.equals("package")) {//$NON-NLS-1$ //$NON-NLS-2$
versionString = attributes.getValue("version"); //$NON-NLS-1$
/** EPUB MIME type; this is the value the "mimetype" entry of the archive is compared against */
public static final String MIMETYPE_EPUB = "application/epub+zip"; //$NON-NLS-1$
/** OEBPS (OPS+OPF) MIME type; root files with this media type are treated as OPS publications */
private static final String MIMETYPE_OEBPS = "application/oebps-package+xml"; //$NON-NLS-1$
/** The encoding to use for the OCF */
private static final String OCF_FILE_ENCODING = "UTF-8"; //$NON-NLS-1$
/** Suffix for OCF files */
private static final String OCF_FILE_SUFFIX = "xml"; //$NON-NLS-1$
/** Version of the OCF specification used */
private static final String OCF_VERSION = "1.0"; //$NON-NLS-1$
/** Receiver of log messages; may be <code>null</code>, in which case messages passed to log(String, Severity) are dropped */
private ILogger logger;
/** The container holding all the publications */
private Container ocfContainer;
* Creates a new <b>empty</b> instance of an EPUB. Use {@link #add(Publication)} and {@link #pack(File)} to add
* publications and ready the EPUB for distribution.
// Creates the OCF container model through the generated factory.
public EPUB() {
ocfContainer = OCFFactory.eINSTANCE.createContainer();
// NOTE(review): the root file list is created here, but no statement in this listing attaches it to
// ocfContainer - confirm against the complete source.
RootFiles rootFiles = OCFFactory.eINSTANCE.createRootFiles();
// Creates an EPUB whose log messages are forwarded to the given logger.
// NOTE(review): no delegation to the no-argument constructor is visible in this listing, so ocfContainer
// is not initialised on this path as shown - confirm against the complete source.
public EPUB(ILogger logger) {
this.logger = logger;
* Adds a new publication (or root file) to the EPUB. Use {@link #add(Publication)} when adding an OEBPS
* publication.
* <p>
* Note that while an {@link EPUB} can technically contain multiple instances of a {@link Publication}, in
* practice reading systems do not support this.
* </p>
* @param file
* the publication to add
* @param type
* the MIME type of the publication
* @see #add(Publication)
// Derives a root file path of the form <SUBTYPE>[_<count>]/<file name> for the given file and logs the addition.
public void add(File file, String type) {
// The folder name is the part of the MIME type after the last '/', upper-cased.
// NOTE(review): toUpperCase() without an explicit Locale depends on the default locale.
String name = type.substring(type.lastIndexOf('/') + 1, type.length()).toUpperCase();
RootFiles rootFiles = ocfContainer.getRootfiles();
int count = rootFiles.getRootfiles().size();
// Any root file beyond the first triggers a warning.
if (count >= 1) {
log("Multiple root files is unsupported by most reading systems!", Severity.WARNING); //$NON-NLS-1$
// The first root file gets the plain name, later ones are suffixed with the current count.
String rootFileName = count > 0 ? name + "_" + count : name; //$NON-NLS-1$
// NOTE(review): File.separator is platform dependent, whereas add(Publication) builds its path with "/" -
// confirm which separator the container path is expected to use.
rootFileName += File.separator + file.getName();
// NOTE(review): this listing shows no statement that stores rootFileName, type or file on the new root
// file, nor one that appends it to rootFiles - confirm against the complete source.
RootFile rootFile = OCFFactory.eINSTANCE.createRootFile();
log(MessageFormat.format(Messages.getString("EPUB.1"), rootFile.getFullPath(), //$NON-NLS-1$
rootFile.getMediaType()), Severity.VERBOSE);
* Adds a new OEBPS publication to the EPUB. Use {@link #add(File, String)} to add other types of content.
* <p>
* Note that while an {@link EPUB} can technically contain multiple instances of a {@link Publication}, in
* practice reading systems do not support this.
* </p>
* @param oebps
* the publication to add.
// Derives a root file path of the form OEBPS[_<count>]/content.opf for the publication and logs the addition.
public void add(Publication oebps) {
RootFiles rootFiles = ocfContainer.getRootfiles();
int count = rootFiles.getRootfiles().size();
// Any root file beyond the first triggers a warning.
if (count >= 1) {
log("Multiple root files is unsupported by most reading systems!", Severity.WARNING); //$NON-NLS-1$
// The first publication lives in "OEBPS", later ones in "OEBPS_<count>".
String rootFileName = count > 0 ? "OEBPS_" + count : "OEBPS"; //$NON-NLS-1$ //$NON-NLS-2$
rootFileName += "/content.opf"; //$NON-NLS-1$
// NOTE(review): this listing shows no statement that stores rootFileName or the publication on the new
// root file, nor one that appends it to rootFiles - confirm against the complete source.
RootFile rootFile = OCFFactory.eINSTANCE.createRootFile();
log(MessageFormat.format(Messages.getString("EPUB.0"), rootFile.getFullPath(), //$NON-NLS-1$
rootFile.getMediaType()), Severity.VERBOSE);
* Utility method for deleting a folder recursively.
* @param folder
* the folder to delete
// Walks the folder depth-first, recursing into every child entry.
private void deleteFolder(File folder) {
if (folder.isDirectory()) {
// NOTE(review): File.list() is documented to return null when an I/O error occurs, which would make the
// loop below throw a NullPointerException.
String[] children = folder.list();
for (String element : children) {
deleteFolder(new File(folder, element));
// NOTE(review): no call that actually deletes the file or folder is visible in this listing - confirm
// against the complete source.
* Returns the container instance of the EPUB.
* @return the container instance
// Plain accessor: hands out the internal container instance itself, not a copy.
public Container getContainer() {
return ocfContainer;
* Returns a list of all <i>OPS publications</i> contained within the EPUB. Publications in unsupported versions
* will not be returned. However their existence can still be determined by looking at the
* {@link Container#getRootfiles()} result.
* @return a list of all OPS publications
* @see {@link #getContainer()} for obtaining the root file container
// Collects, in container order, the publication object of every root file whose media type is the OEBPS type.
public List<Publication> getOPSPublications() {
// A fresh list is built on every call.
ArrayList<Publication> publications = new ArrayList<Publication>();
EList<RootFile> rootFiles = ocfContainer.getRootfiles().getRootfiles();
for (RootFile rootFile : rootFiles) {
// Root files of any other media type are skipped.
if (rootFile.getMediaType().equals(MIMETYPE_OEBPS)) {
// May be null if the publication is in an unsupported format.
if (rootFile.getPublication() != null) {
publications.add((Publication) rootFile.getPublication());
return publications;
* Use to check whether or not the specified file is in a supported format and can be opened as an EPUB. If it's not
* an EPUB <code>false</code> will be returned. Note that this method does not test the contents of the EPUB which
* may or may not contain unsupported root files.
* @param epubFile
* the target EPUB file
* @return <code>true</code> if the file can be opened
* @throws IOException
// Two-stage check: the MIME type reported by EPUBFileUtil must match first, then the archive content is
// examined through isEPUB(InputStream).
public boolean isEPUB(File epubFile) throws IOException {
String mimeType = EPUBFileUtil.getMimeType(epubFile);
// NOTE(review): a null result from getMimeType would throw a NullPointerException here - confirm that the
// helper never returns null.
if (mimeType.equals(MIMETYPE_EPUB)) {
// The stream opened here is handed over to the stream-based check and is not closed in this method.
return isEPUB(new FileInputStream(epubFile));
return false;
/** Size in bytes of the buffer used when reading the "mimetype" entry of an archive */
private static final int BUFFERSIZE = 2048;
* Used to verify that the given {@link InputStream} contents is an EPUB. As per specification the first entry in
* the file must be named "mimetype" and contain the string <i>application/epub+zip</i>. Further verification is not
* done at this stage.
* @param inputStream
* the EPUB input stream
* @return <code>true</code> if the file is an EPUB file
* @throws IOException
public static boolean isEPUB(InputStream inputStream) throws IOException {
ZipInputStream in = new ZipInputStream(inputStream);
try {
byte[] buf = new byte[BUFFERSIZE];
ZipEntry entry = null;
if ((entry = in.getNextEntry()) != null) {
String entryName = entry.getName();
if (entryName.equals("mimetype")) { //$NON-NLS-1$
String type = new String();
while ((, 0, BUFFERSIZE)) > 0) {
type = type + new String(buf);
if (type.trim().equals(EPUB.MIMETYPE_EPUB)) {
return true;
} catch (IOException e) {
return false;
} finally {
return false;
* Tests whether or not the OEBPS is in a version that is supported by this tooling.
* @param rootFile
* the root file
* @return <code>true</code> if the OEBPS can be read
private boolean isSupportedOEBPS(File rootFile) {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
VersionDetector vd = new VersionDetector();
SAXParser parser = factory.newSAXParser();
parser.parse(rootFile, vd);
if (vd.versionString == null) {
return false;
String[] segments = vd.versionString.split("\\."); //$NON-NLS-1$
if (segments[0].equals("2") && segments[1].equals("0")) { //$NON-NLS-1$ //$NON-NLS-2$
return true;
} else {
return false;
} catch (ParserConfigurationException e) {
return false;
} catch (SAXException e) {
return false;
} catch (IOException e) {
return false;
// Forwards the message to the configured logger; does nothing when no logger has been set.
private void log(String message, Severity severity) {
if (logger != null) {
logger.log(message, severity);
* Assembles the EPUB file using a temporary working folder. The folder will be deleted as soon as the assembly has
* completed.
* @param epubFile
* the target EPUB file
* @throws Exception
// Packs the EPUB through a freshly created temporary working folder and returns that folder.
public File pack(File epubFile) throws Exception {
// createTempFile only serves to reserve a unique name: the file is removed again below and a directory with
// the same name is created in its place.
File workingFolder = File.createTempFile("epub_", null); //$NON-NLS-1$
// Packing is skipped when the temporary file could not be swapped for a directory.
if (workingFolder.delete() && workingFolder.mkdirs()) {
pack(epubFile, workingFolder);
return workingFolder;
* Assembles the EPUB file using the specified working folder. The contents of the working folder will <b>not</b> be
* removed when the operation has completed. If the temporary data is not interesting, use {@link #pack(File)}
* instead.
* @param epubFile
* the target EPUB file
* @param rootFolder
* the root folder holding all the EPUB contents
* @throws Exception
* @see {@link #pack(File)}
// Writes every root file of the container beneath rootFolder. Throws ValidationException when the container has
// no root files, IllegalArgumentException when a root file holds an object that is neither a Publication nor a
// File, and IOException when rootFolder neither exists as a directory nor can be created.
public void pack(File epubFile, File rootFolder) throws Exception {
if (ocfContainer.getRootfiles().getRootfiles().isEmpty()) {
throw new ValidationException("EPUB does not contain any publications"); //$NON-NLS-1$
// An existing directory is used as is; otherwise it is created including missing parents.
if (rootFolder.isDirectory() || rootFolder.mkdirs()) {
EList<RootFile> publications = ocfContainer.getRootfiles().getRootfiles();
log(MessageFormat.format(Messages.getString("EPUB.2"), epubFile.getAbsolutePath()), Severity.INFO); //$NON-NLS-1$
for (RootFile rootFile : publications) {
Object publication = rootFile.getPublication();
// Target location of this root file inside the working folder.
File root = new File(rootFolder.getAbsolutePath() + File.separator + rootFile.getFullPath());
// Publications assemble themselves at the target location; plain files are copied there.
if (publication instanceof Publication) {
((Publication) publication).pack(root);
} else {
if (rootFile.getPublication() instanceof File) {
EPUBFileUtil.copy((File) rootFile.getPublication(), root);
} else {
throw new IllegalArgumentException("Unknown publication type in root file"); //$NON-NLS-1$
// NOTE(review): the next line is incomplete in this listing (the beginning of the statement is missing) -
// restore it from the complete source.
}, rootFolder);
log(MessageFormat.format(Messages.getString("EPUB.3"), //$NON-NLS-1$
publications.size()), Severity.INFO);
} else {
throw new IOException("Could not create working folder in " + rootFolder.getAbsolutePath()); //$NON-NLS-1$
* Reads the <i>Open Container Format (OCF)</i> formatted list of contents of this EPUB. The result of this
* operation is placed in the {@link #ocfContainer} instance.
* @param rootFolder
* the folder where the EPUB was unpacked
* @throws IOException
* @see {@link #unpack(File)}
* @see {@link #unpack(File, File)}
* @see <a href="">EPUB3 OCF specification</a>
* @see <a href="">EPUB2 OCF specification</a>
// Locates META-INF/container.xml beneath rootFolder, creates an EMF resource for it and replaces ocfContainer
// with the first object of that resource.
protected void readOCF(File rootFolder) throws IOException {
// These file names are listed in the OCF specification and must not be
// changed.
File metaFolder = new File(rootFolder.getAbsolutePath() + File.separator + "META-INF"); //$NON-NLS-1$
File containerFile = new File(metaFolder.getAbsolutePath() + File.separator + "container.xml"); //$NON-NLS-1$
ResourceSet resourceSet = new ResourceSetImpl();
URI fileURI = URI.createFileURI(containerFile.getAbsolutePath());
Resource resource = resourceSet.createResource(fileURI);
// NOTE(review): no statement that loads the resource is visible in this listing; get(0) on an unloaded
// resource would presumably fail - confirm against the complete source.
ocfContainer = (Container) resource.getContents().get(0);
* Registers a new resource factory for OCF data structures. This is normally done through Eclipse extension points
* but we also need to be able to create this factory without the Eclipse runtime.
// Defines a resource factory that produces OCFResourceImpl instances with the load and save options set below.
private void registerOCFResourceFactory() {
// Register package so that it is available even without the Eclipse
// runtime
OCFPackage packageInstance = OCFPackage.eINSTANCE;
// Register the file suffix
// NOTE(review): in this listing the anonymous factory below is created but the statement that registers it
// for a file suffix is not visible - confirm against the complete source.
new XMLResourceFactoryImpl() {
// Every resource created through this factory gets the same set of default options.
public Resource createResource(URI uri) {
OCFResourceImpl xmiResource = new OCFResourceImpl(uri);
Map<Object, Object> loadOptions = xmiResource.getDefaultLoadOptions();
Map<Object, Object> saveOptions = xmiResource.getDefaultSaveOptions();
// We use extended metadata
saveOptions.put(XMLResource.OPTION_EXTENDED_META_DATA, Boolean.TRUE);
loadOptions.put(XMLResource.OPTION_EXTENDED_META_DATA, Boolean.TRUE);
// Required in order to correctly read in attributes
loadOptions.put(XMLResource.OPTION_LAX_FEATURE_PROCESSING, Boolean.TRUE);
// Treat "href" attributes as features
// UTF-8 encoding is required per specification
// NOTE(review): the statements belonging to the two comments above are not visible in this listing.
// Do not download any external DTDs.
Map<String, Object> parserFeatures = new HashMap<String, Object>();
// NOTE(review): the feature names in the next two statements are empty strings here and the second
// statement is cut off - restore both from the complete source.
parserFeatures.put("", Boolean.FALSE); //$NON-NLS-1$
parserFeatures.put("", //$NON-NLS-1$
loadOptions.put(XMLResource.OPTION_PARSER_FEATURES, parserFeatures);
return xmiResource;
* Unpacks the EPUB file to a temporary location and populates the data model with the content.
* @param epubFile
* the EPUB file to unpack
* @return the location where the EPUB is unpacked
* @throws Exception
* @see {@link #unpack(File, File)}
// Unpacks the EPUB into a freshly created temporary folder and returns that folder.
public File unpack(File epubFile) throws Exception {
// createTempFile only serves to reserve a unique name; the file is replaced by a directory below.
File workingFolder = File.createTempFile("epub_", null); //$NON-NLS-1$
workingFolder.deleteOnExit(); // XXX: Avoid using deleteOnExit()
// Unpacking is skipped when the temporary file could not be swapped for a directory.
if (workingFolder.delete() && workingFolder.mkdirs()) {
unpack(epubFile, workingFolder);
return workingFolder;
* Unpacks the given EPUB file into the specified destination and populates the data model with the content. Note
* that when the destination folder already exists and is not empty the EPUB file will not be unpacked or verified, but
* the contents of the destination will be treated as an already unpacked EPUB. If this behaviour is not desired one
* should take steps to delete the folder prior to unpacking.
* <p>
* When performing the unpacking, the modification date of the destination folder will be set to the modification
* date of the source EPUB. Additionally the contents of the EPUB will retain the original modification date if set.
* </p>
* <p>
* Multiple OPS root files in the publication will populate the OCF container instance with one
* {@link Publication} for each as expected. The contents of the data model starting with the OCF container will
* be replaced. If the publication is in an unsupported version it will not be added to the data model.
* </p>
* @param epubFile
* the EPUB file to unpack
* @param rootFolder
* the destination folder
* @throws Exception
* @see {@link #unpack(File)} when destination is not interesting
* @see {@link #getContainer()} to obtain the container instance
* @see {@link #getOPSPublications()} to get a list of all contained OPS publications
// Rejects files that are not EPUBs with an IllegalArgumentException, unzips the archive when the destination
// is missing or empty, then walks the OEBPS root files of the container.
public void unpack(File epubFile, File rootFolder) throws Exception {
if (!isEPUB(epubFile)) {
throw new IllegalArgumentException(MessageFormat.format("{0} is not an EPUB file", epubFile)); //$NON-NLS-1$
// A destination that already has content is treated as a previously unpacked EPUB and left untouched.
// NOTE(review): list() returns null when rootFolder exists but is not a directory, which would throw a
// NullPointerException here.
if (!rootFolder.exists() || rootFolder.list().length == 0) {
EPUBFileUtil.unzip(epubFile, rootFolder);
// NOTE(review): no statement that reads container.xml into ocfContainer is visible at this point in the
// listing - confirm against the complete source.
EList<RootFile> rootFiles = ocfContainer.getRootfiles().getRootfiles();
for (RootFile rootFile : rootFiles) {
// Only root files with the OEBPS media type are considered.
if (rootFile.getMediaType().equals(MIMETYPE_OEBPS)) {
File root = new File(rootFolder.getAbsolutePath() + File.separator + rootFile.getFullPath());
if (isSupportedOEBPS(root)) {
// NOTE(review): what happens to the new publication instance is not visible in this listing.
Publication ops = Publication.getVersion2Instance();
} else {
// NOTE(review): the log statement below is cut off in this listing (the severity argument is missing).
log(MessageFormat.format("Unsupported OEBPS version in root file {0}", rootFile.getFullPath()), //$NON-NLS-1$
* Creates a new folder named META-INF and writes the required (as per the OPS specification) <b>container.xml</b>
* in that folder. This is part of the packing procedure.
* @param rootFolder
* the root folder
* @see <a href="">EPUB3 OCF specification</a>
* @see <a href="">EPUB2 OCF specification</a>
private void writeOCF(File rootFolder) throws IOException {
File metaFolder = new File(rootFolder.getAbsolutePath() + File.separator + "META-INF"); //$NON-NLS-1$
if (metaFolder.mkdir()) {
File containerFile = new File(metaFolder.getAbsolutePath() + File.separator + "container.xml"); //$NON-NLS-1$
ResourceSet resourceSet = new ResourceSetImpl();
// Register the packages to make it available during loading.
URI fileURI = URI.createFileURI(containerFile.getAbsolutePath());
Resource resource = resourceSet.createResource(fileURI);