// blob: 3a55e10492ca4f4097dce1f3c2f2f0509093cf4d [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2007, 2016 David Green and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* David Green - initial API and implementation
*******************************************************************************/
package org.eclipse.mylyn.internal.wikitext.mediawiki.core.ant.tasks;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Project;
import org.apache.tools.ant.taskdefs.Get;
import org.eclipse.mylyn.wikitext.core.util.IgnoreDtdEntityResolver;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
 * An image fetching strategy that queries a MediaWiki <code>api.php</code> endpoint for the images used by a page and
 * downloads each of them with an Ant {@link Get} task.
 * <p>
 * The API is queried with <code>generator=images&amp;prop=imageinfo&amp;iiprop=url</code>; when a response carries a
 * <code>gimcontinue</code> value the query is repeated with that value until no continuation is reported or an
 * internal request limit is reached.
 * </p>
 * <p>
 * Both {@link #setUrl(URL) url} (the wiki base URL, to which <code>api.php</code> is appended) and
 * {@link #setPageName(String) pageName} must be set before {@link #fetchImages()} is called. Files are written below
 * <code>dest</code>, which is not declared in this class (presumably inherited from the superclass).
 * </p>
 */
class MediaWikiApiImageFetchingStrategy extends ImageFetchingStrategy {

    /** Matches page titles in the image/file namespace; group 1 is the bare file name. */
    private static final Pattern IMAGE_TITLE_PATTERN = Pattern.compile("(?:Image|File):(.+)"); //$NON-NLS-1$

    /** Matches image URLs that are already absolute and must not be resolved against the base URL. */
    private static final Pattern ABSOLUTE_URL_PATTERN = Pattern.compile("https?://.*"); //$NON-NLS-1$

    /** Upper bound on the number of continuation requests, guarding against a server that never stops continuing. */
    private static final int MAX_CONTINUATION_REQUESTS = 100;

    private URL url;

    private String pageName;

    /**
     * Looks up the images of the configured page via the MediaWiki API and downloads them.
     *
     * @return the file names (spaces replaced by underscores) of the images that were downloaded
     * @throws BuildException
     *             if <code>url</code> or <code>pageName</code> is missing or invalid, if the API cannot be contacted,
     *             or if its response cannot be parsed
     */
    @Override
    public Set<String> fetchImages() {
        if (pageName == null || pageName.length() == 0) {
            throw new BuildException("please specify @pageName"); //$NON-NLS-1$
        }
        if (!pageName.equals(pageName.trim())) {
            throw new BuildException("@pageName must not have leading or trailing whitespace"); //$NON-NLS-1$
        }
        if (url == null) {
            // fail with a build error rather than a NullPointerException further down
            throw new BuildException("please specify @url"); //$NON-NLS-1$
        }
        String base = computeBaseUrl();
        ImageFetchingContentHandler contentHandler = new ImageFetchingContentHandler();
        collectImageUrls(base, contentHandler);
        return downloadImages(base, contentHandler.imageTitleToUrl);
    }

    /** Returns the configured URL as a string that is guaranteed to end with a slash. */
    private String computeBaseUrl() {
        String base;
        try {
            base = url.toURI().toString();
        } catch (URISyntaxException e) {
            throw new BuildException(e);
        }
        if (!base.endsWith("/")) { //$NON-NLS-1$
            base += "/"; //$NON-NLS-1$
        }
        return base;
    }

    /** Queries the API, following continuations, and lets the handler accumulate title-to-URL mappings. */
    private void collectImageUrls(String base, ImageFetchingContentHandler contentHandler) {
        final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setNamespaceAware(true);
        parserFactory.setValidating(false);

        String gimcontinue = null;
        int remaining = MAX_CONTINUATION_REQUESTS;
        do {
            // reset so that a value left over from the previous response cannot be mistaken for a new one
            contentHandler.setGimcontinue(null);
            URL apiUrl = composeApiUrl(base, gimcontinue);
            parseApiResponse(parserFactory, apiUrl, contentHandler);
            gimcontinue = contentHandler.getGimcontinue();
        } while (gimcontinue != null && remaining-- > 0);

        if (gimcontinue != null) {
            // the server still reported a continuation when the limit was hit: make the truncation visible
            log(String.format("Image list for %s may be incomplete: continuation limit reached", pageName), //$NON-NLS-1$
                    Project.MSG_WARN);
        }
    }

    /** Builds the <code>api.php</code> query URL for the configured page and an optional continuation value. */
    private URL composeApiUrl(String base, String gimcontinue) {
        try {
            String continuation = gimcontinue == null ? "" : "&gimcontinue=" + URLEncoder.encode(gimcontinue, "UTF-8"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
            String queryString = String.format(
                    "action=query&titles=%s&generator=images&prop=imageinfo&iiprop=url&format=xml%s", //$NON-NLS-1$
                    URLEncoder.encode(pageName, "UTF-8"), //$NON-NLS-1$
                    continuation);
            return new URL(base + "api.php?" + queryString); //$NON-NLS-1$
        } catch (IOException e) {
            // UnsupportedEncodingException and MalformedURLException are both IOExceptions
            throw new BuildException("Cannot compose API URL", e); //$NON-NLS-1$
        }
    }

    /** Fetches one API response and feeds it through a SAX parser into the given handler. */
    private void parseApiResponse(SAXParserFactory parserFactory, URL apiUrl, ContentHandler contentHandler) {
        // Create the parser before opening the connection so that a parser failure cannot leak an open stream.
        XMLReader xmlReader;
        try {
            SAXParser saxParser = parserFactory.newSAXParser();
            xmlReader = saxParser.getXMLReader();
        } catch (SAXException e) {
            throw new BuildException("Unexpected error in XML content", e); //$NON-NLS-1$
        } catch (ParserConfigurationException e) {
            throw new BuildException("Cannot configure SAX parser", e); //$NON-NLS-1$
        }
        xmlReader.setEntityResolver(IgnoreDtdEntityResolver.getInstance());
        xmlReader.setContentHandler(contentHandler);

        log("Fetching " + apiUrl, Project.MSG_VERBOSE); //$NON-NLS-1$
        Reader input;
        try {
            input = new InputStreamReader(new BufferedInputStream(apiUrl.openStream()), "UTF-8"); //$NON-NLS-1$
        } catch (IOException e) {
            throw new BuildException(String.format("Cannot contact %s: %s", apiUrl, e.getMessage()), e); //$NON-NLS-1$
        }
        try {
            xmlReader.parse(new InputSource(input));
        } catch (IOException e) {
            throw new BuildException(String.format("Unexpected exception retrieving data from %s", apiUrl), e); //$NON-NLS-1$
        } catch (SAXException e) {
            throw new BuildException("Unexpected error in XML content", e); //$NON-NLS-1$
        } finally {
            try {
                input.close();
            } catch (IOException ignored) {
                // best effort: the response has already been consumed or an error is already propagating
            }
        }
    }

    /** Downloads every collected image into <code>dest</code> and returns the names of the files written. */
    private Set<String> downloadImages(String base, Map<String, String> imageTitleToUrl) {
        Set<String> filenames = new HashSet<String>();
        int fileCount = 0;
        for (Map.Entry<String, String> ent : imageTitleToUrl.entrySet()) {
            String title = ent.getKey();
            String imageUrl = ent.getValue();
            if (title == null || imageUrl == null) {
                // the response had an <ii> element without a url attribute or outside of a titled <page>
                log(String.format("Skipping image entry with missing title or URL (title=%s, url=%s)", title, //$NON-NLS-1$
                        imageUrl), Project.MSG_WARN);
                continue;
            }
            Matcher titleMatcher = IMAGE_TITLE_PATTERN.matcher(title);
            if (!titleMatcher.matches()) {
                log(String.format("Unexpected title format: %s", title), Project.MSG_WARN); //$NON-NLS-1$
                continue;
            }
            String name = titleMatcher.group(1).replace(' ', '_');
            if (!isSafeFileName(name)) {
                // the name comes from the remote server; never let it escape the destination folder
                log(String.format("Skipping image with unsafe file name: %s", name), Project.MSG_WARN); //$NON-NLS-1$
                continue;
            }
            String qualifiedUrl = qualifyImageUrl(base, imageUrl);
            log("Fetching " + qualifiedUrl, Project.MSG_INFO); //$NON-NLS-1$
            URL source;
            try {
                source = new URL(qualifiedUrl);
            } catch (MalformedURLException e) {
                log("Skipping " + qualifiedUrl + ": " + e.getMessage(), Project.MSG_WARN); //$NON-NLS-1$ //$NON-NLS-2$
                continue;
            }
            Get get = new Get();
            get.setProject(getProject());
            get.setLocation(getLocation());
            get.setSrc(source);
            get.setDest(new File(dest, name));
            get.execute();
            filenames.add(name);
            ++fileCount;
        }
        log("Fetched " + fileCount + " image files for " + pageName, Project.MSG_INFO); //$NON-NLS-1$ //$NON-NLS-2$
        return filenames;
    }

    /** Returns the image URL unchanged if it is absolute (http/https), otherwise resolved against the base URL. */
    private static String qualifyImageUrl(String base, String imageUrl) {
        if (ABSOLUTE_URL_PATTERN.matcher(imageUrl).matches()) {
            return imageUrl;
        }
        // base always ends with a slash, so drop a leading slash to avoid doubling it
        if (imageUrl.startsWith("/")) { //$NON-NLS-1$
            return base + imageUrl.substring(1);
        }
        return base + imageUrl;
    }

    /** Tells whether the name is a plain file name, i.e. has no path separators and is not a dot entry. */
    private static boolean isSafeFileName(String name) {
        if (name.indexOf('/') >= 0 || name.indexOf('\\') >= 0) {
            return false;
        }
        return !".".equals(name) && !"..".equals(name); //$NON-NLS-1$ //$NON-NLS-2$
    }

    public URL getUrl() {
        return url;
    }

    /**
     * @param url
     *            the base URL of the wiki; <code>api.php</code> is appended to it
     */
    public void setUrl(URL url) {
        this.url = url;
    }

    public String getPageName() {
        return pageName;
    }

    /**
     * @param pageName
     *            the title of the page whose images are fetched; must not have leading or trailing whitespace
     */
    public void setPageName(String pageName) {
        this.pageName = pageName;
    }

    /**
     * SAX handler that records, for each <code>page</code> element, the <code>url</code> attribute of any
     * <code>ii</code> element nested in an <code>imageinfo</code> element, keyed by the page's <code>title</code>
     * attribute. It also captures the <code>gimcontinue</code> attribute of <code>images</code> and
     * <code>continue</code> elements. Mappings accumulate across successive parses with the same instance.
     */
    private static class ImageFetchingContentHandler implements ContentHandler {

        private final Map<String, String> imageTitleToUrl = new HashMap<String, String>();

        /** Title of the <code>page</code> element currently being parsed, or null outside of one. */
        private String currentPage = null;

        /** True while inside an <code>imageinfo</code> element. */
        private boolean inImageInfo = false;

        private String gimcontinue = null;

        public void setGimcontinue(String gimcontinue) {
            this.gimcontinue = gimcontinue;
        }

        public String getGimcontinue() {
            return gimcontinue;
        }

        public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
            if ("page".equals(localName)) { //$NON-NLS-1$
                currentPage = atts.getValue("title"); //$NON-NLS-1$
            } else if ("images".equals(localName) || "continue".equals(localName)) { //$NON-NLS-1$ //$NON-NLS-2$
                gimcontinue = atts.getValue("gimcontinue"); //$NON-NLS-1$
            } else if ("imageinfo".equals(localName)) { //$NON-NLS-1$
                inImageInfo = true;
            } else if (inImageInfo && "ii".equals(localName)) { //$NON-NLS-1$
                imageTitleToUrl.put(currentPage, atts.getValue("url")); //$NON-NLS-1$
            }
        }

        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("page".equals(localName)) { //$NON-NLS-1$
                currentPage = null;
            } else if ("imageinfo".equals(localName)) { //$NON-NLS-1$
                inImageInfo = false;
            }
        }

        // The remaining ContentHandler callbacks are irrelevant to image extraction and intentionally do nothing.

        public void characters(char[] ch, int start, int length) throws SAXException {
        }

        public void endDocument() throws SAXException {
        }

        public void endPrefixMapping(String prefix) throws SAXException {
        }

        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        }

        public void processingInstruction(String target, String data) throws SAXException {
        }

        public void setDocumentLocator(Locator locator) {
        }

        public void skippedEntity(String name) throws SAXException {
        }

        public void startDocument() throws SAXException {
        }

        public void startPrefixMapping(String prefix, String uri) throws SAXException {
        }
    }
}