blob: 52c0aafe7a6c70903548a3f4d3e3fca0bbeb6639 [file] [log] [blame]
/*********************************************************************************************************************
* Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved.
* This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*********************************************************************************************************************/
package org.eclipse.smila.importing.crawler.feed;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.InvalidValueTypeException;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.datamodel.Value;
import com.sun.syndication.feed.synd.SyndCategory;
import com.sun.syndication.feed.synd.SyndContent;
import com.sun.syndication.feed.synd.SyndEnclosure;
import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.feed.synd.SyndImage;
import com.sun.syndication.feed.synd.SyndLink;
import com.sun.syndication.feed.synd.SyndPerson;
import com.sun.syndication.fetcher.FeedFetcher;
import com.sun.syndication.fetcher.impl.FeedFetcherCache;
import com.sun.syndication.fetcher.impl.HashMapFeedInfoCache;
import com.sun.syndication.fetcher.impl.HttpURLFeedFetcher;
/** Helper class to read a feed with the ROME API. */
public class RomeFeedReader {
/** local logger. */
private final Log _log = LogFactory.getLog(getClass());
/**
 * Retrieves the feed at the given URL and builds one {@link Record} per feed entry.
 * <p>
 * For every entry, each property name in {@code FeedProperties.ALL_PROPS} is resolved through
 * {@code readProperty}; properties that resolve to {@code null} are not added to the record's metadata.
 *
 * @param feedUrl
 *          URL of the feed to read
 * @return the created records; empty if the fetcher returned no feed or the feed has no entry list
 * @throws Exception
 *           nothing is caught here, so any exception raised while parsing the URL, retrieving the feed or reading
 *           a property is propagated to the caller
 */
public Collection<Record> readFeed(final String feedUrl) throws Exception {
  final Collection<Record> results = new ArrayList<>();
  // Disabled: temporarily switch the thread context class loader to the one that loaded ROME while the fetcher is
  // created. TODO: is this needed at all?
  // final ClassLoader oldCL = Thread.currentThread().getContextClassLoader(); // TODO see below
  // Thread.currentThread().setContextClassLoader(SyndFeed.class.getClassLoader());
  final FeedFetcherCache cache = HashMapFeedInfoCache.getInstance();
  final FeedFetcher fetcher = new HttpURLFeedFetcher(cache);
  // Thread.currentThread().setContextClassLoader(oldCL); // TODO see above
  final SyndFeed feed = fetcher.retrieveFeed(new URL(feedUrl));
  if (feed == null) {
    return results;
  }
  if (_log.isInfoEnabled()) {
    _log.info("Reading feed " + feedUrl);
  }
  @SuppressWarnings("unchecked")
  final List<SyndEntry> feedEntries = feed.getEntries();
  if (feedEntries == null) {
    return results;
  }
  for (final SyndEntry feedEntry : feedEntries) {
    final Record record = DataFactory.DEFAULT.createRecord();
    for (final String propertyName : FeedProperties.ALL_PROPS) {
      final Any propertyValue = readProperty(feed, feedEntry, propertyName);
      if (propertyValue == null) {
        // nothing to store for this property
        continue;
      }
      record.getMetadata().put(propertyName, propertyValue);
    }
    results.add(record);
  }
  return results;
}
/**
 * Resolves a single named feed property to its {@link Any} representation.
 * <p>
 * Feed-level properties are read from {@code feed}, entry-level properties from {@code entry}.
 *
 * @param feed
 *          the feed the entry belongs to
 * @param entry
 *          the feed entry currently being converted
 * @param property
 *          one of the property names defined in {@code FeedProperties}
 * @return the converted property, or {@code null} if the conversion helper produced no value
 * @throws RuntimeException
 *           if {@code property} matches none of the handled names
 */
@SuppressWarnings("unchecked")
private Any readProperty(final SyndFeed feed, final SyndEntry entry, final String property) {
  final Any result;
  switch (property) {
    // ----- feed attributes -----
    case FeedProperties.FEED_AUTHORS:
      result = getPersons(feed.getAuthors(), feed.getAuthor());
      break;
    case FeedProperties.FEED_CATEGORIES:
      result = getCategories(feed.getCategories());
      break;
    case FeedProperties.FEED_CONTRIBUTORS:
      result = getPersons(feed.getContributors(), null);
      break;
    case FeedProperties.FEED_COPYRIGHT:
      result = getValue(feed.getCopyright());
      break;
    case FeedProperties.FEED_DESCRIPTION:
      result = getValue(feed.getDescription());
      break;
    case FeedProperties.FEED_ENCODING:
      result = getValue(feed.getEncoding());
      break;
    case FeedProperties.FEED_TYPE:
      result = getValue(feed.getFeedType());
      break;
    case FeedProperties.FEED_IMAGE:
      result = getImage(feed.getImage());
      break;
    case FeedProperties.FEED_LANGUAGE:
      result = getValue(feed.getLanguage());
      break;
    case FeedProperties.FEED_LINKS:
      result = getLinks(feed.getLinks(), feed.getLink());
      break;
    case FeedProperties.FEED_PUBLISH_DATE:
      result = getValue(feed.getPublishedDate());
      break;
    case FeedProperties.FEED_TITLE:
      result = getValue(feed.getTitle());
      break;
    case FeedProperties.FEED_URI:
      result = getValue(feed.getUri());
      break;
    // ----- feed entry attributes -----
    case FeedProperties.FEED_ENTRY_AUTHORS:
      result = getPersons(entry.getAuthors(), entry.getAuthor());
      break;
    case FeedProperties.FEED_ENTRY_CATEGORIES:
      result = getCategories(entry.getCategories());
      break;
    case FeedProperties.FEED_ENTRY_CONTENTS:
      result = getContents(entry.getContents());
      break;
    case FeedProperties.FEED_ENTRY_CONTRIBUTORS:
      result = getPersons(entry.getContributors(), null);
      break;
    case FeedProperties.FEED_ENTRY_DESCRIPTION:
      result = getContent(entry.getDescription());
      break;
    case FeedProperties.FEED_ENTRY_ENCLOSURES:
      result = getEnclosures(entry.getEnclosures());
      break;
    case FeedProperties.FEED_ENTRY_LINKS:
      result = getLinks(entry.getLinks(), entry.getLink());
      break;
    case FeedProperties.FEED_ENTRY_PUBLISH_DATE:
      result = getValue(entry.getPublishedDate());
      break;
    case FeedProperties.FEED_ENTRY_TITLE:
      result = getValue(entry.getTitle());
      break;
    case FeedProperties.FEED_ENTRY_URI:
      result = getValue(entry.getUri());
      break;
    case FeedProperties.FEED_ENTRY_UPDATE_DATE:
      result = getValue(entry.getUpdatedDate());
      break;
    default:
      throw new RuntimeException("Unknown feed property: " + property);
  }
  return result;
}
/**
 * Converts an arbitrary object into a {@link Value} using the default data factory's auto-conversion.
 *
 * @param object
 *          the object to convert, may be {@code null}
 * @return the converted value, or {@code null} if {@code object} is {@code null}
 */
private Value getValue(final Object object) {
  return object == null ? null : DataFactory.DEFAULT.autoConvertValue(object);
}
/**
 * Converts the given object with {@code getValue} and stores the result in the map under the given key.
 * Nothing is stored when the conversion yields {@code null}.
 *
 * @param map
 *          the map to write to
 * @param name
 *          the key to store the value under
 * @param object
 *          the object to convert and store, may be {@code null}
 */
private void putValue(final AnyMap map, final String name, final Object object) {
  final Any converted = getValue(object);
  if (converted == null) {
    return;
  }
  map.put(name, converted);
}
/**
 * Converts a list of categories into an {@link AnySeq} holding one {@link AnyMap} per category. Each map carries
 * the category name and taxonomy URI, as far as they are set.
 * <p>
 * A category whose conversion fails with an {@link InvalidValueTypeException} is logged and left out of the result.
 *
 * @param categories
 *          a List of SyndCategory, may be {@code null}
 * @return the sequence of category maps, or {@code null} if {@code categories} is {@code null} or empty
 */
private AnySeq getCategories(final List<SyndCategory> categories) {
  if (categories == null || categories.isEmpty()) {
    return null;
  }
  final AnySeq result = DataFactory.DEFAULT.createAnySeq();
  for (final SyndCategory syndCategory : categories) {
    try {
      final AnyMap categoryMap = DataFactory.DEFAULT.createAnyMap();
      putValue(categoryMap, "Name", syndCategory.getName());
      // NOTE(review): the key is spelled "TaxanomyUri" (sic); left unchanged because consumers of the records
      // may rely on this exact key - confirm before correcting.
      putValue(categoryMap, "TaxanomyUri", syndCategory.getTaxonomyUri());
      result.add(categoryMap);
    } catch (final InvalidValueTypeException e) {
      _log.error("Error while creating AnyMap for Categories", e);
    }
  }
  return result;
}
/**
 * Creates an {@link AnyMap} containing all content information: the entries "Mode", "Value" and "Type", each only
 * if the corresponding attribute of the content is set.
 * <p>
 * The short type names "html", "xml" and "text" are expanded to the media types "text/html", "text/xml" and
 * "text/plain". Any other type value is stored unchanged.
 *
 * @param content
 *          a SyndContent, may be {@code null}
 * @return an {@link AnyMap} with the content information, or {@code null} if {@code content} is {@code null} or
 *         the conversion failed with an {@link InvalidValueTypeException} (which is logged)
 */
private AnyMap getContent(final SyndContent content) {
  if (content == null) {
    return null;
  }
  try {
    final AnyMap anyMap = DataFactory.DEFAULT.createAnyMap();
    // putValue skips null objects, so no explicit null checks are needed here.
    putValue(anyMap, "Mode", content.getMode());
    putValue(anyMap, "Value", content.getValue());
    String type = content.getType();
    if ("html".equals(type)) {
      type = "text/html";
    } else if ("xml".equals(type)) {
      type = "text/xml";
    } else if ("text".equals(type)) {
      // All-lowercase like the sibling branches: this is the canonical spelling of the media type, and it keeps
      // case-sensitive comparisons on the "Type" entry working.
      type = "text/plain";
    }
    putValue(anyMap, "Type", type);
    return anyMap;
  } catch (final InvalidValueTypeException e) {
    _log.error("Error while creating AnyMap for Content", e);
    return null;
  }
}
/**
 * Converts a list of contents into an {@link AnySeq}, using {@code getContent} for each element. Elements for
 * which {@code getContent} returns {@code null} are left out.
 *
 * @param contents
 *          a List of SyndContent, may be {@code null}
 * @return the sequence of content maps, or {@code null} if {@code contents} is {@code null} or empty
 */
private AnySeq getContents(final List<SyndContent> contents) {
  if (contents == null || contents.isEmpty()) {
    return null;
  }
  final AnySeq contentSeq = DataFactory.DEFAULT.createAnySeq();
  for (final SyndContent syndContent : contents) {
    final AnyMap contentMap = getContent(syndContent);
    if (contentMap == null) {
      continue;
    }
    contentSeq.add(contentMap);
  }
  return contentSeq;
}
/**
 * Converts a list of enclosures into an {@link AnySeq} holding one {@link AnyMap} per enclosure with the entries
 * "Type", "Url" and "Length", as far as they are set.
 * <p>
 * An enclosure whose conversion fails with an {@link InvalidValueTypeException} is logged and left out of the
 * result.
 *
 * @param enclosures
 *          a List of SyndEnclosure, may be {@code null}
 * @return the sequence of enclosure maps, or {@code null} if {@code enclosures} is {@code null} or empty
 */
private AnySeq getEnclosures(final List<SyndEnclosure> enclosures) {
  if (enclosures == null || enclosures.isEmpty()) {
    return null;
  }
  final AnySeq result = DataFactory.DEFAULT.createAnySeq();
  for (final SyndEnclosure syndEnclosure : enclosures) {
    try {
      final AnyMap enclosureMap = DataFactory.DEFAULT.createAnyMap();
      putValue(enclosureMap, "Type", syndEnclosure.getType());
      putValue(enclosureMap, "Url", syndEnclosure.getUrl());
      putValue(enclosureMap, "Length", syndEnclosure.getLength());
      result.add(enclosureMap);
    } catch (final InvalidValueTypeException e) {
      _log.error("Error while creating AnyMap for Enclosures", e);
    }
  }
  return result;
}
/**
 * Converts a feed image into an {@link AnyMap} with the entries "Link", "Title", "Url" and "Description", as far
 * as they are set.
 *
 * @param feedImage
 *          a SyndImage, may be {@code null}
 * @return the image map, or {@code null} if {@code feedImage} is {@code null} or the conversion failed with an
 *         {@link InvalidValueTypeException} (which is logged)
 */
private AnyMap getImage(final SyndImage feedImage) {
  if (feedImage == null) {
    return null;
  }
  AnyMap result = null;
  try {
    final AnyMap map = DataFactory.DEFAULT.createAnyMap();
    putValue(map, "Link", feedImage.getLink());
    putValue(map, "Title", feedImage.getTitle());
    putValue(map, "Url", feedImage.getUrl());
    putValue(map, "Description", feedImage.getDescription());
    // only publish the map once every entry has been written without error
    result = map;
  } catch (final InvalidValueTypeException e) {
    _log.error("Error while creating AnyMap for Image", e);
  }
  return result;
}
/**
 * Converts a list of links into an {@link AnySeq} holding one {@link AnyMap} per link with the entries "Href",
 * "Hreflang", "Rel", "Title", "Type" and "Length", as far as they are set. A link whose conversion fails with an
 * {@link InvalidValueTypeException} is logged and left out.
 * <p>
 * If {@code links} is {@code null} or empty, a non-empty {@code fallbackValue} is used instead to build a sequence
 * with a single map that only contains "Href".
 *
 * @param links
 *          a List of SyndLink, may be {@code null}
 * @param fallbackValue
 *          a fallback link href used if {@code links} is {@code null} or empty, may be {@code null}
 * @return the sequence of link maps, or {@code null} if neither links nor a usable fallback are available or the
 *         fallback conversion failed
 */
private AnySeq getLinks(final List<SyndLink> links, final String fallbackValue) {
  final AnySeq linkSeq = DataFactory.DEFAULT.createAnySeq();
  final boolean noLinks = links == null || links.isEmpty();
  if (noLinks) {
    if (fallbackValue == null || fallbackValue.isEmpty()) {
      return null;
    }
    try {
      final AnyMap fallbackMap = DataFactory.DEFAULT.createAnyMap();
      putValue(fallbackMap, "Href", fallbackValue);
      linkSeq.add(fallbackMap);
      return linkSeq;
    } catch (final InvalidValueTypeException e) {
      _log.error("Error while creating AnyMap for Links", e);
      return null;
    }
  }
  for (final SyndLink syndLink : links) {
    try {
      final AnyMap linkMap = DataFactory.DEFAULT.createAnyMap();
      putValue(linkMap, "Href", syndLink.getHref());
      putValue(linkMap, "Hreflang", syndLink.getHreflang());
      putValue(linkMap, "Rel", syndLink.getRel());
      putValue(linkMap, "Title", syndLink.getTitle());
      putValue(linkMap, "Type", syndLink.getType());
      putValue(linkMap, "Length", syndLink.getLength());
      linkSeq.add(linkMap);
    } catch (final InvalidValueTypeException e) {
      _log.error("Error while creating AnyMap for Links", e);
    }
  }
  return linkSeq;
}
/**
 * Converts a list of persons into an {@link AnySeq} holding one {@link AnyMap} per person with the entries
 * "Email", "Name" and "Uri", as far as they are set. A person whose conversion fails with an
 * {@link InvalidValueTypeException} is logged and left out.
 * <p>
 * If {@code persons} is {@code null} or empty, a non-empty {@code fallbackValue} is used instead to build a
 * sequence with one person map that only contains "Name".
 *
 * @param persons
 *          a List of SyndPerson, may be {@code null}
 * @param fallbackValue
 *          a fallback person name used if {@code persons} is {@code null} or empty, may be {@code null}
 * @return the sequence of person maps, or {@code null} if neither persons nor a usable fallback are available or
 *         the fallback conversion failed
 */
private AnySeq getPersons(final List<SyndPerson> persons, final String fallbackValue) {
  final AnySeq personSeq = DataFactory.DEFAULT.createAnySeq();
  final boolean noPersons = persons == null || persons.isEmpty();
  if (noPersons) {
    if (fallbackValue == null || fallbackValue.isEmpty()) {
      return null;
    }
    try {
      final AnyMap fallbackMap = DataFactory.DEFAULT.createAnyMap();
      putValue(fallbackMap, "Name", fallbackValue);
      personSeq.add(fallbackMap);
      return personSeq;
    } catch (final InvalidValueTypeException e) {
      _log.error("Error while creating AnyMap for Person", e);
      return null;
    }
  }
  for (final SyndPerson syndPerson : persons) {
    try {
      final AnyMap map = DataFactory.DEFAULT.createAnyMap();
      putValue(map, "Email", syndPerson.getEmail());
      putValue(map, "Name", syndPerson.getName());
      putValue(map, "Uri", syndPerson.getUri());
      personSeq.add(map);
    } catch (final InvalidValueTypeException e) {
      _log.error("Error while creating AnyMap for Person", e);
    }
  }
  return personSeq;
}
}