/*********************************************************************************************************************
 * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved.
 * This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 *********************************************************************************************************************/
package org.eclipse.smila.importing.crawler.feed; | |
import java.net.URL; | |
import java.util.ArrayList; | |
import java.util.Collection; | |
import java.util.List; | |
import org.apache.commons.logging.Log; | |
import org.apache.commons.logging.LogFactory; | |
import org.eclipse.smila.datamodel.Any; | |
import org.eclipse.smila.datamodel.AnyMap; | |
import org.eclipse.smila.datamodel.AnySeq; | |
import org.eclipse.smila.datamodel.DataFactory; | |
import org.eclipse.smila.datamodel.InvalidValueTypeException; | |
import org.eclipse.smila.datamodel.Record; | |
import org.eclipse.smila.datamodel.Value; | |
import com.sun.syndication.feed.synd.SyndCategory; | |
import com.sun.syndication.feed.synd.SyndContent; | |
import com.sun.syndication.feed.synd.SyndEnclosure; | |
import com.sun.syndication.feed.synd.SyndEntry; | |
import com.sun.syndication.feed.synd.SyndFeed; | |
import com.sun.syndication.feed.synd.SyndImage; | |
import com.sun.syndication.feed.synd.SyndLink; | |
import com.sun.syndication.feed.synd.SyndPerson; | |
import com.sun.syndication.fetcher.FeedFetcher; | |
import com.sun.syndication.fetcher.impl.FeedFetcherCache; | |
import com.sun.syndication.fetcher.impl.HashMapFeedInfoCache; | |
import com.sun.syndication.fetcher.impl.HttpURLFeedFetcher; | |
/** Helper class to read a feed with the ROME API. */ | |
public class RomeFeedReader { | |
/** local logger. */ | |
private final Log _log = LogFactory.getLog(getClass()); | |
/** read given feed, create record for each feed entry. */ | |
public Collection<Record> readFeed(final String feedUrl) throws Exception { | |
final Collection<Record> results = new ArrayList<>(); | |
// final ClassLoader oldCL = Thread.currentThread().getContextClassLoader(); // TODO s.u. | |
// Thread.currentThread().setContextClassLoader(SyndFeed.class.getClassLoader()); // TODO braucht man das? | |
final FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance(); | |
final FeedFetcher feedFetcher = new HttpURLFeedFetcher(feedInfoCache); | |
// Thread.currentThread().setContextClassLoader(oldCL); // TODO s.o. | |
final SyndFeed feed = feedFetcher.retrieveFeed(new URL(feedUrl)); | |
if (feed != null) { | |
if (_log.isInfoEnabled()) { | |
_log.info("Reading feed " + feedUrl); | |
} | |
@SuppressWarnings("unchecked") | |
final List<SyndEntry> entries = feed.getEntries(); | |
if (entries != null) { | |
for (final SyndEntry entry : entries) { | |
final Record r = DataFactory.DEFAULT.createRecord(); | |
for (final String feedProp : FeedProperties.ALL_PROPS) { | |
final Any value = readProperty(feed, entry, feedProp); | |
if (value != null) { | |
r.getMetadata().put(feedProp, value); | |
} | |
} | |
results.add(r); | |
} | |
} | |
} | |
return results; | |
} | |
@SuppressWarnings("unchecked") | |
private Any readProperty(final SyndFeed feed, final SyndEntry entry, final String property) { | |
switch (property) { | |
case FeedProperties.FEED_AUTHORS: | |
return getPersons(feed.getAuthors(), feed.getAuthor()); | |
case FeedProperties.FEED_CATEGORIES: | |
return getCategories(feed.getCategories()); | |
case FeedProperties.FEED_CONTRIBUTORS: | |
return getPersons(feed.getContributors(), null); | |
case FeedProperties.FEED_COPYRIGHT: | |
return getValue(feed.getCopyright()); | |
case FeedProperties.FEED_DESCRIPTION: | |
return getValue(feed.getDescription()); | |
case FeedProperties.FEED_ENCODING: | |
return getValue(feed.getEncoding()); | |
case FeedProperties.FEED_TYPE: | |
return getValue(feed.getFeedType()); | |
case FeedProperties.FEED_IMAGE: | |
return getImage(feed.getImage()); | |
case FeedProperties.FEED_LANGUAGE: | |
return getValue(feed.getLanguage()); | |
case FeedProperties.FEED_LINKS: | |
return getLinks(feed.getLinks(), feed.getLink()); | |
case FeedProperties.FEED_PUBLISH_DATE: | |
return getValue(feed.getPublishedDate()); | |
case FeedProperties.FEED_TITLE: | |
return getValue(feed.getTitle()); | |
case FeedProperties.FEED_URI: | |
return getValue(feed.getUri()); | |
// feed entry attributes | |
case FeedProperties.FEED_ENTRY_AUTHORS: | |
return getPersons(entry.getAuthors(), entry.getAuthor()); | |
case FeedProperties.FEED_ENTRY_CATEGORIES: | |
return getCategories(entry.getCategories()); | |
case FeedProperties.FEED_ENTRY_CONTENTS: | |
return getContents(entry.getContents()); | |
case FeedProperties.FEED_ENTRY_CONTRIBUTORS: | |
return getPersons(entry.getContributors(), null); | |
case FeedProperties.FEED_ENTRY_DESCRIPTION: | |
return getContent(entry.getDescription()); | |
case FeedProperties.FEED_ENTRY_ENCLOSURES: | |
return getEnclosures(entry.getEnclosures()); | |
case FeedProperties.FEED_ENTRY_LINKS: | |
return getLinks(entry.getLinks(), entry.getLink()); | |
case FeedProperties.FEED_ENTRY_PUBLISH_DATE: | |
return getValue(entry.getPublishedDate()); | |
case FeedProperties.FEED_ENTRY_TITLE: | |
return getValue(entry.getTitle()); | |
case FeedProperties.FEED_ENTRY_URI: | |
return getValue(entry.getUri()); | |
case FeedProperties.FEED_ENTRY_UPDATE_DATE: | |
return getValue(entry.getUpdatedDate()); | |
default: | |
throw new RuntimeException("Unknown feed property: " + property); | |
} | |
} | |
/** @return A {@link Value} object matching the given Object. */ | |
private Value getValue(final Object object) { | |
if (object != null) { | |
return DataFactory.DEFAULT.autoConvertValue(object); | |
} else { | |
return null; | |
} | |
} | |
/** convert given Object to Value and put it in given map with given name as key. */ | |
private void putValue(final AnyMap map, final String name, final Object object) { | |
final Any value = getValue(object); | |
if (value != null) { | |
map.put(name, value); | |
} | |
} | |
/** | |
* Creates an {@link AnySeq}, where each {@link AnyMap} contains all category information. | |
* | |
* @param categories | |
* a List of SyndCategory | |
* @return an {@link AnySeq} with the list of categories | |
*/ | |
private AnySeq getCategories(final List<SyndCategory> categories) { | |
if (categories != null && !categories.isEmpty()) { | |
final AnySeq categoryList = DataFactory.DEFAULT.createAnySeq(); | |
for (final SyndCategory category : categories) { | |
try { | |
final AnyMap anyMap = DataFactory.DEFAULT.createAnyMap(); | |
putValue(anyMap, "Name", category.getName()); | |
putValue(anyMap, "TaxanomyUri", category.getTaxonomyUri()); | |
categoryList.add(anyMap); | |
} catch (final InvalidValueTypeException e) { | |
_log.error("Error while creating AnyMap for Categories", e); | |
} | |
} | |
return categoryList; | |
} | |
return null; | |
} | |
/** | |
* Creates an {@link AnyMap} containing all content information. | |
* | |
* @param content | |
* a SyndContent | |
* @return an {@link AnyMap} with the content information | |
*/ | |
private AnyMap getContent(final SyndContent content) { | |
if (content != null) { | |
try { | |
final AnyMap anyMap = DataFactory.DEFAULT.createAnyMap(); | |
if (content.getMode() != null) { | |
putValue(anyMap, "Mode", content.getMode()); | |
} | |
if (content.getValue() != null) { | |
putValue(anyMap, "Value", content.getValue()); | |
} | |
String type = content.getType(); | |
if ("html".equals(type)) { | |
type = "text/html"; | |
} else if ("xml".equals(type)) { | |
type = "text/xml"; | |
} else if ("text".equals(type)) { | |
type = "text/Plain"; | |
} | |
putValue(anyMap, "Type", type); | |
return anyMap; | |
} catch (final InvalidValueTypeException e) { | |
_log.error("Error while creating AnyMap for Content", e); | |
} | |
} | |
return null; | |
} | |
/** | |
* Creates an {@link AnySeq}, where each {@link AnyMap} contains all contents information. | |
* | |
* @param contents | |
* a List of SyndContent | |
* @return an {@link AnySeq} with the contents information | |
*/ | |
private AnySeq getContents(final List<SyndContent> contents) { | |
if (contents != null && !contents.isEmpty()) { | |
final AnySeq anySeq = DataFactory.DEFAULT.createAnySeq(); | |
for (final SyndContent content : contents) { | |
final AnyMap anyMap = getContent(content); | |
if (anyMap != null) { | |
anySeq.add(anyMap); | |
} | |
} | |
return anySeq; | |
} | |
return null; | |
} | |
/** | |
* Creates an {@link AnySeq}, where each {@link AnyMap} contains all enclosure information. | |
* | |
* @param enclosures | |
* a List of SyndEnclosure | |
* @return an {@link AnySeq} with the list of enclosures | |
*/ | |
private AnySeq getEnclosures(final List<SyndEnclosure> enclosures) { | |
if (enclosures != null && !enclosures.isEmpty()) { | |
final AnySeq enclosureList = DataFactory.DEFAULT.createAnySeq(); | |
for (final SyndEnclosure enclosure : enclosures) { | |
try { | |
final AnyMap anyMap = DataFactory.DEFAULT.createAnyMap(); | |
putValue(anyMap, "Type", enclosure.getType()); | |
putValue(anyMap, "Url", enclosure.getUrl()); | |
putValue(anyMap, "Length", enclosure.getLength()); | |
enclosureList.add(anyMap); | |
} catch (final InvalidValueTypeException e) { | |
_log.error("Error while creating AnyMap for Enclosures", e); | |
} | |
} | |
return enclosureList; | |
} | |
return null; | |
} | |
/** | |
* Creates an {@link AnyMap} containing all Image information. | |
* | |
* @param feedImage | |
* a SyndImage | |
* @return The {@link AnyMap} object with all information | |
*/ | |
private AnyMap getImage(final SyndImage feedImage) { | |
if (feedImage != null) { | |
try { | |
final AnyMap imageMap = DataFactory.DEFAULT.createAnyMap(); | |
putValue(imageMap, "Link", feedImage.getLink()); | |
putValue(imageMap, "Title", feedImage.getTitle()); | |
putValue(imageMap, "Url", feedImage.getUrl()); | |
putValue(imageMap, "Description", feedImage.getDescription()); | |
return imageMap; | |
} catch (final InvalidValueTypeException e) { | |
_log.error("Error while creating AnyMap for Image", e); | |
} | |
} | |
return null; | |
} | |
/** | |
* Creates an AnySeq, where each {@link AnyMap} contains all link information. If links is empty the fallbackValue | |
* will be used. | |
* | |
* @param links | |
* a List of SyndLink | |
* @param fallbackValue | |
* a fallback link href if parameter links is empty | |
* @return an {@link AnySeq} with all link information | |
*/ | |
private AnySeq getLinks(final List<SyndLink> links, final String fallbackValue) { | |
final AnySeq anySeq = DataFactory.DEFAULT.createAnySeq(); | |
if (links != null && !links.isEmpty()) { | |
for (final SyndLink link : links) { | |
try { | |
final AnyMap anyMap = DataFactory.DEFAULT.createAnyMap(); | |
putValue(anyMap, "Href", link.getHref()); | |
putValue(anyMap, "Hreflang", link.getHreflang()); | |
putValue(anyMap, "Rel", link.getRel()); | |
putValue(anyMap, "Title", link.getTitle()); | |
putValue(anyMap, "Type", link.getType()); | |
putValue(anyMap, "Length", link.getLength()); | |
anySeq.add(anyMap); | |
} catch (final InvalidValueTypeException e) { | |
_log.error("Error while creating AnyMap for Links", e); | |
} | |
} | |
return anySeq; | |
} else if (fallbackValue != null && !fallbackValue.isEmpty()) { | |
try { | |
final AnyMap anyMap = DataFactory.DEFAULT.createAnyMap(); | |
putValue(anyMap, "Href", fallbackValue); | |
anySeq.add(anyMap); | |
return anySeq; | |
} catch (final InvalidValueTypeException e) { | |
_log.error("Error while creating AnyMap for Links", e); | |
} | |
} | |
return null; | |
} | |
/** | |
* Creates an {@link AnySeq}, where each {@link Value} contains all person information. If persons is empty the | |
* fallbackValue will be used to create an AnySeq with one one person with given name | |
* | |
* @param persons | |
* a List of SyndPerson | |
* @param fallbackValue | |
* a fallback person name if parameter persons is empty | |
* @return an {@link AnySeq} object with the list of persons | |
*/ | |
private AnySeq getPersons(final List<SyndPerson> persons, final String fallbackValue) { | |
final AnySeq anySeq = DataFactory.DEFAULT.createAnySeq(); | |
if (persons != null && !persons.isEmpty()) { | |
for (final SyndPerson person : persons) { | |
try { | |
final AnyMap personMap = DataFactory.DEFAULT.createAnyMap(); | |
putValue(personMap, "Email", person.getEmail()); | |
putValue(personMap, "Name", person.getName()); | |
putValue(personMap, "Uri", person.getUri()); | |
anySeq.add(personMap); | |
} catch (final InvalidValueTypeException e) { | |
_log.error("Error while creating AnyMap for Person", e); | |
} | |
} | |
return anySeq; | |
} else if (fallbackValue != null && !fallbackValue.isEmpty()) { | |
try { | |
final AnyMap personMap = DataFactory.DEFAULT.createAnyMap(); | |
putValue(personMap, "Name", fallbackValue); | |
anySeq.add(personMap); | |
return anySeq; | |
} catch (final InvalidValueTypeException e) { | |
_log.error("Error while creating AnyMap for Person", e); | |
} | |
} | |
return null; | |
} | |
} |