blob: be24cb5a9a68c59a4ad1f7beee0d16e8287b765c [file] [log] [blame]
/*********************************************************************************************************************
* Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This
* program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which
* accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*********************************************************************************************************************/
package org.eclipse.smila.importing.crawler.feed.test;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import org.apache.commons.io.IOUtils;
import org.eclipse.smila.bulkbuilder.BulkbuilderService;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.datamodel.ipc.BinaryObjectStreamIterator;
import org.eclipse.smila.importing.ImportingConstants;
import org.eclipse.smila.importing.crawler.feed.FeedCrawlerWorker;
import org.eclipse.smila.jobmanager.JobRunDataProvider;
import org.eclipse.smila.jobmanager.JobRunEngine;
import org.eclipse.smila.jobmanager.JobState;
import org.eclipse.smila.jobmanager.definitions.JobManagerConstants;
import org.eclipse.smila.objectstore.ObjectStoreException;
import org.eclipse.smila.objectstore.ObjectStoreService;
import org.eclipse.smila.objectstore.StoreObject;
import org.eclipse.smila.taskworker.Worker;
import org.eclipse.smila.test.DeclarativeServiceTestCase;
import org.eclipse.smila.utils.service.ServiceUtils;
import org.osgi.framework.ServiceReference;
/**
 * Integration tests for the {@link FeedCrawlerWorker}: starts crawl jobs via the job manager, waits for them to
 * finish and verifies the records written to the object store against feed-specific property expectations.
 */
public class TestFeedCrawlerWorker extends DeclarativeServiceTestCase {

  /** name of the object store that receives the crawled record bulks. */
  private static final String STORE_NAME = "records";

  /** name of the bucket (object name prefix) containing the crawled entries. */
  private static final String BUCKET = "crawledEntries";

  /** a URL that cannot be resolved, used to test error handling for invalid feeds. */
  private static final String INVALID_URL = "http://anonymous.invalid/index.html";

  /** maximum time to wait until a job run reaches its expected final state. */
  private static final long MAX_WAIT_TIME_JOB_COMPLETION = 5000L; // ms

  /** name of the store containing bucket definitions. */
  protected static final String BUCKETS_STORE = "buckets";

  /** name of attribute containing the URL of the web resource. */
  public static final String ATTRIBUTE_URL = "httpUrl";

  /** job manager service used to start and finish job runs. */
  private JobRunEngine _jobManager;

  /** bulkbuilder service used to push input URL records into jobs with an input slot. */
  private BulkbuilderService _bulkbuilder;

  /** provides job run data, used to poll for the job run state. */
  private JobRunDataProvider _jobRunDataProvider;

  /** object store service holding the crawled record bulks. */
  private ObjectStoreService _objectStore;

  /** looks up the required services and makes sure the record store exists and is empty. */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    _jobManager = getService(JobRunEngine.class);
    _bulkbuilder = getService(BulkbuilderService.class);
    _jobRunDataProvider = getService(JobRunDataProvider.class);
    _objectStore = getService(ObjectStoreService.class);
    if (_objectStore.existsStore(STORE_NAME)) {
      _objectStore.clearStore(STORE_NAME);
    } else {
      _objectStore.createStore(STORE_NAME, null);
    }
  }

  /** tests OSGI service. */
  @SuppressWarnings("rawtypes")
  public void testService() throws Exception {
    final ServiceReference[] services = ServiceUtils.getServiceReferences(Worker.class);
    assertTrue("no worker services started.", services.length > 0);
    for (final ServiceReference service : services) {
      final Worker worker = ServiceUtils.getService(service, Worker.class);
      if (worker instanceof FeedCrawlerWorker) {
        assertEquals(FeedCrawlerWorker.NAME, worker.getName());
        return; // worker found, test ok.
      }
    }
    fail("FeedCrawlerWorker not found");
  }

  /** tests crawling of a single RSS feed configured in the job definition. */
  public void testCrawlRssFeed() throws Exception {
    final String jobName = "crawlRssFeed";
    final String jobRunId = _jobManager.startJob(jobName);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    checkCrawledRecords(100, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertEclipseRssProperties(metadata, false);
      }
    });
  }

  /** tests crawling of a single Atom feed configured in the job definition. */
  public void testCrawlAtomFeed() throws Exception {
    final String jobName = "crawlAtomFeed";
    final String jobRunId = _jobManager.startJob(jobName);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    checkCrawledRecords(15, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertTwitterAtomProperties(metadata, false);
      }
    });
  }

  /** tests crawling of a mix of RSS and Atom feeds in a single job. */
  public void testCrawlManyFeeds() throws Exception {
    final String jobName = "crawlManyFeeds";
    final String jobRunId = _jobManager.startJob(jobName);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    checkCrawledRecords(137, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertAtomOrRssProperties(metadata, false);
      }
    });
  }

  /** tests crawl job that contains a valid and an invalid feed url. */
  public void testCrawlManyFeedsWithInvalidFeed() throws Exception {
    final String jobName = "crawlManyFeedsWithInvalidFeed";
    final String jobRunId = _jobManager.startJob(jobName);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    checkCrawledRecords(100, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertAtomOrRssProperties(metadata, false);
      }
    });
  }

  /** tests crawl job that contains only an invalid feed url. */
  public void testCrawlInvalidFeedUrl() throws Exception {
    final String jobName = "crawlInvalidFeedUrl";
    final String jobRunId = _jobManager.startJob(jobName);
    waitForJobRun(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION, JobState.FAILED);
    checkCrawledRecords(0, null);
  }

  /** tests that all feed and entry properties are mapped when the job maps every property. */
  public void testCrawlAllProperties() throws Exception {
    final String jobName = "crawlAllProperties";
    final String jobRunId = _jobManager.startJob(jobName);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    checkCrawledRecords(137, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertAtomOrRssProperties(metadata, true);
      }
    });
  }

  /** tests a job with an input slot fed with a single feed URL record via the bulkbuilder. */
  public void testCrawlInputSlotSingleValue() throws Exception {
    final String jobName = "crawlInputSlot";
    final String jobRunId = _jobManager.startJob(jobName);
    final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl");
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/eclipse-smila-news.rss");
    _bulkbuilder.addRecord(jobName, inputUrls);
    _bulkbuilder.commitJob(jobName);
    _jobManager.finishJob(jobName, jobRunId);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    checkCrawledRecords(100, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertEclipseRssProperties(metadata, true);
      }
    });
  }

  /** tests a job with an input slot fed with multiple feed URL records via the bulkbuilder. */
  public void testCrawlInputSlotMultiValue() throws Exception {
    final String jobName = "crawlInputSlot";
    final String jobRunId = _jobManager.startJob(jobName);
    // the same record instance is reused: the bulkbuilder serializes on addRecord, so overwriting
    // the URL attribute afterwards is safe.
    final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl");
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/eclipse-smila-news.rss");
    _bulkbuilder.addRecord(jobName, inputUrls);
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/twitter-smila-search.atom");
    _bulkbuilder.addRecord(jobName, inputUrls);
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/spiegel-nachrichten.rss");
    _bulkbuilder.addRecord(jobName, inputUrls);
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online.atom");
    _bulkbuilder.addRecord(jobName, inputUrls);
    _bulkbuilder.commitJob(jobName);
    _jobManager.finishJob(jobName, jobRunId);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    checkCrawledRecords(137, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertAtomOrRssProperties(metadata, true);
      }
    });
  }

  /** tests that a job with an input slot fails when fed an unresolvable feed URL. */
  public void testCrawlInputSlotErrorHandling() throws Exception {
    final String jobName = "crawlInputSlot";
    final String jobRunId = _jobManager.startJob(jobName);
    final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl");
    inputUrls.getMetadata().put(ATTRIBUTE_URL, INVALID_URL);
    _bulkbuilder.addRecord(jobName, inputUrls);
    _bulkbuilder.commitJob(jobName);
    _jobManager.finishJob(jobName, jobRunId);
    waitForJobRunFailed(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
  }

  /**
   * tests delta hash computation based on entry content: crawls a feed twice (second time with changed content for
   * one entry) and asserts that exactly that entry's delta hash changed.
   */
  public void testCrawlDeltaHashContent() throws Exception {
    final String jobName = "crawlDeltaPropsContent";
    String jobRunId = _jobManager.startJob(jobName);
    final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl");
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online.atom");
    _bulkbuilder.addRecord(jobName, inputUrls);
    _bulkbuilder.commitJob(jobName);
    _jobManager.finishJob(jobName, jobRunId);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    final AnySeq deltaHashValuesOld = DataFactory.DEFAULT.createAnySeq();
    checkCrawledRecords(14, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertZeitAtomProperties(metadata, false);
      }
    }, deltaHashValuesOld);
    _objectStore.clearStore(STORE_NAME);
    jobRunId = _jobManager.startJob(jobName);
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online-newer.atom");
    _bulkbuilder.addRecord(jobName, inputUrls);
    _bulkbuilder.commitJob(jobName);
    _jobManager.finishJob(jobName, jobRunId);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    final AnySeq deltaHashValuesNew = DataFactory.DEFAULT.createAnySeq();
    checkCrawledRecords(14, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertZeitAtomProperties(metadata, false);
      }
    }, deltaHashValuesNew);
    // record '0' is different in second crawled feed
    assertDeltaHashValues(deltaHashValuesOld, deltaHashValuesNew, 0);
  }

  /**
   * configured deltaProps ["itemAuthors", "feedCategories"] don't have values, so all mapped props are used and the
   * change is detected.
   */
  public void testCrawlDeltaPropsNoValues() throws Exception {
    final String jobName = "crawlDeltaPropsNoValues";
    String jobRunId = _jobManager.startJob(jobName);
    final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl");
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online.atom");
    _bulkbuilder.addRecord(jobName, inputUrls);
    _bulkbuilder.commitJob(jobName);
    _jobManager.finishJob(jobName, jobRunId);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    final AnySeq deltaHashValuesOld = DataFactory.DEFAULT.createAnySeq();
    checkCrawledRecords(14, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertZeitAtomProperties(metadata, false);
      }
    }, deltaHashValuesOld);
    _objectStore.clearStore(STORE_NAME);
    jobRunId = _jobManager.startJob(jobName);
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online-update.atom");
    _bulkbuilder.addRecord(jobName, inputUrls);
    _bulkbuilder.commitJob(jobName);
    _jobManager.finishJob(jobName, jobRunId);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    final AnySeq deltaHashValuesNew = DataFactory.DEFAULT.createAnySeq();
    checkCrawledRecords(14, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertZeitAtomProperties(metadata, false);
      }
    }, deltaHashValuesNew);
    // record '0' is different in second crawled feed
    assertDeltaHashValues(deltaHashValuesOld, deltaHashValuesNew, 0);
  }

  /**
   * tests that the default delta-props configuration yields the same delta hashes as an explicit "all properties"
   * configuration for the same feed.
   */
  public void testCrawlDeltaHashDefault() throws Exception {
    String jobName = "crawlDeltaPropsDefault";
    String jobRunId = _jobManager.startJob(jobName);
    final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl");
    inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online.atom");
    _bulkbuilder.addRecord(jobName, inputUrls);
    _bulkbuilder.commitJob(jobName);
    _jobManager.finishJob(jobName, jobRunId);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    final AnySeq deltaHashValuesOld = DataFactory.DEFAULT.createAnySeq();
    checkCrawledRecords(14, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertZeitAtomProperties(metadata, false);
      }
    }, deltaHashValuesOld);
    _objectStore.clearStore(STORE_NAME);
    jobName = "crawlDeltaPropsAll";
    jobRunId = _jobManager.startJob(jobName);
    _bulkbuilder.addRecord(jobName, inputUrls);
    _bulkbuilder.commitJob(jobName);
    _jobManager.finishJob(jobName, jobRunId);
    waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION);
    final AnySeq deltaHashValuesNew = DataFactory.DEFAULT.createAnySeq();
    checkCrawledRecords(14, new PropertyAssertions() {
      @Override
      public void assertProperties(final AnyMap metadata) {
        assertZeitAtomProperties(metadata, false);
      }
    }, deltaHashValuesNew);
    // assert that delta hashes are equal to previous job run
    assertDeltaHashValues(deltaHashValuesOld, deltaHashValuesNew, null);
  }

  /** waits until the given job run has reached state FAILED. */
  protected void waitForJobRunFailed(final String jobName, final String jobId, final long maxWaitTime)
    throws Exception {
    waitForJobRun(jobName, jobId, maxWaitTime, JobState.FAILED);
  }

  /** waits until the given job run has reached state SUCCEEDED. */
  protected void waitForJobRunCompleted(final String jobName, final String jobId, final long maxWaitTime)
    throws Exception {
    waitForJobRun(jobName, jobId, maxWaitTime, JobState.SUCCEEDED);
  }

  /**
   * Waits (polling every 500 ms) for a job run to reach the expected final state. Fails immediately if the opposite
   * final state is reached, or if maxWaitTime (ms) elapses without reaching any final state.
   */
  protected void waitForJobRun(final String jobName, final String jobId, final long maxWaitTime,
    final JobState expectedJobState) throws Exception {
    final long sleepTime = 500L;
    final long millisStarted = System.currentTimeMillis();
    while (true) {
      final AnyMap runData = _jobRunDataProvider.getJobRunData(jobName, jobId);
      final String jobRunState = runData.getStringValue(JobManagerConstants.DATA_JOB_STATE);
      if (jobRunState != null) {
        final JobState state = JobState.valueOf(jobRunState);
        if (state == expectedJobState) {
          return; // finally found what we're waiting for.
        }
        // fail fast if the job run ended in the "wrong" final state.
        if (expectedJobState == JobState.SUCCEEDED) {
          assertNotSame("didn't expect JobState.FAILED", JobState.FAILED, state);
        }
        if (expectedJobState == JobState.FAILED) {
          assertNotSame("didn't expect JobState.SUCCEEDED", JobState.SUCCEEDED, state);
        }
      }
      assertTrue("Waited too long for job to complete", System.currentTimeMillis() - millisStarted <= maxWaitTime);
      Thread.sleep(sleepTime);
    }
  }

  /** callback for feed-specific assertions on the metadata of each crawled record. */
  private interface PropertyAssertions {
    void assertProperties(AnyMap metadata);
  }

  /**
   * checks the crawled records: expected count, common record attributes and, if propertyAssertions is not null, the
   * feed-specific properties of every record.
   */
  private void checkCrawledRecords(final int expectedRecordCount, final PropertyAssertions propertyAssertions)
    throws ObjectStoreException, IOException {
    checkCrawledRecords(expectedRecordCount, propertyAssertions, DataFactory.DEFAULT.createAnySeq());
  }

  /**
   * checks the crawled records and additionally appends the delta hash value of each record to the given sequence so
   * that callers can compare delta hashes across job runs.
   */
  private void checkCrawledRecords(final int expectedRecordCount, final PropertyAssertions propertyAssertions,
    final AnySeq deltaHashValues) throws ObjectStoreException, IOException {
    final Collection<StoreObject> objects = _objectStore.getStoreObjectInfos(STORE_NAME, BUCKET);
    if (expectedRecordCount == 0) {
      assertTrue(objects.isEmpty());
    } else {
      int recordCount = 0;
      assertFalse(objects.isEmpty());
      for (final StoreObject objectInfo : objects) {
        final InputStream bulkStream = _objectStore.readObject(STORE_NAME, objectInfo.getId());
        try (final BinaryObjectStreamIterator records = new BinaryObjectStreamIterator(bulkStream)) {
          while (records.hasNext()) {
            final Record record = records.next();
            assertNotNull(record);
            recordCount++;
            assertNotNull(record.getId());
            assertEquals("feeds", record.getSource());
            final AnyMap metadata = record.getMetadata();
            assertTrue(metadata.containsKey(ImportingConstants.ATTRIBUTE_DELTA_HASH));
            deltaHashValues.add(metadata.getStringValue(ImportingConstants.ATTRIBUTE_DELTA_HASH));
            assertFalse(record.hasAttachments());
            if (propertyAssertions != null) {
              propertyAssertions.assertProperties(metadata);
            }
          }
        } finally {
          // defensive: close the raw stream even if the iterator did not close it on failure.
          IOUtils.closeQuietly(bulkStream);
        }
      }
      assertEquals(expectedRecordCount, recordCount);
    }
  }

  /** asserts the properties expected for an entry of the Eclipse SMILA news RSS feed. */
  protected void assertEclipseRssProperties(final AnyMap metadata, final boolean expectAllProperties) {
    assertFalse(metadata.getSeq("Authors").isEmpty());
    assertNull(metadata.getSeq("Contents"));
    assertFalse(metadata.getMap("Description").isEmpty());
    assertNotNull(metadata.getDateTimeValue("PublishDate"));
    assertNull(metadata.getDateTimeValue("UpdateDate"));
    assertNotNull(metadata.getStringValue("URI"));
    if (expectAllProperties) {
      // all feed properties
      assertEquals("http://localhost:8700/feeds/eclipse-smila-news.rss", metadata.getStringValue("FeedSourceUrl"));
      assertNull(metadata.getSeq("FeedAuthors"));
      assertNull(metadata.getSeq("FeedCategories"));
      assertNull(metadata.getSeq("FeedContributors"));
      assertNull(metadata.getStringValue("FeedCopyright"));
      assertNotNull(metadata.getStringValue("FeedDescription"));
      assertNull(metadata.getStringValue("FeedEncoding"));
      assertNull(metadata.getMap("FeedImage"));
      assertNull(metadata.getStringValue("FeedLanguage"));
      assertFalse(metadata.getSeq("FeedLinks").isEmpty());
      assertNull(metadata.getDateTimeValue("FeedPublishDate"));
      assertNotNull(metadata.getStringValue("FeedTitle"));
      assertNotNull(metadata.getStringValue("FeedType"));
      assertNotNull(metadata.getStringValue("FeedUri"));
      // all remaining entry properties
      assertNull(metadata.getSeq("Categories"));
      assertNull(metadata.getSeq("Contributors"));
      assertNull(metadata.getSeq("Enclosures"));
      assertFalse(metadata.getSeq("Links").isEmpty());
      assertNotNull(metadata.getStringValue("Title"));
    }
  }

  /** asserts the properties expected for an entry of the Twitter SMILA search Atom feed. */
  protected void assertTwitterAtomProperties(final AnyMap metadata, final boolean expectAllProperties) {
    assertFalse(metadata.getSeq("Authors").isEmpty());
    assertFalse(metadata.getSeq("Contents").isEmpty());
    assertNull(metadata.getMap("Description"));
    assertNotNull(metadata.getDateTimeValue("PublishDate"));
    assertNotNull(metadata.getDateTimeValue("UpdateDate"));
    assertNotNull(metadata.getStringValue("URI"));
    if (expectAllProperties) {
      // all feed properties
      assertEquals("http://localhost:8700/feeds/twitter-smila-search.atom",
        metadata.getStringValue("FeedSourceUrl"));
      assertNull(metadata.getSeq("FeedAuthors"));
      assertNull(metadata.getSeq("FeedCategories"));
      assertNull(metadata.getSeq("FeedContributors"));
      assertNull(metadata.getStringValue("FeedCopyright"));
      assertNull(metadata.getStringValue("FeedDescription"));
      assertNull(metadata.getStringValue("FeedEncoding"));
      assertNull(metadata.getMap("FeedImage"));
      assertNull(metadata.getStringValue("FeedLanguage"));
      assertFalse(metadata.getSeq("FeedLinks").isEmpty());
      assertNotNull(metadata.getDateTimeValue("FeedPublishDate"));
      assertNotNull(metadata.getStringValue("FeedTitle"));
      assertNotNull(metadata.getStringValue("FeedUri"));
      assertNotNull(metadata.getStringValue("FeedType"));
      // all remaining entry properties
      assertNull(metadata.getSeq("Categories"));
      assertNull(metadata.getSeq("Contributors"));
      assertNull(metadata.getSeq("Enclosures"));
      assertFalse(metadata.getSeq("Links").isEmpty());
      assertNotNull(metadata.getStringValue("Title"));
    }
  }

  /** asserts the properties expected for an entry of the Spiegel RSS feed. */
  protected void assertSpiegelRssProperties(final AnyMap metadata, final boolean expectAllProperties) {
    assertNull(metadata.getSeq("Authors"));
    assertFalse(metadata.getSeq("Contents").isEmpty());
    assertFalse(metadata.getMap("Description").isEmpty());
    assertNotNull(metadata.getDateTimeValue("PublishDate"));
    assertNull(metadata.getDateTimeValue("UpdateDate"));
    assertNotNull(metadata.getStringValue("URI"));
    if (expectAllProperties) {
      // all feed properties
      assertEquals("http://localhost:8700/feeds/spiegel-nachrichten.rss", metadata.getStringValue("FeedSourceUrl"));
      assertNull(metadata.getSeq("FeedAuthors"));
      assertNull(metadata.getSeq("FeedCategories"));
      assertNull(metadata.getSeq("FeedContributors"));
      assertNull(metadata.getStringValue("FeedCopyright"));
      assertNotNull(metadata.getStringValue("FeedDescription"));
      assertNull(metadata.getStringValue("FeedEncoding"));
      assertFalse(metadata.getMap("FeedImage").isEmpty());
      assertNotNull(metadata.getStringValue("FeedLanguage"));
      assertFalse(metadata.getSeq("FeedLinks").isEmpty());
      assertNotNull(metadata.getDateTimeValue("FeedPublishDate"));
      assertNotNull(metadata.getStringValue("FeedTitle"));
      assertNull(metadata.getStringValue("FeedUri"));
      assertNotNull(metadata.getStringValue("FeedType"));
      // all remaining entry properties
      assertNull(metadata.getSeq("Categories"));
      assertNull(metadata.getSeq("Contributors"));
      assertFalse(metadata.getSeq("Enclosures").isEmpty());
      assertFalse(metadata.getSeq("Links").isEmpty());
      assertNotNull(metadata.getStringValue("Title"));
    }
  }

  /** asserts the properties expected for an entry of the Zeit Online Atom feed. */
  protected void assertZeitAtomProperties(final AnyMap metadata, final boolean expectAllProperties) {
    assertNull(metadata.getSeq("Authors"));
    assertNull(metadata.getSeq("Contents"));
    assertFalse(metadata.getMap("Description").isEmpty());
    assertNull(metadata.getDateTimeValue("PublishDate"));
    assertNull(metadata.getDateTimeValue("UpdateDate"));
    assertNotNull(metadata.getStringValue("URI"));
    if (expectAllProperties) {
      // all feed properties
      assertEquals("http://localhost:8700/feeds/zeit-online.atom", metadata.getStringValue("FeedSourceUrl"));
      assertFalse(metadata.getSeq("FeedAuthors").isEmpty());
      assertNull(metadata.getSeq("FeedCategories"));
      assertNull(metadata.getSeq("FeedContributors"));
      assertNotNull(metadata.getStringValue("FeedCopyright"));
      assertNotNull(metadata.getStringValue("FeedDescription"));
      assertNull(metadata.getStringValue("FeedEncoding"));
      assertNotNull(metadata.getMap("FeedImage"));
      assertNotNull(metadata.getStringValue("FeedLanguage"));
      assertNotNull(metadata.getSeq("FeedLinks"));
      assertNotNull(metadata.getDateTimeValue("FeedPublishDate"));
      assertNotNull(metadata.getStringValue("FeedTitle"));
      assertNull(metadata.getStringValue("FeedUri"));
      assertNotNull(metadata.getStringValue("FeedType"));
      // all remaining entry properties
      assertFalse(metadata.getSeq("Categories").isEmpty());
      assertNull(metadata.getSeq("Contributors"));
      assertNull(metadata.getSeq("Enclosures"));
      assertFalse(metadata.getSeq("Links").isEmpty());
      assertNotNull(metadata.getStringValue("Title"));
    }
  }

  /** asserts the properties expected for an entry of the updated ("newer") Zeit Online Atom feed. */
  protected void assertZeitNewAtomProperties(final AnyMap metadata, final boolean expectAllProperties) {
    assertNull(metadata.getSeq("Authors"));
    assertNull(metadata.getSeq("Contents"));
    assertFalse(metadata.getMap("Description").isEmpty());
    assertNull(metadata.getDateTimeValue("PublishDate"));
    assertNull(metadata.getDateTimeValue("UpdateDate"));
    assertNotNull(metadata.getStringValue("URI"));
    if (expectAllProperties) {
      // all feed properties
      assertEquals("http://localhost:8700/feeds/zeit-online-newer.atom", metadata.getStringValue("FeedSourceUrl"));
      assertFalse(metadata.getSeq("FeedAuthors").isEmpty());
      assertNull(metadata.getSeq("FeedCategories"));
      assertNull(metadata.getSeq("FeedContributors"));
      assertNotNull(metadata.getStringValue("FeedCopyright"));
      assertNotNull(metadata.getStringValue("FeedDescription"));
      assertNull(metadata.getStringValue("FeedEncoding"));
      assertNotNull(metadata.getMap("FeedImage"));
      assertNotNull(metadata.getStringValue("FeedLanguage"));
      assertNotNull(metadata.getSeq("FeedLinks"));
      assertNotNull(metadata.getDateTimeValue("FeedPublishDate"));
      assertNotNull(metadata.getStringValue("FeedTitle"));
      assertNull(metadata.getStringValue("FeedUri"));
      assertNotNull(metadata.getStringValue("FeedType"));
      // all remaining entry properties
      assertFalse(metadata.getSeq("Categories").isEmpty());
      assertNull(metadata.getSeq("Contributors"));
      assertNull(metadata.getSeq("Enclosures"));
      assertFalse(metadata.getSeq("Links").isEmpty());
      assertNotNull(metadata.getStringValue("Title"));
    }
  }

  /** dispatches to the feed-specific assertion method based on the first feed link of the record. */
  protected void assertAtomOrRssProperties(final AnyMap metadata, final boolean expectAllProperties) {
    final AnyMap feedLink = metadata.getSeq("FeedLinks").getMap(0);
    final String feedUri = feedLink.getStringValue("Href");
    switch (feedUri) {
      case "http://www.eclipse.org/forums/":
        assertEclipseRssProperties(metadata, expectAllProperties);
        break;
      case "http://search.twitter.com/search?q=smila":
        assertTwitterAtomProperties(metadata, expectAllProperties);
        break;
      case "http://www.spiegel.de":
        assertSpiegelRssProperties(metadata, expectAllProperties);
        break;
      case "http://www.zeit.de/index":
        assertZeitAtomProperties(metadata, expectAllProperties);
        break;
      default:
        fail("unexpected feed URI '" + feedUri + "'");
    }
  }

  /**
   * asserts that the delta hash values of two job runs match: all values must be equal, except for the record at
   * index recordNoWithChangedDeltaHash (if not null), whose value must differ.
   */
  private void assertDeltaHashValues(final AnySeq deltaHashValuesOld, final AnySeq deltaHashValuesNew,
    final Integer recordNoWithChangedDeltaHash) {
    // guard against silently passing when the runs produced different numbers of records.
    assertEquals("delta hash sequences must have the same length", deltaHashValuesOld.size(),
      deltaHashValuesNew.size());
    for (int i = 0; i < deltaHashValuesOld.size(); i++) {
      if (recordNoWithChangedDeltaHash != null && i == recordNoWithChangedDeltaHash.intValue()) {
        assertFalse("delta hash of record " + i + " should have changed",
          deltaHashValuesOld.get(i).equals(deltaHashValuesNew.get(i)));
      } else {
        assertTrue("delta hash of record " + i + " should be unchanged",
          deltaHashValuesOld.get(i).equals(deltaHashValuesNew.get(i)));
      }
    }
  }
}