/********************************************************************************************************************* | |
* Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This | |
* program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which | |
* accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html | |
*********************************************************************************************************************/ | |
package org.eclipse.smila.importing.crawler.feed.test; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.util.Collection; | |
import org.apache.commons.io.IOUtils; | |
import org.eclipse.smila.bulkbuilder.BulkbuilderService; | |
import org.eclipse.smila.datamodel.AnyMap; | |
import org.eclipse.smila.datamodel.AnySeq; | |
import org.eclipse.smila.datamodel.DataFactory; | |
import org.eclipse.smila.datamodel.Record; | |
import org.eclipse.smila.datamodel.ipc.BinaryObjectStreamIterator; | |
import org.eclipse.smila.importing.ImportingConstants; | |
import org.eclipse.smila.importing.crawler.feed.FeedCrawlerWorker; | |
import org.eclipse.smila.jobmanager.JobRunDataProvider; | |
import org.eclipse.smila.jobmanager.JobRunEngine; | |
import org.eclipse.smila.jobmanager.JobState; | |
import org.eclipse.smila.jobmanager.definitions.JobManagerConstants; | |
import org.eclipse.smila.objectstore.ObjectStoreException; | |
import org.eclipse.smila.objectstore.ObjectStoreService; | |
import org.eclipse.smila.objectstore.StoreObject; | |
import org.eclipse.smila.taskworker.Worker; | |
import org.eclipse.smila.test.DeclarativeServiceTestCase; | |
import org.eclipse.smila.utils.service.ServiceUtils; | |
import org.osgi.framework.ServiceReference; | |
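/** tests for the feed crawler worker: runs crawl jobs on locally served RSS and Atom feeds and checks the crawled records. */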
public class TestFeedCrawlerWorker extends DeclarativeServiceTestCase { | |
private static final String STORE_NAME = "records"; | |
private static final String BUCKET = "crawledEntries"; | |
private static final String INVALID_URL = "http://anonymous.invalid/index.html"; | |
private static final long MAX_WAIT_TIME_JOB_COMPLETION = 5000L; // ms | |
protected static final String BUCKETS_STORE = "buckets"; | |
/** name of attribute containing the URL of the web resource. */ | |
public static final String ATTRIBUTE_URL = "httpUrl"; | |
private JobRunEngine _jobManager; | |
private BulkbuilderService _bulkbuilder; | |
private JobRunDataProvider _jobRunDataProvider; | |
private ObjectStoreService _objectStore; | |
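/** gets the required services and ensures that the record store exists and is empty. */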
@Override | |
protected void setUp() throws Exception { | |
super.setUp(); | |
_jobManager = getService(JobRunEngine.class); | |
_bulkbuilder = getService(BulkbuilderService.class); | |
_jobRunDataProvider = getService(JobRunDataProvider.class); | |
_objectStore = getService(ObjectStoreService.class); | |
if (_objectStore.existsStore(STORE_NAME)) { | |
_objectStore.clearStore(STORE_NAME); | |
} else { | |
_objectStore.createStore(STORE_NAME, null); | |
} | |
} | |
/** tests that the FeedCrawlerWorker is registered as an OSGi worker service. */
@SuppressWarnings("rawtypes") | |
public void testService() throws Exception { | |
final ServiceReference[] services = ServiceUtils.getServiceReferences(Worker.class); | |
assertTrue("no worker services started.", services.length > 0); | |
for (final ServiceReference service : services) { | |
final Worker worker = ServiceUtils.getService(service, Worker.class); | |
if (worker instanceof FeedCrawlerWorker) { | |
assertEquals(FeedCrawlerWorker.NAME, worker.getName()); | |
return; // worker found, test ok. | |
} | |
} | |
fail("FeedCrawlerWorker not found"); | |
} | |
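/** tests a crawl job for a single RSS feed. */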
public void testCrawlRssFeed() throws Exception { | |
final String jobName = "crawlRssFeed"; | |
final String jobRunId = _jobManager.startJob(jobName); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
checkCrawledRecords(100, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertEclipseRssProperties(metadata, false); | |
} | |
}); | |
} | |
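/** tests a crawl job for a single Atom feed. */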
public void testCrawlAtomFeed() throws Exception { | |
final String jobName = "crawlAtomFeed"; | |
final String jobRunId = _jobManager.startJob(jobName); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
checkCrawledRecords(15, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertTwitterAtomProperties(metadata, false); | |
} | |
}); | |
} | |
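/** tests a crawl job for multiple RSS and Atom feeds. */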
public void testCrawlManyFeeds() throws Exception { | |
final String jobName = "crawlManyFeeds"; | |
final String jobRunId = _jobManager.startJob(jobName); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
checkCrawledRecords(137, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertAtomOrRssProperties(metadata, false); | |
} | |
}); | |
} | |
/** tests crawl job that contains a valid and an invalid feed URL. */
public void testCrawlManyFeedsWithInvalidFeed() throws Exception { | |
final String jobName = "crawlManyFeedsWithInvalidFeed"; | |
final String jobRunId = _jobManager.startJob(jobName); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
checkCrawledRecords(100, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertAtomOrRssProperties(metadata, false); | |
} | |
}); | |
} | |
/** tests crawl job that contains only an invalid feed URL. */
public void testCrawlInvalidFeedUrl() throws Exception { | |
final String jobName = "crawlInvalidFeedUrl"; | |
final String jobRunId = _jobManager.startJob(jobName); | |
waitForJobRun(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION, JobState.FAILED); | |
checkCrawledRecords(0, null); | |
} | |
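/** tests a crawl job that maps all feed and entry properties. */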
public void testCrawlAllProperties() throws Exception { | |
final String jobName = "crawlAllProperties"; | |
final String jobRunId = _jobManager.startJob(jobName); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
checkCrawledRecords(137, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertAtomOrRssProperties(metadata, true); | |
} | |
}); | |
} | |
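/** tests a crawl job reading a single feed URL from the input slot. */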
public void testCrawlInputSlotSingleValue() throws Exception { | |
final String jobName = "crawlInputSlot"; | |
final String jobRunId = _jobManager.startJob(jobName); | |
final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl"); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/eclipse-smila-news.rss"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
_bulkbuilder.commitJob(jobName); | |
_jobManager.finishJob(jobName, jobRunId); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
checkCrawledRecords(100, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertEclipseRssProperties(metadata, true); | |
} | |
}); | |
} | |
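/** tests a crawl job reading multiple feed URLs from the input slot. */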
public void testCrawlInputSlotMultiValue() throws Exception { | |
final String jobName = "crawlInputSlot"; | |
final String jobRunId = _jobManager.startJob(jobName); | |
final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl"); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/eclipse-smila-news.rss"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/twitter-smila-search.atom"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/spiegel-nachrichten.rss"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online.atom"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
_bulkbuilder.commitJob(jobName); | |
_jobManager.finishJob(jobName, jobRunId); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
checkCrawledRecords(137, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertAtomOrRssProperties(metadata, true); | |
} | |
}); | |
} | |
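/** tests that a crawl job reading an invalid feed URL from the input slot fails. */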
public void testCrawlInputSlotErrorHandling() throws Exception { | |
final String jobName = "crawlInputSlot"; | |
final String jobRunId = _jobManager.startJob(jobName); | |
final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl"); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, INVALID_URL); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
_bulkbuilder.commitJob(jobName); | |
_jobManager.finishJob(jobName, jobRunId); | |
waitForJobRunFailed(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
} | |
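/** tests that the delta hash of a changed entry differs between two crawl runs of different versions of the same feed. */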
public void testCrawlDeltaHashContent() throws Exception { | |
final String jobName = "crawlDeltaPropsContent"; | |
String jobRunId = _jobManager.startJob(jobName); | |
final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl"); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online.atom"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
_bulkbuilder.commitJob(jobName); | |
_jobManager.finishJob(jobName, jobRunId); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
final AnySeq deltaHashValuesOld = DataFactory.DEFAULT.createAnySeq(); | |
checkCrawledRecords(14, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertZeitAtomProperties(metadata, false); | |
} | |
}, deltaHashValuesOld); | |
_objectStore.clearStore(STORE_NAME); | |
jobRunId = _jobManager.startJob(jobName); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online-newer.atom"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
_bulkbuilder.commitJob(jobName); | |
_jobManager.finishJob(jobName, jobRunId); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
final AnySeq deltaHashValuesNew = DataFactory.DEFAULT.createAnySeq(); | |
checkCrawledRecords(14, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertZeitAtomProperties(metadata, false); | |
} | |
}, deltaHashValuesNew); | |
// record '0' is different in second crawled feed | |
assertDeltaHashValues(deltaHashValuesOld, deltaHashValuesNew, 0); | |
} | |
/** | |
* configured deltaProps ["itemAuthors", "feedCategories"] don't have values, so all mapped props are used and the | |
* change is detected. | |
*/ | |
public void testCrawlDeltaPropsNoValues() throws Exception { | |
final String jobName = "crawlDeltaPropsNoValues"; | |
String jobRunId = _jobManager.startJob(jobName); | |
final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl"); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online.atom"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
_bulkbuilder.commitJob(jobName); | |
_jobManager.finishJob(jobName, jobRunId); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
final AnySeq deltaHashValuesOld = DataFactory.DEFAULT.createAnySeq(); | |
checkCrawledRecords(14, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertZeitAtomProperties(metadata, false); | |
} | |
}, deltaHashValuesOld); | |
_objectStore.clearStore(STORE_NAME); | |
jobRunId = _jobManager.startJob(jobName); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online-update.atom"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
_bulkbuilder.commitJob(jobName); | |
_jobManager.finishJob(jobName, jobRunId); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
final AnySeq deltaHashValuesNew = DataFactory.DEFAULT.createAnySeq(); | |
checkCrawledRecords(14, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertZeitAtomProperties(metadata, false); | |
} | |
}, deltaHashValuesNew); | |
// record '0' is different in second crawled feed | |
assertDeltaHashValues(deltaHashValuesOld, deltaHashValuesNew, 0); | |
} | |
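/** tests that the default delta property configuration yields the same delta hashes as explicitly configuring all properties. */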
public void testCrawlDeltaHashDefault() throws Exception { | |
String jobName = "crawlDeltaPropsDefault"; | |
String jobRunId = _jobManager.startJob(jobName); | |
final Record inputUrls = DataFactory.DEFAULT.createRecord("httpUrl"); | |
inputUrls.getMetadata().put(ATTRIBUTE_URL, "http://localhost:8700/feeds/zeit-online.atom"); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
_bulkbuilder.commitJob(jobName); | |
_jobManager.finishJob(jobName, jobRunId); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
final AnySeq deltaHashValuesOld = DataFactory.DEFAULT.createAnySeq(); | |
checkCrawledRecords(14, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertZeitAtomProperties(metadata, false); | |
} | |
}, deltaHashValuesOld); | |
_objectStore.clearStore(STORE_NAME); | |
jobName = "crawlDeltaPropsAll"; | |
jobRunId = _jobManager.startJob(jobName); | |
_bulkbuilder.addRecord(jobName, inputUrls); | |
_bulkbuilder.commitJob(jobName); | |
_jobManager.finishJob(jobName, jobRunId); | |
waitForJobRunCompleted(jobName, jobRunId, MAX_WAIT_TIME_JOB_COMPLETION); | |
final AnySeq deltaHashValuesNew = DataFactory.DEFAULT.createAnySeq(); | |
checkCrawledRecords(14, new PropertyAssertions() { | |
@Override | |
public void assertProperties(final AnyMap metadata) { | |
assertZeitAtomProperties(metadata, false); | |
} | |
}, deltaHashValuesNew); | |
// assert that delta hashes are equal to previous job run | |
assertDeltaHashValues(deltaHashValuesOld, deltaHashValuesNew, null); | |
} | |
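/** waits until the given job run has failed. */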
protected void waitForJobRunFailed(final String jobName, final String jobId, final long maxWaitTime) | |
throws Exception { | |
waitForJobRun(jobName, jobId, maxWaitTime, JobState.FAILED); | |
} | |
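/** waits until the given job run has succeeded. */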
protected void waitForJobRunCompleted(final String jobName, final String jobId, final long maxWaitTime) | |
throws Exception { | |
waitForJobRun(jobName, jobId, maxWaitTime, JobState.SUCCEEDED); | |
} | |
/** Waits until the job run reaches the expected state, failing early on the opposite terminal state or when the maximum wait time is exceeded. */
protected void waitForJobRun(final String jobName, final String jobId, final long maxWaitTime, | |
final JobState expectedJobState) throws Exception { | |
final long sleepTime = 500L; | |
final long millisStarted = System.currentTimeMillis(); | |
while (true) { | |
final AnyMap runData = _jobRunDataProvider.getJobRunData(jobName, jobId); | |
final String jobRunState = runData.getStringValue(JobManagerConstants.DATA_JOB_STATE); | |
if (jobRunState != null) { | |
final JobState state = JobState.valueOf(jobRunState); | |
if (state == expectedJobState) { | |
return; // finally found what we're waiting for. | |
} | |
if (expectedJobState == JobState.SUCCEEDED) { | |
assertNotSame("didn't expect JobState.FAILED", JobState.FAILED, state); | |
} | |
if (expectedJobState == JobState.FAILED) { | |
assertNotSame("didn't expect JobState.SUCCEEDED", JobState.SUCCEEDED, state); | |
} | |
} | |
assertTrue("Waited too long for job to complete", System.currentTimeMillis() - millisStarted <= maxWaitTime); | |
Thread.sleep(sleepTime); | |
} | |
} | |
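/** callback for asserting the metadata properties of a crawled record. */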
private interface PropertyAssertions { | |
void assertProperties(AnyMap metadata); | |
} | |
/** checks the number of crawled records and runs the given property assertions on each record's metadata. */
private void checkCrawledRecords(final int expectedRecordCount, final PropertyAssertions propertyAssertions) | |
throws ObjectStoreException, IOException { | |
checkCrawledRecords(expectedRecordCount, propertyAssertions, DataFactory.DEFAULT.createAnySeq()); | |
} | |
/** checks the crawled records and collects their delta hash values into the given sequence. */
private void checkCrawledRecords(final int expectedRecordCount, final PropertyAssertions propertyAssertions,
final AnySeq deltaHashValues) throws ObjectStoreException, IOException {
final Collection<StoreObject> objects = _objectStore.getStoreObjectInfos(STORE_NAME, BUCKET); | |
if (expectedRecordCount == 0) { | |
assertTrue(objects.isEmpty()); | |
} else { | |
int recordCount = 0; | |
assertFalse(objects.isEmpty()); | |
for (final StoreObject objectInfo : objects) { | |
final InputStream bulkStream = _objectStore.readObject(STORE_NAME, objectInfo.getId()); | |
try (final BinaryObjectStreamIterator records = new BinaryObjectStreamIterator(bulkStream)) { | |
while (records.hasNext()) { | |
final Record record = records.next(); | |
assertNotNull(record); | |
System.out.println(record); | |
recordCount++; | |
assertNotNull(record.getId()); | |
assertEquals("feeds", record.getSource()); | |
final AnyMap metadata = record.getMetadata(); | |
assertTrue(metadata.containsKey(ImportingConstants.ATTRIBUTE_DELTA_HASH)); | |
deltaHashValues.add(metadata.getStringValue(ImportingConstants.ATTRIBUTE_DELTA_HASH));
assertFalse(record.hasAttachments()); | |
if (propertyAssertions != null) { | |
propertyAssertions.assertProperties(metadata); | |
} | |
} | |
} finally { | |
IOUtils.closeQuietly(bulkStream); | |
} | |
} | |
assertEquals(expectedRecordCount, recordCount); | |
} | |
} | |
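/** asserts the expected properties of records crawled from the Eclipse SMILA news RSS feed. */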
protected void assertEclipseRssProperties(final AnyMap metadata, final boolean expectAllProperties) { | |
assertFalse(metadata.getSeq("Authors").isEmpty()); | |
assertNull(metadata.getSeq("Contents")); | |
assertFalse(metadata.getMap("Description").isEmpty()); | |
assertNotNull(metadata.getDateTimeValue("PublishDate")); | |
assertNull(metadata.getDateTimeValue("UpdateDate")); | |
assertNotNull(metadata.getStringValue("URI")); | |
if (expectAllProperties) { | |
// all feed properties | |
assertEquals("http://localhost:8700/feeds/eclipse-smila-news.rss", metadata.getStringValue("FeedSourceUrl")); | |
assertNull(metadata.getSeq("FeedAuthors")); | |
assertNull(metadata.getSeq("FeedCategories")); | |
assertNull(metadata.getSeq("FeedContributors")); | |
assertNull(metadata.getStringValue("FeedCopyright")); | |
assertNotNull(metadata.getStringValue("FeedDescription")); | |
assertNull(metadata.getStringValue("FeedEncoding")); | |
assertNull(metadata.getMap("FeedImage")); | |
assertNull(metadata.getStringValue("FeedLanguage")); | |
assertFalse(metadata.getSeq("FeedLinks").isEmpty()); | |
assertNull(metadata.getDateTimeValue("FeedPublishDate")); | |
assertNotNull(metadata.getStringValue("FeedTitle")); | |
assertNotNull(metadata.getStringValue("FeedType")); | |
assertNotNull(metadata.getStringValue("FeedUri")); | |
// all remaining entry properties | |
assertNull(metadata.getSeq("Categories")); | |
assertNull(metadata.getSeq("Contributors")); | |
assertNull(metadata.getSeq("Enclosures")); | |
assertFalse(metadata.getSeq("Links").isEmpty()); | |
assertNotNull(metadata.getStringValue("Title")); | |
} | |
} | |
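/** asserts the expected properties of records crawled from the Twitter search Atom feed. */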
protected void assertTwitterAtomProperties(final AnyMap metadata, final boolean expectAllProperties) { | |
assertFalse(metadata.getSeq("Authors").isEmpty()); | |
assertFalse(metadata.getSeq("Contents").isEmpty()); | |
assertNull(metadata.getMap("Description")); | |
assertNotNull(metadata.getDateTimeValue("PublishDate")); | |
assertNotNull(metadata.getDateTimeValue("UpdateDate")); | |
assertNotNull(metadata.getStringValue("URI")); | |
if (expectAllProperties) { | |
// all feed properties | |
assertEquals("http://localhost:8700/feeds/twitter-smila-search.atom", | |
metadata.getStringValue("FeedSourceUrl")); | |
assertNull(metadata.getSeq("FeedAuthors")); | |
assertNull(metadata.getSeq("FeedCategories")); | |
assertNull(metadata.getSeq("FeedContributors")); | |
assertNull(metadata.getStringValue("FeedCopyright")); | |
assertNull(metadata.getStringValue("FeedDescription")); | |
assertNull(metadata.getStringValue("FeedEncoding")); | |
assertNull(metadata.getMap("FeedImage")); | |
assertNull(metadata.getStringValue("FeedLanguage")); | |
assertFalse(metadata.getSeq("FeedLinks").isEmpty()); | |
assertNotNull(metadata.getDateTimeValue("FeedPublishDate")); | |
assertNotNull(metadata.getStringValue("FeedTitle")); | |
assertNotNull(metadata.getStringValue("FeedUri")); | |
assertNotNull(metadata.getStringValue("FeedType")); | |
// all remaining entry properties | |
assertNull(metadata.getSeq("Categories")); | |
assertNull(metadata.getSeq("Contributors")); | |
assertNull(metadata.getSeq("Enclosures")); | |
assertFalse(metadata.getSeq("Links").isEmpty()); | |
assertNotNull(metadata.getStringValue("Title")); | |
} | |
} | |
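/** asserts the expected properties of records crawled from the Spiegel RSS feed. */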
protected void assertSpiegelRssProperties(final AnyMap metadata, final boolean expectAllProperties) { | |
assertNull(metadata.getSeq("Authors")); | |
assertFalse(metadata.getSeq("Contents").isEmpty()); | |
assertFalse(metadata.getMap("Description").isEmpty()); | |
assertNotNull(metadata.getDateTimeValue("PublishDate")); | |
assertNull(metadata.getDateTimeValue("UpdateDate")); | |
assertNotNull(metadata.getStringValue("URI")); | |
if (expectAllProperties) { | |
// all feed properties | |
assertEquals("http://localhost:8700/feeds/spiegel-nachrichten.rss", metadata.getStringValue("FeedSourceUrl")); | |
assertNull(metadata.getSeq("FeedAuthors")); | |
assertNull(metadata.getSeq("FeedCategories")); | |
assertNull(metadata.getSeq("FeedContributors")); | |
assertNull(metadata.getStringValue("FeedCopyright")); | |
assertNotNull(metadata.getStringValue("FeedDescription")); | |
assertNull(metadata.getStringValue("FeedEncoding")); | |
assertFalse(metadata.getMap("FeedImage").isEmpty()); | |
assertNotNull(metadata.getStringValue("FeedLanguage")); | |
assertFalse(metadata.getSeq("FeedLinks").isEmpty()); | |
assertNotNull(metadata.getDateTimeValue("FeedPublishDate")); | |
assertNotNull(metadata.getStringValue("FeedTitle")); | |
assertNull(metadata.getStringValue("FeedUri")); | |
assertNotNull(metadata.getStringValue("FeedType")); | |
// all remaining entry properties | |
assertNull(metadata.getSeq("Categories")); | |
assertNull(metadata.getSeq("Contributors")); | |
assertFalse(metadata.getSeq("Enclosures").isEmpty()); | |
assertFalse(metadata.getSeq("Links").isEmpty()); | |
assertNotNull(metadata.getStringValue("Title")); | |
} | |
} | |
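/** asserts the expected properties of records crawled from the Zeit Online Atom feed. */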
protected void assertZeitAtomProperties(final AnyMap metadata, final boolean expectAllProperties) { | |
assertNull(metadata.getSeq("Authors")); | |
assertNull(metadata.getSeq("Contents")); | |
assertFalse(metadata.getMap("Description").isEmpty()); | |
assertNull(metadata.getDateTimeValue("PublishDate")); | |
assertNull(metadata.getDateTimeValue("UpdateDate")); | |
assertNotNull(metadata.getStringValue("URI")); | |
if (expectAllProperties) { | |
// all feed properties | |
assertEquals("http://localhost:8700/feeds/zeit-online.atom", metadata.getStringValue("FeedSourceUrl")); | |
assertFalse(metadata.getSeq("FeedAuthors").isEmpty()); | |
assertNull(metadata.getSeq("FeedCategories")); | |
assertNull(metadata.getSeq("FeedContributors")); | |
assertNotNull(metadata.getStringValue("FeedCopyright")); | |
assertNotNull(metadata.getStringValue("FeedDescription")); | |
assertNull(metadata.getStringValue("FeedEncoding")); | |
assertNotNull(metadata.getMap("FeedImage")); | |
assertNotNull(metadata.getStringValue("FeedLanguage")); | |
assertNotNull(metadata.getSeq("FeedLinks")); | |
assertNotNull(metadata.getDateTimeValue("FeedPublishDate")); | |
assertNotNull(metadata.getStringValue("FeedTitle")); | |
assertNull(metadata.getStringValue("FeedUri")); | |
assertNotNull(metadata.getStringValue("FeedType")); | |
// all remaining entry properties | |
assertFalse(metadata.getSeq("Categories").isEmpty()); | |
assertNull(metadata.getSeq("Contributors")); | |
assertNull(metadata.getSeq("Enclosures")); | |
assertFalse(metadata.getSeq("Links").isEmpty()); | |
assertNotNull(metadata.getStringValue("Title")); | |
} | |
} | |
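/** asserts the expected properties of records crawled from the newer version of the Zeit Online Atom feed. */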
protected void assertZeitNewAtomProperties(final AnyMap metadata, final boolean expectAllProperties) { | |
assertNull(metadata.getSeq("Authors")); | |
assertNull(metadata.getSeq("Contents")); | |
assertFalse(metadata.getMap("Description").isEmpty()); | |
assertNull(metadata.getDateTimeValue("PublishDate")); | |
assertNull(metadata.getDateTimeValue("UpdateDate")); | |
assertNotNull(metadata.getStringValue("URI")); | |
if (expectAllProperties) { | |
// all feed properties | |
assertEquals("http://localhost:8700/feeds/zeit-online-newer.atom", metadata.getStringValue("FeedSourceUrl")); | |
assertFalse(metadata.getSeq("FeedAuthors").isEmpty()); | |
assertNull(metadata.getSeq("FeedCategories")); | |
assertNull(metadata.getSeq("FeedContributors")); | |
assertNotNull(metadata.getStringValue("FeedCopyright")); | |
assertNotNull(metadata.getStringValue("FeedDescription")); | |
assertNull(metadata.getStringValue("FeedEncoding")); | |
assertNotNull(metadata.getMap("FeedImage")); | |
assertNotNull(metadata.getStringValue("FeedLanguage")); | |
assertNotNull(metadata.getSeq("FeedLinks")); | |
assertNotNull(metadata.getDateTimeValue("FeedPublishDate")); | |
assertNotNull(metadata.getStringValue("FeedTitle")); | |
assertNull(metadata.getStringValue("FeedUri")); | |
assertNotNull(metadata.getStringValue("FeedType")); | |
// all remaining entry properties | |
assertFalse(metadata.getSeq("Categories").isEmpty()); | |
assertNull(metadata.getSeq("Contributors")); | |
assertNull(metadata.getSeq("Enclosures")); | |
assertFalse(metadata.getSeq("Links").isEmpty()); | |
assertNotNull(metadata.getStringValue("Title")); | |
} | |
} | |
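/** dispatches to the matching feed-specific assertion method based on the record's feed link. */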
protected void assertAtomOrRssProperties(final AnyMap metadata, final boolean expectAllProperties) { | |
// System.out.println(metadata); | |
final AnyMap feedLink = metadata.getSeq("FeedLinks").getMap(0); | |
final String feedUri = feedLink.getStringValue("Href"); | |
switch (feedUri) { | |
case "http://www.eclipse.org/forums/": | |
assertEclipseRssProperties(metadata, expectAllProperties); | |
break; | |
case "http://search.twitter.com/search?q=smila": | |
assertTwitterAtomProperties(metadata, expectAllProperties); | |
break; | |
case "http://www.spiegel.de": | |
assertSpiegelRssProperties(metadata, expectAllProperties); | |
break; | |
case "http://www.zeit.de/index": | |
assertZeitAtomProperties(metadata, expectAllProperties); | |
break; | |
default: | |
fail("unexpected feed URI '" + feedUri + "'"); | |
} | |
} | |
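/** asserts that the delta hash values of two job runs are equal, except for the record at the given index (may be null). */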
private void assertDeltaHashValues(final AnySeq deltaHashValuesOld, final AnySeq deltaHashValuesNew, | |
final Integer recordNoWithChangedDeltaHash) { | |
assertEquals("both job runs should produce the same number of records", deltaHashValuesOld.size(),
deltaHashValuesNew.size());
for (int i = 0; i < deltaHashValuesOld.size(); i++) {
if (recordNoWithChangedDeltaHash != null && i == recordNoWithChangedDeltaHash.intValue()) {
assertFalse("delta hash of record " + i + " should have changed",
deltaHashValuesOld.get(i).equals(deltaHashValuesNew.get(i)));
} else {
assertEquals("delta hash of record " + i + " should not have changed", deltaHashValuesOld.get(i),
deltaHashValuesNew.get(i));
}
}
} | |
} | |
} |