| package org.eclipse.smila.importing.crawler.web.test; |
| |
| import java.util.UUID; |
| |
| import org.eclipse.smila.datamodel.AnyMap; |
| import org.eclipse.smila.datamodel.DataFactory; |
| import org.eclipse.smila.importing.ImportingConstants; |
| import org.eclipse.smila.importing.crawler.web.WebCrawlerConstants; |
| import org.eclipse.smila.importing.crawler.web.WebCrawlingContext; |
| import org.eclipse.smila.taskmanager.Task; |
| import org.eclipse.smila.taskworker.DefaultTaskLogFactory; |
| import org.eclipse.smila.taskworker.TaskLog; |
| import org.eclipse.smila.taskworker.internal.TaskContextImpl; |
| import org.eclipse.smila.test.DeclarativeServiceTestCase; |
| |
| public abstract class WebExtractorTestBase extends DeclarativeServiceTestCase { |
| |
| public static final String DUMMY_DATA_SOURCE_ID = "dummySource"; |
| |
| public static final String DUMMY_JOB_RUN_ID = "dummyJob"; |
| |
| /** dummy logger. */ |
| protected final TaskLog _taskLog = new DefaultTaskLogFactory().getTaskLog(null); |
| |
| /** dummy crawling context. */ |
| protected WebCrawlingContext _webCrawlingContext; |
| |
| /** constructs a dummy task context. */ |
| public WebExtractorTestBase() { |
| super(); |
| final Task task = new Task(UUID.randomUUID().toString(), "dummy"); |
| task.getProperties().put(Task.PROPERTY_JOB_RUN_ID, DUMMY_JOB_RUN_ID); |
| task.getParameters().put(ImportingConstants.TASK_PARAM_MAPPING, initMapping()); |
| task.getParameters().put(ImportingConstants.TASK_PARAM_DATA_SOURCE, DUMMY_DATA_SOURCE_ID); |
| _webCrawlingContext = new WebCrawlingContext(new TaskContextImpl(task, _taskLog, null)); |
| _webCrawlingContext.setCurrentInputBulkId("dummyInputBulk"); |
| } |
| |
| protected void initWebCrawlingContext(final AnyMap taskParameters) { |
| final Task task = new Task(UUID.randomUUID().toString(), "dummy"); |
| task.getProperties().put(Task.PROPERTY_JOB_RUN_ID, DUMMY_JOB_RUN_ID); |
| task.getParameters().putAll(taskParameters); |
| if (!task.getParameters().containsKey(ImportingConstants.TASK_PARAM_MAPPING)) { |
| task.getParameters().put(ImportingConstants.TASK_PARAM_MAPPING, initMapping()); |
| } |
| if (!task.getParameters().containsKey(ImportingConstants.TASK_PARAM_DATA_SOURCE)) { |
| task.getParameters().put(ImportingConstants.TASK_PARAM_DATA_SOURCE, DUMMY_DATA_SOURCE_ID); |
| } |
| _webCrawlingContext = new WebCrawlingContext(new TaskContextImpl(task, _taskLog, null)); |
| _webCrawlingContext.setCurrentInputBulkId("dummyInputBulk"); |
| } |
| |
| /** initialize the mapping. */ |
| private AnyMap initMapping() { |
| final AnyMap map = DataFactory.DEFAULT.createAnyMap(); |
| map.put(WebCrawlerConstants.ATTRIBUTE_CHARSET, "my-charset"); |
| map.put(WebCrawlerConstants.ATTRIBUTE_CONTENTTYPE, "my-contenttype"); |
| map.put(WebCrawlerConstants.ATTRIBUTE_LASTMODIFIED, "my-lastmodified"); |
| map.put(WebCrawlerConstants.ATTRIBUTE_MIMETYPE, "my-mimetype"); |
| map.put(WebCrawlerConstants.ATTRIBUTE_SIZE, "my-size"); |
| map.put(WebCrawlerConstants.ATTRIBUTE_URL, "my-url"); |
| map.put(WebCrawlerConstants.ATTACHMENT_CONTENT, "my-content"); |
| return map; |
| } |
| |
| } |