/*********************************************************************************************************************
 * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 **********************************************************************************************************************/
package org.eclipse.smila.importing.crawler.web; | |
import java.util.Collection; | |
import org.eclipse.smila.datamodel.Record; | |
/** | |
* Extract links from content contained in input record. | |
*/ | |
public interface LinkExtractor { | |
/** | |
* @param inputRecord | |
* input record with content | |
* @param context | |
* the web crawling context | |
* @return for each extracted link a new record is created that has an attribute 'httpUrl' with an (absolute) URL. | |
*/ | |
Collection<Record> extractLinks(final Record inputRecord, final WebCrawlingContext context) | |
throws WebCrawlerException; | |
} |