blob: 9a06f2c2fd975c486fbad85723e517fe350fcab3 [file] [log] [blame]
/*********************************************************************************************************************
* Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
* accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
* distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
**********************************************************************************************************************/
package org.eclipse.smila.importing.crawler.web;
import java.util.Collection;
import org.eclipse.smila.datamodel.Record;
/**
 * Extracts links from the content contained in an input record.
 */
public interface LinkExtractor {

  /**
   * Extracts the links found in the content of the given input record.
   *
   * @param inputRecord
   *          input record with content
   * @param context
   *          the web crawling context
   * @return a collection containing one newly created record per extracted link; each of these records has an
   *         attribute 'httpUrl' with an (absolute) URL.
   * @throws WebCrawlerException
   *           declared by this contract; the concrete failure conditions are defined by the implementation.
   */
  Collection<Record> extractLinks(
    final Record inputRecord,
    final WebCrawlingContext context) throws WebCrawlerException;
}