core/org.eclipse.smila.importing.crawler.web/code/src/org/eclipse/smila/importing/crawler/web/LinkExtractor.java - gerrit/smila/org.eclipse.smila.core - Git at Google

 /*********************************************************************************************************************
  * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
  * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
  * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
  **********************************************************************************************************************/
 package org.eclipse.smila.importing.crawler.web;

 import java.util.Collection;

 import org.eclipse.smila.datamodel.Record;

 /**
  * Contract for components that extract links from the content contained in an input record.
  */
 public interface LinkExtractor {

   /**
    * Extracts the links found in the content of the given record.
    *
    * @param inputRecord
    *          the input record whose content is examined for links
    * @param context
    *          the web crawling context
    * @return a collection holding one newly created record per extracted link; each of these records has an attribute
    *         'httpUrl' with an (absolute) URL.
    * @throws WebCrawlerException
    *           declared so that implementations can report a failure; the exact conditions are not defined by this
    *           interface.
    */
   Collection<Record> extractLinks(
     final Record inputRecord,
     final WebCrawlingContext context) throws WebCrawlerException;

 }
	/*********************************************************************************************************************
	* Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
	* accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
	* distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
	**********************************************************************************************************************/
	package org.eclipse.smila.importing.crawler.web;

	import java.util.Collection;

	import org.eclipse.smila.datamodel.Record;

	/**
	 * Contract for components that extract links from the content contained in an input record.
	 */
	public interface LinkExtractor {

		/**
		 * Extracts the links found in the content of the given record.
		 *
		 * @param inputRecord
		 *          the input record whose content is examined for links
		 * @param context
		 *          the web crawling context
		 * @return a collection holding one newly created record per extracted link; each of these records has an
		 *         attribute 'httpUrl' with an (absolute) URL.
		 * @throws WebCrawlerException
		 *           declared so that implementations can report a failure; the exact conditions are not defined by
		 *           this interface.
		 */
		Collection<Record> extractLinks(
			final Record inputRecord,
			final WebCrawlingContext context) throws WebCrawlerException;

	}