blob: b1d4c5e07a389fdb4919aa695c8b3afa88146ada [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="SMILA/Documentation/HowTo/How to implement a crawler,SMILA/Development Guidelines/Create a bundle (plug-in),SMILA/Development Guidelines/How to integrate new bundle into build process,SMILA/Development Guidelines/How to integrate test bundle into build process,SMILA/Development Guidelines/Setup for JAXB code generation,SMILA/Documentation/CrawlerController,SMILA/Glossary,SMILA/Howto integrate a component in SMILA" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/Documentation/HowTo/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" />
<link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=rss" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=atom" />
<title>SMILA/Documentation/HowTo/How to implement a crawler - Eclipsepedia</title>
<style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style>
<link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" />
<link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" />
<!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]-->
<!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]-->
<!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]-->
<!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]-->
<!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]-->
<!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<script type= "text/javascript">/*<![CDATA[*/
var skin = "eclipsenova";
var stylepath = "/skins";
var wgArticlePath = "/$1";
var wgScriptPath = "";
var wgScript = "/index.php";
var wgServer = "http://wiki.eclipse.org";
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "SMILA/Documentation/HowTo/How_to_implement_a_crawler";
var wgTitle = "SMILA/Documentation/HowTo/How to implement a crawler";
var wgAction = "view";
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
var wgArticleId = "15203";
var wgIsArticle = true;
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
var wgCurRevisionId = "286129";
var wgVersion = "1.12.0";
var wgEnableAPI = true;
var wgEnableWriteAPI = false;
/*]]>*/</script>
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script>
<!-- Performance mods similar to those for bug 166401 -->
<script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=eclipsenova"><!-- site js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script>
<style type="text/css">/*<![CDATA[*/
.source-text {line-height: normal; font-size: medium;}
.source-text li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for text
* CSS class: source-text, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-text .de1, .source-text .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-text {}
.source-text .head {}
.source-text .foot {}
.source-text .imp {font-weight: bold; color: red;}
.source-text .ln-xtra {color: #cc0; background-color: #ffc;}
.source-text li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-text li.li2 {font-weight: bold;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><style type="text/css">/*<![CDATA[*/
.source-xml {line-height: normal; font-size: medium;}
.source-xml li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for xml
* CSS class: source-xml, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-xml .de1, .source-xml .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-xml {}
.source-xml .head {}
.source-xml .foot {}
.source-xml .imp {font-weight: bold; color: red;}
.source-xml .ln-xtra {color: #cc0; background-color: #ffc;}
.source-xml li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-xml li.li2 {font-weight: bold;}
.source-xml .coMULTI {color: #808080; font-style: italic;}
.source-xml .es0 {color: #000099; font-weight: bold;}
.source-xml .br0 {color: #66cc66;}
.source-xml .st0 {color: #ff0000;}
.source-xml .nu0 {color: #cc66cc;}
.source-xml .sc0 {color: #00bbdd;}
.source-xml .sc1 {color: #ddbb00;}
.source-xml .sc2 {color: #339933;}
.source-xml .sc3 {color: #009900;}
.source-xml .re0 {color: #000066;}
.source-xml .re1 {font-weight: bold; color: black;}
.source-xml .re2 {font-weight: bold; color: black;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><style type="text/css">/*<![CDATA[*/
.source-java {line-height: normal; font-size: medium;}
.source-java li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for java
* CSS class: source-java, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-java .de1, .source-java .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-java {}
.source-java .head {}
.source-java .foot {}
.source-java .imp {font-weight: bold; color: red;}
.source-java .ln-xtra {color: #cc0; background-color: #ffc;}
.source-java li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-java li.li2 {font-weight: bold;}
.source-java .kw1 {color: #7F0055; font-weight: bold;}
.source-java .kw2 {color: #7F0055; font-weight: bold;}
.source-java .kw3 {color: #000000; font-weight: normal}
.source-java .kw4 {color: #7F0055; font-weight: bold;}
.source-java .co1 {color: #3F7F5F; font-style: italic;}
.source-java .co2 {color: #3F7F5F;}
.source-java .co3 {color: #3F7F5F; font-style: italic; font-weight: bold;}
.source-java .coMULTI {color: #3F5FBF; font-style: italic;}
.source-java .es0 {color: #000000;}
.source-java .br0 {color: #000000;}
.source-java .st0 {color: #2A00ff;}
.source-java .nu0 {color: #000000;}
.source-java .me1 {color: #000000;}
.source-java .me2 {color: #000000;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><link rel="stylesheet" type="text/css" href="How_to_implement_a_crawler.html" /> </head>
<body class="mediawiki ns-0 ltr page-SMILA_Documentation_HowTo_How_to_implement_a_crawler">
<div id="globalWrapper">
<div id="column-one">
<!-- Eclipse Additions for the Top Nav start here M. Ward-->
<div id="header">
<div id="header-graphic">
<img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki" />
</div>
<!-- Pulled 101409 Mward -->
<div class="portlet" id="p-personal">
<div class="pBody">
<ul>
<li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/HowTo/How_to_implement_a_crawler">Log in</a></li>
</ul>
</div>
</div>
<div id="header-icons">
<div id="sites">
<ul id="sitesUL">
<li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li>
<li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li>
<li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li>
<li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li>
<li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li>
<li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li>
</ul>
</div>
</div>
</div>
<!-- NEW HEADER STUFF HERE -->
<div id="header-menu">
<div id="header-nav">
<ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li>
<li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li>
<li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li>
<li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li>
<li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li>
<li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li>
<li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li>
</ul>
</div>
<div id="header-utils">
<!-- moved the search window here -->
<form action="http://wiki.eclipse.org/Special:Search" >
<input class="input" name="search" type="text" accesskey="f" value="" />
<input type='submit' onclick="this.submit();" name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" />&nbsp;
<input type='submit' onclick="this.submit();" name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" />
</form>
</div>
</div>
<!-- Eclipse Additions for the Header stop here -->
<!-- Additions and mods for leftside nav Start here -->
<!--Started nav rip here-->
<!-- these are the nav controls main page, changes etc -->
<div id="novaContent" class="faux">
<div id="leftcol">
<ul id="leftnav">
<!-- these are the page controls, edit history etc -->
<li class="separator"><a class="separator">Navigation &#160;&#160;</a></li>
<li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li>
<li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li>
<li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li>
<li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li>
<li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li>
<li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li>
<li class="separator"><a class="separator">Toolbox &#160;&#160;</a></li>
<li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Documentation/HowTo/How_to_implement_a_crawler">What links here</a></li>
<li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Documentation/HowTo/How_to_implement_a_crawler">Related changes</a></li>
<!-- This is the toolbox section -->
<li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li>
<li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li>
<li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/HowTo/How_to_implement_a_crawler&amp;printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/HowTo/How_to_implement_a_crawler&amp;oldid=286129">Permanent link</a></li> </ul>
</div>
<!-- Additions and mods for leftside nav End here -->
<div id="column-content">
<div id="content">
<a name="top" id="top"></a>
<div id="tabs">
<ul class="primary">
<li class="active"><a href="How_to_implement_a_crawler.html"><span class="tab">Page</span></a></li>
<li><a href="http://wiki.eclipse.org/Talk:SMILA/Documentation/HowTo/How_to_implement_a_crawler"><span class="tab">Discussion</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/HowTo/How_to_implement_a_crawler&amp;action=edit"><span class="tab">View source</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/HowTo/How_to_implement_a_crawler&amp;action=history"><span class="tab">History</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/HowTo/How%20to%20implement%20a%20crawler"><span class="tab">Edit</span></a></li>
</ul>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<h1 class="firstHeading">SMILA/Documentation/HowTo/How to implement a crawler</h1>
<div id="bodyContent">
<h3 id="siteSub">From Eclipsepedia</h3>
<div id="contentSub"><span class="subpages">&lt; <a href="../../../SMILA.html" title="SMILA">SMILA</a> | <a href="../../Documentation.1.html" title="SMILA/Documentation">Documentation</a> | <a href="../HowTo.html" title="SMILA/Documentation/HowTo">HowTo</a></span></div>
<div id="jump-to-nav">Jump to: <a href="How_to_implement_a_crawler.html#column-one">navigation</a>, <a href="How_to_implement_a_crawler.html#searchInput">search</a></div> <!-- start content -->
<div class="messagebox" style="background-color: #def3fe; border: 1px solid #c5d7e0; color: black; padding: 5px; margin: 1ex 0; min-height: 35px; padding-left: 45px;">
<div style="float: left; margin-left: -40px;"><a href="http://wiki.eclipse.org/Image:Note.png" class="image" title="Note.png"><img alt="" src="http://wiki.eclipse.org/images/c/cc/Note.png" width="35" height="35" border="0" /></a></div>
<div><b>This is deprecated for SMILA 1.0. The connectivity framework is still functional, but it is intended to be replaced by a scalable import based on SMILA's job management.</b><br /></div>
</div>
<p>Explains how to implement an <a href="../../Glossary.html#C" title="SMILA/Glossary">Crawler</a> and <a href="../../Howto_integrate_a_component_in_SMILA.html" class="mw-redirect" title="SMILA/Howto integrate a component in SMILA">add its functionality</a> to SMILA.
</p>
<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1"><a href="How_to_implement_a_crawler.html#Prepare_bundle_and_manifest"><span class="tocnumber">1</span> <span class="toctext">Prepare bundle and manifest</span></a></li>
<li class="toclevel-1"><a href="How_to_implement_a_crawler.html#Prepare_DataSourceConnect_schema_and_classes"><span class="tocnumber">2</span> <span class="toctext">Prepare DataSourceConnect schema and classes</span></a></li>
<li class="toclevel-1"><a href="How_to_implement_a_crawler.html#OSGi_and_Declarative_Service_requirements"><span class="tocnumber">3</span> <span class="toctext">OSGi and Declarative Service requirements</span></a></li>
<li class="toclevel-1"><a href="How_to_implement_a_crawler.html#Implement_your_crwler"><span class="tocnumber">4</span> <span class="toctext">Implement your crawler</span></a></li>
<li class="toclevel-1"><a href="How_to_implement_a_crawler.html#Activate_your_crawler"><span class="tocnumber">5</span> <span class="toctext">Activate your crawler</span></a>
<ul>
<li class="toclevel-2"><a href="How_to_implement_a_crawler.html#Activation_SMILA_in_eclipse"><span class="tocnumber">5.1</span> <span class="toctext">Activation SMILA in eclipse</span></a></li>
<li class="toclevel-2"><a href="How_to_implement_a_crawler.html#Activation_SMILA_application"><span class="tocnumber">5.2</span> <span class="toctext">Activation SMILA application</span></a></li>
</ul>
</li>
<li class="toclevel-1"><a href="How_to_implement_a_crawler.html#Run_your_crawler"><span class="tocnumber">6</span> <span class="toctext">Run your crawler</span></a></li>
</ul>
</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
<a name="Prepare_bundle_and_manifest"></a><h2> <span class="mw-headline"> Prepare bundle and manifest </span></h2>
<ul><li>Create a new bundle that will contain your crawler. Follow the instructions on <a href="../../Development_Guidelines/Create_a_bundle_(plug-in).html" class="mw-redirect" title="SMILA/Development Guidelines/Create a bundle (plug-in)">How to create a bundle</a>. In this sample we use the prefix <tt>myplugin.crawler.mock</tt> for the name of the project.
</li><li>For the crawler's JAXB code generation we need to import the SMILA.builder project into our workspace.
</li></ul>
<ul><li>Edit the manifest file and add at least the following packages to the <i>Import-Package</i> section.
<ul><li><tt>org.eclipse.smila.connectivity;version="1.0.0"</tt>
</li><li><tt>org.eclipse.smila.connectivity.framework;version="1.0.0"</tt>
</li><li><tt>org.eclipse.smila.connectivity.framework.performancecounters;version="1.0.0"</tt>
</li><li><tt>org.eclipse.smila.connectivity.framework.schema;version="1.0.0"</tt>
</li><li><tt>org.eclipse.smila.connectivity.framework.schema.config;version="1.0.0"</tt>
</li><li><tt>org.eclipse.smila.connectivity.framework.schema.config.interfaces;version="1.0.0"</tt>
</li><li><tt>org.eclipse.smila.connectivity.framework.util;version="1.0.0"</tt>
</li><li><tt>org.eclipse.smila.datamodel;version="1.0.0"</tt>
</li></ul>
</li></ul>
<ul><li>You will have to add additional packages to fill your crawler with business logic!
</li></ul>
<ul><li>Now your MANIFEST.MF file should be like
</li></ul>
<div dir="ltr" style="text-align: left;"><pre class="source-text">Manifest-Version: 1.0
Bundle-ManifestVersion: 2
Bundle-Name: Mock Crawler
Bundle-SymbolicName: myplugin.crawler.mock
Bundle-Version: 1.0.0
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
Import-Package:
org.eclipse.smila.connectivity;version=&quot;1.0.0&quot;,
org.eclipse.smila.connectivity.framework;version=&quot;1.0.0&quot;,
org.eclipse.smila.connectivity.framework.performancecounters;version=&quot;1.0.0&quot;,
org.eclipse.smila.connectivity.framework.schema;version=&quot;1.0.0&quot;,
org.eclipse.smila.connectivity.framework.schema.config;version=&quot;1.0.0&quot;,
org.eclipse.smila.connectivity.framework.schema.config.interfaces;version=&quot;1.0.0&quot;,
org.eclipse.smila.connectivity.framework.util;version=&quot;1.0.0&quot;,
org.eclipse.smila.datamodel;version=&quot;1.0.0&quot;</pre></div>
<a name="Prepare_DataSourceConnect_schema_and_classes"></a><h2> <span class="mw-headline"> Prepare DataSourceConnect schema and classes </span></h2>
<ul><li>create an additional source folder <tt>code/gen</tt> to contain the generated schema sources
<ul><li>Right-click your bundle and click <i>New &gt; Source Folder</i>.
</li><li>Enter "code/gen" as the folder name.
</li><li>edit build.properties and add folder <tt>code/gen</tt> to the source folders.
</li></ul>
</li></ul>
<div dir="ltr" style="text-align: left;"><pre class="source-text">source.. = code/src/,\
code/gen/
output.. = code/bin/</pre></div>
<p><br />
</p>
<ul><li>create schema definition
<ul><li>create a folder <tt>schemas</tt> in your bundle
</li><li>create file <tt>schemas\MockCrawlerSchema.xsd</tt> to contain the XSD schema for the crawler configuration based on the abstract XSD schema "RootDataSourceConnectionConfigSchema"
</li><li>therein you have to provide definitions of "Process" and "Attribute" nodes for crawler specific information
</li><li>the following code snippet can be used as a template
</li></ul>
</li></ul>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;?xml</span> <span class="re0">version</span>=<span class="st0">&quot;1.0&quot;</span> <span class="re0">encoding</span>=<span class="st0">&quot;UTF-8&quot;</span><span class="re2">?&gt;</span></span>
<span class="sc3"><span class="re1">&lt;xs:schema</span> <span class="re0">elementFormDefault</span>=<span class="st0">&quot;qualified&quot;</span> <span class="re0">attributeFormDefault</span>=<span class="st0">&quot;unqualified&quot;</span> <span class="re0">xmlns:xs</span>=<span class="st0">&quot;http://www.w3.org/2001/XMLSchema&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;xs:redefine</span> <span class="re0">schemaLocation</span>=<span class="st0">&quot;../../org.eclipse.smila.connectivity.framework.schema/schemas/RootDataSourceConnectionConfigSchema.xsd&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;xs:complexType</span> <span class="re0">name</span>=<span class="st0">&quot;Process&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;xs:annotation<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;xs:documentation<span class="re2">&gt;</span></span></span>Process Specification<span class="sc3"><span class="re1">&lt;/xs:documentation<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/xs:annotation<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;xs:complexContent<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;xs:extension</span> <span class="re0">base</span>=<span class="st0">&quot;Process&quot;</span><span class="re2">&gt;</span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;</span>!-- define crawler specific process here --<span class="re2">&gt;</span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;/xs:extension<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/xs:complexContent<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/xs:complexType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;xs:complexType</span> <span class="re0">name</span>=<span class="st0">&quot;Attribute&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;xs:complexContent<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;xs:extension</span> <span class="re0">base</span>=<span class="st0">&quot;Attribute&quot;</span><span class="re2">&gt;</span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;</span>!-- define crawler specific attributes here --<span class="re2">&gt;</span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;/xs:extension<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/xs:complexContent<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/xs:complexType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/xs:redefine<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/xs:schema<span class="re2">&gt;</span></span></span></pre></div>
<ul><li>create JAXB mapping
<ul><li>create file <tt>schemas\MockCrawlerSchema.jxb</tt> to contain the JAXB mappings used for generating configuration classes.
</li><li>Here is an example for the <tt>MockCrawler</tt> JXB file you can use as a template, just rename the "schemaLocation" and "package name":
</li></ul>
</li></ul>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;jxb:bindings</span> <span class="re0">version</span>=<span class="st0">&quot;1.0&quot;</span>
<span class="re0">xmlns:jxb</span>=<span class="st0">&quot;http://java.sun.com/xml/ns/jaxb&quot;</span>
<span class="re0">xmlns:xs</span>=<span class="st0">&quot;http://www.w3.org/2001/XMLSchema&quot;</span>
<span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;jxb:bindings</span> <span class="re0">schemaLocation</span>=<span class="st0">&quot;MockCrawlerSchema.xsd&quot;</span> <span class="re0">node</span>=<span class="st0">&quot;/xs:schema&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;jxb:schemaBindings<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;jxb:package</span> <span class="re0">name</span>=<span class="st0">&quot;mypackage.crawler.mock.messages&quot;</span><span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;/jxb:schemaBindings<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;jxb:globalBindings<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;jxb:javaType</span> <span class="re0">name</span>=<span class="st0">&quot;java.util.Date&quot;</span> <span class="re0">xmlType</span>=<span class="st0">&quot;xs:dateTime&quot;</span> <span class="re0">printMethod</span>=<span class="st0">&quot;org.eclipse.smila.connectivity.framework.schema.tools.SimpleDateFormatter.print&quot;</span> <span class="re0">parseMethod</span>=<span class="st0">&quot;org.eclipse.smila.connectivity.framework.schema.tools.SimpleDateFormatter.parse&quot;</span><span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;jxb:javaType</span> <span class="re0">name</span>=<span class="st0">&quot;org.eclipse.smila.connectivity.framework.schema.config.MimeTypeAttributeType&quot;</span> <span class="re0">xmlType</span>=<span class="st0">&quot;MimeTypeAttributeType&quot;</span> <span class="re0">parseMethod</span>=<span class="st0">&quot;org.eclipse.smila.connectivity.framework.schema.config.MimeTypeAttributeType.fromValue&quot;</span> <span class="re0">printMethod</span>=<span class="st0">&quot;org.eclipse.smila.connectivity.framework.schema.config.MimeTypeAttributeType.toValue&quot;</span><span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;jxb:serializable</span> <span class="re0">uid</span>=<span class="st0">&quot;1&quot;</span><span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;/jxb:globalBindings<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/jxb:bindings<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/jxb:bindings<span class="re2">&gt;</span></span></span></pre></div>
<p><br />
</p>
<ul><li>Add a schema location reference in the plug-in implementation
<ul><li>Create a new class (<tt>DataSourceConnectionConfigPluginImpl</tt>) which implements the interface <tt>DataSourceConnectionConfigPlugin</tt>.
</li><li>Use the method <tt>String getSchemaLocation()</tt> to return "schemas/MockCrawlerSchema.xsd".
</li><li>Use the method <tt>String getMessagesPackage()</tt> to return the package name "mypackage.crawler.mock.messages".
</li></ul>
</li></ul>
Here is an example implementation for the <tt>MockCrawler</tt> you can use as a template: <div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">package</span> mypackage.<span class="me1">crawler</span>.<span class="me1">mock</span>;
&nbsp;
<span class="kw1">import</span> org.<span class="me1">eclipse</span>.<span class="me1">smila</span>.<span class="me1">connectivity</span>.<span class="me1">framework</span>.<span class="me1">schema</span>.<span class="me1">DataSourceConnectionConfigPlugin</span>;
&nbsp;
<span class="coMULTI">/**
* The Class DataSourceConnectionConfigPluginImpl.
*/</span>
<span class="kw1">public</span> <span class="kw1">class</span> DataSourceConnectionConfigPluginImpl <span class="kw1">implements</span> DataSourceConnectionConfigPlugin <span class="br0">&#123;</span>
&nbsp;
<span class="coMULTI">/**
* {@inheritDoc}
*
* @see org.eclipse.smila.connectivity.framework.schema.DataSourceConnectionConfigPlugin#getSchemaLocation()
*/</span>
<span class="kw1">public</span> <span class="kw3">String</span> getSchemaLocation<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span>
<span class="kw1">return</span> <span class="st0">&quot;schemas/MockCrawlerSchema.xsd&quot;</span>;
<span class="br0">&#125;</span>
&nbsp;
<span class="coMULTI">/**
* {@inheritDoc}
*
* @see org.eclipse.smila.connectivity.framework.schema.DataSourceConnectionConfigPlugin#getMessagesPackage()
*/</span>
<span class="kw1">public</span> <span class="kw3">String</span> getMessagesPackage<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span>
<span class="kw1">return</span> <span class="st0">&quot;mypackage.crawler.mock.messages&quot;</span>;
<span class="br0">&#125;</span>
&nbsp;
<span class="br0">&#125;</span></pre></div>
<ul><li>create new file <tt>plugin.xml</tt>
<ul><li>define the extension for <tt>org.eclipse.smila.connectivity.framework.schema.extension</tt>, using the bundle name as ID and NAME.
</li><li>set the schema class to your implementation of interface <tt>DataSourceConnectionConfigPlugin</tt>
</li><li>Here is an example for the <tt>MockCrawler</tt> <tt>plugin.xml</tt> file you can use as a template:
</li></ul>
</li></ul>
<div dir="ltr" style="text-align: left;"><pre class="source-java">&lt;plugin&gt;
&lt;extension
id=<span class="st0">&quot;myplugin.crawler.mock&quot;</span>
name=<span class="st0">&quot;myplugin.crawler.mock&quot;</span>
point=<span class="st0">&quot;org.eclipse.smila.connectivity.framework.schema.extension&quot;</span>&gt;
&lt;schema
<span class="kw1">class</span>=<span class="st0">&quot;mypackage.crawler.mock.DataSourceConnectionConfigPluginImpl&quot;</span>&gt;
&lt;/schema&gt;
&lt;/extension&gt;
&lt;/plugin&gt;</pre></div>
<p><br />
</p>
<ul><li>Compile schema into JAXB classes by using <tt>ant</tt>
<ul><li>See <a href="../../Development_Guidelines/Setup_for_JAXB_code_generation.html" title="SMILA/Development Guidelines/Setup for JAXB code generation">SMILA/Development Guidelines/Setup for JAXB code generation</a> for instructions on how to set up the JAXB generation tools. It is advisable to keep the <tt>lib</tt> folder outside the workspace, for example in a folder further up the directory tree (e.g. <tt>-Dlib.dir=../../</tt>).
</li><li>create a new file <tt>build.xml</tt> to contain the JAXB build information. Use the following template as the content of <tt>build.xml</tt> and change the property value accordingly:
</li></ul>
</li></ul>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;project</span> <span class="re0">name</span>=<span class="st0">&quot;sub-build&quot;</span> <span class="re0">default</span>=<span class="st0">&quot;compile-schema-and-decorate&quot;</span> <span class="re0">basedir</span>=<span class="st0">&quot;.&quot;</span><span class="re2">&gt;</span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;property</span> <span class="re0">name</span>=<span class="st0">&quot;schema.name&quot;</span> <span class="re0">value</span>=<span class="st0">&quot;MockCrawlerSchema&quot;</span> <span class="re2">/&gt;</span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;import</span> <span class="re0">file</span>=<span class="st0">&quot;../SMILA.builder/xjc/build.xml&quot;</span> <span class="re2">/&gt;</span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;/project<span class="re2">&gt;</span></span></span></pre></div>
<ul><li><ul><li>Launch <tt>ant -Dlib.dir=../lib</tt> from a cmd console to create the java files or to see any error messages.
</li></ul>
</li></ul>
<p><br /> <b>Note:</b> If you rename the schema file name, make sure to update the following locations:
</p>
<ul><li>Plug-in implementation classes
</li><li><tt>MockCrawlerSchema.jxb</tt> (it also should be renamed with the same name as schema)
</li><li><tt>build.xml</tt>
</li></ul>
<a name="OSGi_and_Declarative_Service_requirements"></a><h2> <span class="mw-headline"> OSGi and Declarative Service requirements </span></h2>
<ul><li>It is not required to implement a BundleActivator.
</li><li>Create the top level folder <tt>OSGI-INF</tt>.
</li><li>Create a Component Description file in <tt>OSGI-INF</tt>. You can name the file as you like, but it is good practice to name it after the crawler. In this file you have to provide a unique component name, which should be the same as the crawler's class name. You also have to provide your implementation class and the service interface class, which is always <tt>org.eclipse.smila.connectivity.framework.Crawler</tt>. Here is an example for the <tt>MockCrawler</tt> component description file you can use as a template:
</li></ul>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;component</span> <span class="re0">name</span>=<span class="st0">&quot;MockCrawler&quot;</span> <span class="re0">immediate</span>=<span class="st0">&quot;false&quot;</span> <span class="re0">factory</span>=<span class="st0">&quot;CrawlerFactory&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;implementation</span> <span class="re0">class</span>=<span class="st0">&quot;mypackage.crawler.mock.MockCrawler&quot;</span> <span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;service<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;provide</span> <span class="re0">interface</span>=<span class="st0">&quot;org.eclipse.smila.connectivity.framework.Crawler&quot;</span><span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;/service<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/component<span class="re2">&gt;</span></span></span></pre></div>
<ul><li>Add a <i>Service-Component</i> entry to your manifest file, e.g.:
</li></ul>
<pre>Service-Component: OSGI-INF/mockcrawler.xml
</pre>
<ul><li>Open <tt>build.properties</tt> and change the binary build: Add the folders <tt>OSGI-INF</tt> and <tt>schemas</tt> as well as the file <tt>plugin.xml</tt>.
</li></ul>
<div dir="ltr" style="text-align: left;"><pre class="source-xml">bin.includes = META-INF/,\
.,\
plugin.xml,\
schemas/,\
OSGI-INF/</pre></div>
<p><br />
</p>
<a name="Implement_your_crwler"></a><h2> <span class="mw-headline"> Implement your crawler </span></h2>
<ul><li>Implement your crawler in a new class extending <tt>org.eclipse.smila.connectivity.framework.AbstractCrawler</tt>.
</li></ul>
<ul><li>Integrate your new crawler bundle into the build process: Refer to the page <a href="../../Development_Guidelines/How_to_integrate_new_bundle_into_build_process.html" class="mw-redirect" title="SMILA/Development Guidelines/How to integrate new bundle into build process">How to integrate new bundle into build process</a> for further instructions.
</li></ul>
<ul><li> Follow the example of FileSystemCrawler
</li></ul>
<p>[optional]
</p>
<ul><li>Create a JUnit test bundle for this crawler e.g. <tt>myplugin.crawler.mock.test</tt>.
</li><li>Integrate your test bundle into the build process: Refer to the page <a href="../../Development_Guidelines/How_to_integrate_test_bundle_into_build_process.html" class="mw-redirect" title="SMILA/Development Guidelines/How to integrate test bundle into build process">How to integrate test bundle into build process</a> for further instructions.
</li></ul>
<a name="Activate_your_crawler"></a><h2> <span class="mw-headline"> Activate your crawler </span></h2>
<a name="Activation_SMILA_in_eclipse"></a><h3> <span class="mw-headline"> Activation SMILA in eclipse </span></h3>
<ul><li>Open the <i>Run</i> dialog, switch to the configuration page of <i>Bundles</i>, select your bundle and set the parameter <i>Default Auto-Start</i> to <i>true</i>.
</li><li>Launch <tt>SMILA.launch</tt>.
</li></ul>
<a name="Activation_SMILA_application"></a><h3> <span class="mw-headline"> Activation SMILA application </span></h3>
<ul><li>Add your bundle, e.g. <tt>myplugin.crawler.mock@4:start</tt>, to the <tt>config.ini</tt> file.
</li><li>Launch SMILA by calling either <tt>SMILA.exe</tt> or <tt>eclipse.exe -console</tt>
</li></ul>
<a name="Run_your_crawler"></a><h2> <span class="mw-headline"> Run your crawler </span></h2>
<p>Information on how to start and run a crawler can be found in the <a href="../CrawlerController.html" title="SMILA/Documentation/CrawlerController">CrawlerController</a> documentation.
</p>
<!--
NewPP limit report
Preprocessor node count: 126/1000000
Post-expand include size: 1045/2097152 bytes
Template argument size: 515/2097152 bytes
#ifexist count: 0/100
-->
<!-- Saved in parser cache with key wikidb:pcache:idhash:15203-0!1!0!!en!2!edit=0 and timestamp 20120202171427 -->
<div class="printfooter">
Retrieved from "<a href="How_to_implement_a_crawler.html">http://wiki.eclipse.org/SMILA/Documentation/HowTo/How_to_implement_a_crawler</a>"</div>
<div id="catlinks"><p class='catlinks'><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Category</a>: <span dir='ltr'><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></span></p></div> <!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<!-- Yoink of toolbox for phoenix moved up -->
</div>
</div>
<div id="clearFooter"></div>
<div id="footer" >
<ul id="footernav">
<li class="first"><a href="http://www.eclipse.org/">Home</a></li>
<li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li>
<li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li>
</ul>
<span id="copyright">Copyright &copy; 2012 The Eclipse Foundation. All Rights Reserved</span>
<p id="footercredit">This page was last modified 13:29, 24 January 2012 by <a href="http://wiki.eclipse.org/index.php?title=User:Juergen.schumacher.attensity.com&amp;action=edit" class="new" title="User:Juergen.schumacher.attensity.com">Juergen Schumacher</a>. Based on work by <a href="http://wiki.eclipse.org/User:Andreas.schank.attensity.com" title="User:Andreas.schank.attensity.com">A. Schank</a>, <a href="http://wiki.eclipse.org/index.php?title=User:Leccher.gmail.com&amp;action=edit" class="new" title="User:Leccher.gmail.com">Lorenzo </a> and <a href="http://wiki.eclipse.org/index.php?title=User:Daniel.stucky.attensity.com&amp;action=edit" class="new" title="User:Daniel.stucky.attensity.com">Daniel Stucky</a> and <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/HowTo/How_to_implement_a_crawler&amp;action=credits" title="SMILA/Documentation/HowTo/How to implement a crawler">others</a>.</p>
<p id="footerviews">This page has been accessed 6,278 times.</p>
</div>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
var pageTracker = _gat._getTracker("UA-910670-4");
pageTracker._trackPageview();
</script>
<!-- <div class="visualClear"></div> -->
<script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
</div>
<!-- Served in 0.052 secs. --></body></html>