blob: 21686864dfaa17630a961b63595ed7846304f5c6 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="SMILA/Documentation for 5 Minutes to Success,SMILA,SMILA/Documentation/5 more minutes to change the workflow,SMILA/Documentation/Importing/Concept,SMILA/Documentation/JobDefinitions,SMILA/Documentation/JobManager,SMILA/Documentation/Solr,SMILA/Documentation/TikaPipelet,SMILA/Documentation/Using The ReST API,SMILA/Documentation/Worker/PipelineProcessorWorker,SMILA/FAQ" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" />
<link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=rss" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=atom" />
<title>SMILA/Documentation for 5 Minutes to Success - Eclipsepedia</title>
<style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "http://wiki.eclipse.org/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style>
<link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" />
<link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" />
<!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]-->
<!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]-->
<!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]-->
<!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]-->
<!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]-->
<!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<script type= "text/javascript">/*<![CDATA[*/
var skin = "eclipsenova";
var stylepath = "/skins";
var wgArticlePath = "/$1";
var wgScriptPath = "";
var wgScript = "/index.php";
var wgServer = "http://wiki.eclipse.org";
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "SMILA/Documentation_for_5_Minutes_to_Success";
var wgTitle = "SMILA/Documentation for 5 Minutes to Success";
var wgAction = "view";
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
var wgArticleId = "15784";
var wgIsArticle = true;
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
var wgCurRevisionId = "333225";
var wgVersion = "1.12.0";
var wgEnableAPI = true;
var wgEnableWriteAPI = false;
/*]]>*/</script>
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script>
<!-- Performance mods similar to those for bug 166401 -->
<script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=eclipsenova"><!-- site js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script>
<style type="text/css">/*<![CDATA[*/
.source-text {line-height: normal; font-size: medium;}
.source-text li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for text
* CSS class: source-text, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-text .de1, .source-text .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-text {}
.source-text .head {}
.source-text .foot {}
.source-text .imp {font-weight: bold; color: red;}
.source-text .ln-xtra {color: #cc0; background-color: #ffc;}
.source-text li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-text li.li2 {font-weight: bold;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "http://wiki.eclipse.org/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><style type="text/css">/*<![CDATA[*/
.source-javascript {line-height: normal; font-size: medium;}
.source-javascript li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for javascript
* CSS class: source-javascript, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-javascript .de1, .source-javascript .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-javascript {}
.source-javascript .head {}
.source-javascript .foot {}
.source-javascript .imp {font-weight: bold; color: red;}
.source-javascript .ln-xtra {color: #cc0; background-color: #ffc;}
.source-javascript li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-javascript li.li2 {font-weight: bold;}
.source-javascript .kw1 {color: #000066; font-weight: bold;}
.source-javascript .kw2 {color: #003366; font-weight: bold;}
.source-javascript .kw3 {color: #000066;}
.source-javascript .co1 {color: #009900; font-style: italic;}
.source-javascript .coMULTI {color: #009900; font-style: italic;}
.source-javascript .es0 {color: #000099; font-weight: bold;}
.source-javascript .br0 {color: #66cc66;}
.source-javascript .st0 {color: #3366CC;}
.source-javascript .nu0 {color: #CC0000;}
.source-javascript .me1 {color: #006600;}
.source-javascript .sc0 {}
.source-javascript .sc1 {}
.source-javascript .sc2 {}
.source-javascript .sc3 {}
.source-javascript .re0 {color: #0066FF;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "http://wiki.eclipse.org/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><link rel="stylesheet" type="text/css" href="Documentation_for_5_Minutes_to_Success.html" /> </head>
<body class="mediawiki ns-0 ltr page-SMILA_Documentation_for_5_Minutes_to_Success">
<div id="globalWrapper">
<div id="column-one">
<!-- Eclipse Additions for the Top Nav start here M. Ward-->
<div id="header">
<div id="header-graphic">
<img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki" />
</div>
<!-- Pulled 101409 Mward -->
<div class="portlet" id="p-personal">
<div class="pBody">
<ul>
<li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation_for_5_Minutes_to_Success">Log in</a></li>
</ul>
</div>
</div>
<div id="header-icons">
<div id="sites">
<ul id="sitesUL">
<li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li>
<li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li>
<li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li>
<li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li>
<li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li>
<li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li>
</ul>
</div>
</div>
</div>
<!-- NEW HEADER STUFF HERE -->
<div id="header-menu">
<div id="header-nav">
<ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li>
<li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li>
<li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li>
<li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li>
<li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li>
<li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li>
<li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li>
</ul>
</div>
<div id="header-utils">
<!-- moved the search window here -->
<form action="http://wiki.eclipse.org/Special:Search" >
<input class="input" name="search" id="searchInput" type="text" accesskey="f" value="" />
<input type="submit" name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" />&nbsp;
<input type="submit" name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" />
</form>
</div>
</div>
<!-- Eclipse Additions for the Header stop here -->
<!-- Additions and mods for leftside nav Start here -->
<!--Started nav rip here-->
<!-- these are the nav controls main page, changes etc -->
<div id="novaContent" class="faux">
<div id="leftcol">
<ul id="leftnav">
<!-- these are the page controls, edit history etc -->
<li class="separator"><a class="separator">Navigation &#160;&#160;</a></li>
<li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li>
<li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li>
<li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li>
<li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li>
<li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li>
<li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li>
<li class="separator"><a class="separator">Toolbox &#160;&#160;</a></li>
<li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Documentation_for_5_Minutes_to_Success">What links here</a></li>
<li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Documentation_for_5_Minutes_to_Success">Related changes</a></li>
<!-- This is the toolbox section -->
<li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li>
<li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li>
<li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation_for_5_Minutes_to_Success&amp;printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation_for_5_Minutes_to_Success&amp;oldid=333225">Permanent link</a></li> </ul>
</div>
<!-- Additions and mods for leftside nav End here -->
<div id="column-content">
<div id="content">
<a name="top" id="top"></a>
<div id="tabs">
<ul class="primary">
<li class="active"><a href="Documentation_for_5_Minutes_to_Success.html"><span class="tab">Page</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Documentation_for_5_Minutes_to_Success&amp;action=edit"><span class="tab">Discussion</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation_for_5_Minutes_to_Success&amp;action=edit"><span class="tab">View source</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation_for_5_Minutes_to_Success&amp;action=history"><span class="tab">History</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation&#32;for&#32;5&#32;Minutes&#32;to&#32;Success"><span class="tab">Edit</span></a></li>
</ul>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<h1 class="firstHeading">SMILA/Documentation for 5 Minutes to Success</h1>
<div id="bodyContent">
<h3 id="siteSub">From Eclipsepedia</h3>
<div id="contentSub"><span class="subpages">&lt; <a href="../SMILA.html" title="SMILA">SMILA</a></span></div>
<div id="jump-to-nav">Jump to: <a href="Documentation_for_5_Minutes_to_Success.html#column-one">navigation</a>, <a href="Documentation_for_5_Minutes_to_Success.html#searchInput">search</a></div> <!-- start content -->
<p>
On this page we describe the necessary steps to install and run SMILA in order to create a search index on the <a href="../SMILA.html" title="SMILA">SMILA</a> Eclipsepedia pages and search them.
</p><p>If you have any troubles or the results differ from what is described here, check the <a href="FAQ.html" title="SMILA/FAQ">FAQ</a>.
</p>
<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1"><a href="Documentation_for_5_Minutes_to_Success.html#Supported_Platforms"><span class="tocnumber">1</span> <span class="toctext">Supported Platforms</span></a></li>
<li class="toclevel-1"><a href="Documentation_for_5_Minutes_to_Success.html#Download_and_start_SMILA"><span class="tocnumber">2</span> <span class="toctext">Download and start SMILA</span></a>
<ul>
<li class="toclevel-2"><a href="Documentation_for_5_Minutes_to_Success.html#Preconditions"><span class="tocnumber">2.1</span> <span class="toctext">Preconditions</span></a>
<ul>
<li class="toclevel-3"><a href="Documentation_for_5_Minutes_to_Success.html#JRE"><span class="tocnumber">2.1.1</span> <span class="toctext">JRE</span></a></li>
<li class="toclevel-3"><a href="Documentation_for_5_Minutes_to_Success.html#Linux"><span class="tocnumber">2.1.2</span> <span class="toctext">Linux</span></a></li>
<li class="toclevel-3"><a href="Documentation_for_5_Minutes_to_Success.html#MacOS"><span class="tocnumber">2.1.3</span> <span class="toctext">MacOS</span></a></li>
</ul>
</li>
<li class="toclevel-2"><a href="Documentation_for_5_Minutes_to_Success.html#Start_SMILA"><span class="tocnumber">2.2</span> <span class="toctext">Start SMILA</span></a></li>
<li class="toclevel-2"><a href="Documentation_for_5_Minutes_to_Success.html#Stop_SMILA"><span class="tocnumber">2.3</span> <span class="toctext">Stop SMILA</span></a></li>
</ul>
</li>
<li class="toclevel-1"><a href="Documentation_for_5_Minutes_to_Success.html#Install_a_REST_client"><span class="tocnumber">3</span> <span class="toctext">Install a REST client</span></a></li>
<li class="toclevel-1"><a href="Documentation_for_5_Minutes_to_Success.html#Start_Indexing_Job_and_Crawl_Import"><span class="tocnumber">4</span> <span class="toctext">Start Indexing Job and Crawl Import</span></a>
<ul>
<li class="toclevel-2"><a href="Documentation_for_5_Minutes_to_Success.html#Start_indexing_job_run"><span class="tocnumber">4.1</span> <span class="toctext">Start indexing job run</span></a></li>
<li class="toclevel-2"><a href="Documentation_for_5_Minutes_to_Success.html#Start_the_crawler"><span class="tocnumber">4.2</span> <span class="toctext">Start the crawler</span></a></li>
</ul>
</li>
<li class="toclevel-1"><a href="Documentation_for_5_Minutes_to_Success.html#Search_the_index"><span class="tocnumber">5</span> <span class="toctext">Search the index</span></a></li>
<li class="toclevel-1"><a href="Documentation_for_5_Minutes_to_Success.html#Stop_indexing_job_run"><span class="tocnumber">6</span> <span class="toctext">Stop indexing job run</span></a></li>
<li class="toclevel-1"><a href="Documentation_for_5_Minutes_to_Success.html#Further_steps"><span class="tocnumber">7</span> <span class="toctext">Further steps</span></a>
<ul>
<li class="toclevel-2"><a href="Documentation_for_5_Minutes_to_Success.html#Crawl_the_filesystem"><span class="tocnumber">7.1</span> <span class="toctext">Crawl the filesystem</span></a>
<ul>
<li class="toclevel-3"><a href="Documentation_for_5_Minutes_to_Success.html#Create_your_Job"><span class="tocnumber">7.1.1</span> <span class="toctext">Create your Job</span></a></li>
<li class="toclevel-3"><a href="Documentation_for_5_Minutes_to_Success.html#Start_your_jobs"><span class="tocnumber">7.1.2</span> <span class="toctext">Start your jobs</span></a></li>
<li class="toclevel-3"><a href="Documentation_for_5_Minutes_to_Success.html#Search_for_your_new_data"><span class="tocnumber">7.1.3</span> <span class="toctext">Search for your new data</span></a></li>
</ul>
</li>
<li class="toclevel-2"><a href="Documentation_for_5_Minutes_to_Success.html#5_more_minutes_to_change_the_workflow"><span class="tocnumber">7.2</span> <span class="toctext">5 more minutes to change the workflow</span></a></li>
</ul>
</li>
</ul>
</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
<a name="Supported_Platforms"></a><h2> <span class="mw-headline"> Supported Platforms </span></h2>
<p>The following platforms are supported:
</p>
<ul><li>Linux 32 Bit
</li><li>Linux 64 Bit
</li><li>Mac OS X 64 Bit (Cocoa)
</li><li>Windows 32 Bit
</li><li>Windows 64 Bit
</li></ul>
<a name="Download_and_start_SMILA"></a><h2> <span class="mw-headline"> Download and start SMILA </span></h2>
<p><a href="http://www.eclipse.org/smila/downloads.php" class="external text" title="http://www.eclipse.org/smila/downloads.php" rel="nofollow">Download</a> the SMILA package matching your <a href="Documentation_for_5_Minutes_to_Success.html#Supported_Platforms" title="">operating system</a> and unpack it to an arbitrary folder. This will result in the following folder structure:
</p>
<pre>
/&lt;SMILA&gt;
/configuration
/features
/jmxclient
/plugins
/workspace
.eclipseproduct
...
SMILA
SMILA.ini
</pre>
<a name="Preconditions"></a><h3> <span class="mw-headline"> Preconditions </span></h3>
<p>To be able to start SMILA, check the following preconditions first:
</p>
<a name="JRE"></a><h4> <span class="mw-headline"> JRE </span></h4>
<p>You will have to provide a JRE executable to be able to run SMILA. The JVM version should be Java 7. You may either:
</p>
<ul><li> add the path of your local JRE executable to the PATH environment variable <br />or<br />
</li><li> add the argument <tt>-vm &lt;path/to/jre/executable&gt;</tt> right at the top of the file <tt>SMILA.ini</tt>. <br />Make sure that <tt>-vm</tt> is indeed the first argument in the file, that there is a line break after it and that there are no leading or trailing blanks. It should look similar to the following:
</li></ul>
<div style="margin-left: 1.5em;">
<div dir="ltr" style="text-align: left;"><pre class="source-text">-vm
d:/java/jre7/bin/java
...</pre></div>
</div>
<a name="Linux"></a><h4> <span class="mw-headline"> Linux </span></h4>
<p>When using the Linux distributable of SMILA, make sure that the files <tt>SMILA</tt> and <tt>jmxclient/run.sh</tt> have executable permissions. If not, set the permission by running the following commands in a console:
</p>
<pre>chmod +x ./SMILA
chmod +x ./jmxclient/run.sh
</pre>
<a name="MacOS"></a><h4> <span class="mw-headline"> MacOS </span></h4>
<p>When using Mac OS X, switch to <tt>SMILA.app/Contents/MacOS/</tt> and set the permission by running the following command in a console:
</p>
<pre>chmod a+x ./SMILA
</pre>
<a name="Start_SMILA"></a><h3> <span class="mw-headline"> Start SMILA </span></h3>
<p>To start the SMILA engine, simply double-click the <tt>SMILA</tt> executable. Alternatively, open a command line, navigate to the directory where you extracted the files to, and execute the <tt>SMILA</tt> executable. Wait until the engine has been fully started.
</p><p>You can tell that SMILA has fully started when the following line is printed on the OSGI console: <tt>HTTP server started successfully on port 8080</tt> and you can access SMILA's REST API at <a href="http://localhost:8080/smila/" class="external free" title="http://localhost:8080/smila/" rel="nofollow">http://localhost:8080/smila/</a>.
</p><p>When using Mac OS X, navigate to <tt>SMILA.app/Contents/MacOS/</tt> in a terminal, then start SMILA with <tt>./SMILA</tt>.
</p><p>Before continuing, <a href="FAQ.html#How_can_I_see_that_SMILA_started_correctly.3F" title="SMILA/FAQ">check the log file</a> for possible errors.
</p>
<a name="Stop_SMILA"></a><h3> <span class="mw-headline"> Stop SMILA </span></h3>
<p>To stop the SMILA engine, type <tt>close</tt> into the OSGI console and press <i>Enter</i>:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-text">osgi&gt; close</pre></div>
<p>For further OSGI console commands, enter <tt>help</tt>:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-text">osgi&gt; help</pre></div>
<a name="Install_a_REST_client"></a><h2> <span class="mw-headline"> Install a REST client </span></h2>
<p>We're going to use SMILA's REST API to start and stop jobs, so you need a REST client. In <a href="Documentation/Using_The_ReST_API.html#Interactive_Tools" title="SMILA/Documentation/Using The ReST API">REST Tools</a> you find a selection of recommended browser plugins if you haven't got a suitable REST client yet.
</p>
<a name="Start_Indexing_Job_and_Crawl_Import"></a><h2> <span class="mw-headline"> Start Indexing Job and Crawl Import </span></h2>
<p>Now we're going to crawl the SMILA Eclipsepedia pages and index them using the embedded <a href="Documentation/Solr.html" title="SMILA/Documentation/Solr">Solr integration</a>.
</p>
<a name="Start_indexing_job_run"></a><h3> <span class="mw-headline"> Start indexing job run </span></h3>
<p>We are going to start the predefined indexing job "indexUpdate" based on the predefined asynchronous "indexUpdate" workflow. This indexing job will process the imported data.
</p><p>Use your favorite REST Client to start a job run for the job "indexUpdate":
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-javascript">#Request
POST http:<span class="co1">//localhost:8080/smila/jobmanager/jobs/indexUpdate/</span></pre></div>
<p>Your REST client will show a result like this:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-javascript">#Response
<span class="br0">&#123;</span>
<span class="st0">&quot;jobId&quot;</span>&nbsp;: <span class="st0">&quot;20110901-121343613053&quot;</span>,
<span class="st0">&quot;url&quot;</span>&nbsp;: <span class="st0">&quot;http://localhost:8080/smila/jobmanager/jobs/indexUpdate/20110901-121343613053/&quot;</span>
<span class="br0">&#125;</span></pre></div>
<p>You will need the "jobId" later on to finish the job run. The job run ID can also be found via the monitoring API for the job:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-javascript">#Request
GET http:<span class="co1">//localhost:8080/smila/jobmanager/jobs/indexUpdate/</span></pre></div>
<p>In the <tt>SMILA.log</tt> file you will see a message like this:
</p>
<pre>
INFO ... internal.JobRunEngineImpl - started job run '20110901-121343613053' for job 'indexUpdate'
</pre>
<p><b>Further information</b>: The "indexUpdate" workflow uses the <a href="Documentation/Worker/PipelineProcessorWorker.html" title="SMILA/Documentation/Worker/PipelineProcessorWorker">PipelineProcessorWorker</a> that executes the synchronous "AddPipeline" BPEL workflow. So, the synchronous "AddPipeline" BPEL workflow is embedded in the asynchronous "indexUpdate" workflow. For more details about the "indexUpdate" workflow and "indexUpdate" job definitions see <tt>SMILA/configuration/org.eclipse.smila.jobmanager/workflows.json</tt> and <tt>jobs.json</tt>. For more information about job management in general please check the <a href="Documentation/JobManager.html" title="SMILA/Documentation/JobManager">JobManager documentation</a>.
</p>
<a name="Start_the_crawler"></a><h3> <span class="mw-headline"> Start the crawler </span></h3>
<p>Now that the indexing job is running we need to push some data to it. There is a predefined job for indexing the SMILA Eclipsepedia pages which we are going to start right now. For more information about crawl jobs please see <a href="Documentation/Importing/Concept.html" title="SMILA/Documentation/Importing/Concept">Importing Concept</a>. For more information on jobs and tasks in general visit the <a href="Documentation/JobManager.html" title="SMILA/Documentation/JobManager">JobManager manual</a>.
</p><p>To start the job run, send the following POST request with your REST client to SMILA:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-javascript">#Request
POST http:<span class="co1">//localhost:8080/smila/jobmanager/jobs/crawlSmilaWiki/</span></pre></div>
<p>This starts the job <tt>crawlSmilaWiki</tt>, which crawls the <a href="../SMILA.html" title="SMILA">SMILA Eclipsepedia</a> starting with <tt><a href="../SMILA.html" class="external free" title="http://wiki.eclipse.org/SMILA" rel="nofollow">http://wiki.eclipse.org/SMILA</a></tt> and (by applying the configured filters) following only links that have the same prefix. All pages crawled matching this prefix will be pushed to the import job.
</p><p>If you like, you can monitor both job runs with your REST client at the following URIs:
</p>
<ul><li> Crawl job: <a href="http://localhost:8080/smila/jobmanager/jobs/crawlSmilaWiki" class="external free" title="http://localhost:8080/smila/jobmanager/jobs/crawlSmilaWiki" rel="nofollow">http://localhost:8080/smila/jobmanager/jobs/crawlSmilaWiki</a>
</li><li> Import job: <a href="http://localhost:8080/smila/jobmanager/jobs/indexUpdate" class="external free" title="http://localhost:8080/smila/jobmanager/jobs/indexUpdate" rel="nofollow">http://localhost:8080/smila/jobmanager/jobs/indexUpdate</a>
</li></ul>
<p>Or both in one overview at
</p>
<ul><li> <a href="http://localhost:8080/smila/jobmanager/jobs/" class="external free" title="http://localhost:8080/smila/jobmanager/jobs/" rel="nofollow">http://localhost:8080/smila/jobmanager/jobs/</a>
</li></ul>
<p>Crawling the wiki pages will take some time. Once all pages have been processed, the status of the <a href="http://localhost:8080/smila/jobmanager/jobs/crawlSmilaWiki" class="external text" title="http://localhost:8080/smila/jobmanager/jobs/crawlSmilaWiki" rel="nofollow">crawlSmilaWiki</a>'s job run will change to <span style="font-family:monospace;">SUCCEEDED</span>. You can have a look at SMILA's search page to find out if some of the pages have already made their way into the Solr index.
</p><p><b>Further information:</b> You can find details about the relevant <a href="Manual.html#Importing" class="mw-redirect" title="SMILA/Manual">Import concepts here</a>.
</p>
<a name="Search_the_index"></a><h2> <span class="mw-headline"> Search the index </span></h2>
<div class="messagebox" style="background-color: #def3fe; border: 1px solid #c5d7e0; color: black; padding: 5px; margin: 1ex 0; min-height: 35px; padding-left: 45px;">
<div style="float: left; margin-left: -40px;"><a href="http://wiki.eclipse.org/Image:Note.png" class="image" title="Note.png"><img alt="" src="http://wiki.eclipse.org/images/c/cc/Note.png" width="35" height="35" border="0" /></a></div>
<div><b>Since SMILA uses <a href="Documentation/Solr.html#solrconfig.xml" title="SMILA/Documentation/Solr">Solr's autocommit feature</a> (which is configured in <tt>solrconfig.xml</tt> to a period of 30 seconds or 1000 documents, whichever comes first) it might take some time until you retrieve results.</b><br /></div>
</div>
<p><br />
To search the index which was created by the crawlers, point your browser to <tt><a href="http://localhost:8080/SMILA/search" class="external free" title="http://localhost:8080/SMILA/search" rel="nofollow">http://localhost:8080/SMILA/search</a></tt>. There are currently two stylesheets from which you can select by clicking the respective links in the upper left corner of the header bar: The <i>Default</i> stylesheet shows a reduced search form with text fields like <i>Query</i>, <i>Result Size</i>, and <i>Index</i>, adequate to query the full-text content of the indexed documents. The <i>Advanced</i> stylesheet in turn provides a more detailed search form with text fields for meta-data search like for example <i>Path</i>, <i>MimeType</i>, <i>Filename</i>, and other document attributes.
</p><p><b>To use the <i>Default</i> Stylesheet</b>:
</p>
<ol><li>Point your browser to <tt><a href="http://localhost:8080/SMILA/search" class="external free" title="http://localhost:8080/SMILA/search" rel="nofollow">http://localhost:8080/SMILA/search</a></tt>.
</li><li>Enter a word that you expect to be contained in the crawled wiki pages into the <i>Query</i> text field.
</li><li> Click <i>OK</i> to send your query to SMILA.
</li></ol>
<p><b>To use the <i>Advanced</i> Stylesheet</b>:
</p>
<ol><li>Point your browser to <tt><a href="http://localhost:8080/SMILA/search" class="external free" title="http://localhost:8080/SMILA/search" rel="nofollow">http://localhost:8080/SMILA/search</a></tt>.
</li><li>Click <i>Advanced</i> to switch to the detailed search form.
</li><li>For example, to find a file by its name, enter the file name into the <i>Filename</i> text field, then click <i>OK</i> to submit your search.
</li></ol>
<a name="Stop_indexing_job_run"></a><h2> <span class="mw-headline"> Stop indexing job run </span></h2>
<p>Although there's no need for it, we can now finish our previously started indexing job run via the REST client (replace &lt;job-id&gt; with the job ID you got before when <a href="Documentation_for_5_Minutes_to_Success.html#Start_indexing_job_run" title="">you started the job run</a>):
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-javascript">#Request
POST http:<span class="co1">//localhost:8080/smila/jobmanager/jobs/indexUpdate/&lt;job-id&gt;/finish</span></pre></div>
<p>You can monitor the job run via your browser to see that it has finished successfully:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-javascript">#Request
GET http:<span class="co1">//localhost:8080/smila/jobmanager/jobs/indexUpdate/&lt;job-id&gt;</span></pre></div>
<p>In the <tt>SMILA.log</tt> file you will see messages like this:
</p>
<pre>
INFO ... internal.JobRunEngineImpl - finish called for job 'indexUpdate', run '20110901-141457584011'
...
INFO ... internal.JobRunEngineImpl - Completing job run '20110901-141457584011' for job 'indexUpdate' with final state SUCCEEDED
</pre>
<p>Congratulations, you've just crawled the SMILA Eclipsepedia, indexed the pages and searched through them. For more, just visit <a href="Manual.html" class="mw-redirect" title="SMILA/Manual">SMILA Manual</a>.
</p>
<a name="Further_steps"></a><h2> <span class="mw-headline"> Further steps </span></h2>
<a name="Crawl_the_filesystem"></a><h3> <span class="mw-headline"> Crawl the filesystem </span></h3>
<p>SMILA also has a predefined job for crawling the file system ("crawlFilesystem"), but you will have to either adapt the predefined job so that it points to a valid folder in your file system or create your own job.
</p><p>We will settle for the second option because it does not require you to stop and restart SMILA.
</p>
<a name="Create_your_Job"></a><h4> <span class="mw-headline"> Create your Job </span></h4>
<p>POST the following job description to <a href="Documentation/JobDefinitions.html#List.2C_create.2C_modify_jobs" title="SMILA/Documentation/JobDefinitions">SMILA's Job API</a> at <tt><a href="http://localhost:8080/smila/jobmanager/jobs" class="external free" title="http://localhost:8080/smila/jobmanager/jobs" rel="nofollow">http://localhost:8080/smila/jobmanager/jobs</a></tt>. Adapt the <tt>rootFolder</tt> parameter to point to an existing folder on your machine where you have placed some files (e.g. plain text, office docs or HTML files). If your path includes backslashes, escape them with an additional backslash, e.g. <tt>c:\\data\\files</tt>.
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-javascript">#Request
POST http:<span class="co1">//localhost:8080/smila/jobmanager/jobs/</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;name&quot;</span>:<span class="st0">&quot;crawlFilesAtData&quot;</span>,
<span class="st0">&quot;workflow&quot;</span>:<span class="st0">&quot;fileCrawling&quot;</span>,
<span class="st0">&quot;parameters&quot;</span>:<span class="br0">&#123;</span>
<span class="st0">&quot;tempStore&quot;</span>:<span class="st0">&quot;temp&quot;</span>,
<span class="st0">&quot;dataSource&quot;</span>:<span class="st0">&quot;file&quot;</span>,
<span class="st0">&quot;rootFolder&quot;</span>:<span class="st0">&quot;/data&quot;</span>,
<span class="st0">&quot;jobToPushTo&quot;</span>:<span class="st0">&quot;indexUpdate&quot;</span>,
<span class="st0">&quot;mapping&quot;</span>:<span class="br0">&#123;</span>
<span class="st0">&quot;fileContent&quot;</span>:<span class="st0">&quot;Content&quot;</span>,
<span class="st0">&quot;filePath&quot;</span>:<span class="st0">&quot;Path&quot;</span>,
<span class="st0">&quot;fileName&quot;</span>:<span class="st0">&quot;Filename&quot;</span>,
<span class="st0">&quot;fileExtension&quot;</span>:<span class="st0">&quot;Extension&quot;</span>,
<span class="st0">&quot;fileLastModified&quot;</span>:<span class="st0">&quot;LastModifiedDate&quot;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span></pre></div>
<p><i>Hint: Not all file formats are supported by SMILA out-of-the-box. See the <a href="Documentation/TikaPipelet.html#Supported_document_types" title="SMILA/Documentation/TikaPipelet">supported document types</a> for details.</i>
</p>
<a name="Start_your_jobs"></a><h4> <span class="mw-headline"> Start your jobs </span></h4>
<ul><li>Start the <tt>indexUpdate</tt> job again (see <a href="Documentation_for_5_Minutes_to_Success.html#Start_indexing_job_run" title="">Start indexing job run</a>) if you have already stopped it. If it is still running, you can skip this step:
</li></ul>
<div style="margin-left: 1.5em;">
<div dir="ltr" style="text-align: left;"><pre class="source-javascript">#Request
POST http:<span class="co1">//localhost:8080/smila/jobmanager/jobs/indexUpdate/</span></pre></div>
</div>
<ul><li>Start your <tt>crawlFilesAtData</tt> job in the same way as described in <a href="Documentation_for_5_Minutes_to_Success.html#Start_the_crawler" title="">Start the crawler</a>, but use the job name <tt>crawlFilesAtData</tt> instead of <tt>crawlSmilaWiki</tt>. This new job behaves just like the web crawling job, but its run time might be shorter, depending on how much data there actually is in your <tt>rootFolder</tt>.
</li></ul>
<div style="margin-left: 1.5em;">
<div dir="ltr" style="text-align: left;"><pre class="source-javascript">#Request
POST http:<span class="co1">//localhost:8080/smila/jobmanager/jobs/crawlFilesAtData/</span></pre></div>
</div>
<a name="Search_for_your_new_data"></a><h4> <span class="mw-headline"> Search for your new data </span></h4>
<ol><li>After the job run has finished, wait a moment, then check whether the data has been indexed (see <a href="Documentation_for_5_Minutes_to_Success.html#Search_the_index" title="">Search the index</a> for help).
</li><li>It is also a good idea to check the log file for errors.
</li></ol>
<a name="5_more_minutes_to_change_the_workflow"></a><h3> <span class="mw-headline"> 5 more minutes to change the workflow </span></h3>
<p>The <a href="Documentation/5_more_minutes_to_change_the_workflow.html" title="SMILA/Documentation/5 more minutes to change the workflow">5 more minutes to change the workflow</a> guide shows how you can configure the system so that data from different data sources goes through different workflows and pipelines and is indexed into different indices.
</p>
<!-- Removed a stray, duplicated fragment of the "Start your jobs" instructions that had been rendered here as preformatted text. -->
<!--
NewPP limit report
Preprocessor node count: 150/1000000
Post-expand include size: 1433/2097152 bytes
Template argument size: 813/2097152 bytes
#ifexist count: 0/100
-->
<!-- Saved in parser cache with key wikidb:pcache:idhash:15784-0!1!0!!en!2!edit=0 and timestamp 20130416113414 -->
<div class="printfooter">
Retrieved from "<a href="Documentation_for_5_Minutes_to_Success.html">http://wiki.eclipse.org/SMILA/Documentation_for_5_Minutes_to_Success</a>"</div>
<div id="catlinks"><p class='catlinks'><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Categories</a>: <span dir='ltr'><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></span> | <span dir='ltr'><a href="http://wiki.eclipse.org/Category:HowTo" title="Category:HowTo">HowTo</a></span></p></div> <!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<!-- Yoink of toolbox for phoenix moved up -->
</div>
</div>
<div id="clearFooter"></div>
<div id="footer" >
<ul id="footernav">
<li class="first"><a href="http://www.eclipse.org/">Home</a></li>
<li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li>
<li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li>
</ul>
<span id="copyright">Copyright &copy; 2013 The Eclipse Foundation. All Rights Reserved</span>
<p id="footercredit">This page was last modified 07:47, 9 April 2013 by <a href="http://wiki.eclipse.org/User:Andreas.weber.empolis.com" title="User:Andreas.weber.empolis.com">Andreas Weber</a>. Based on work by <a href="http://wiki.eclipse.org/User:Juergen.schumacher.empolis.com" title="User:Juergen.schumacher.empolis.com">Juergen Schumacher</a>, <a href="http://wiki.eclipse.org/index.php?title=User:Andreas.weber.attensity.com&amp;action=edit" class="new" title="User:Andreas.weber.attensity.com">Andreas Weber</a> and <a href="http://wiki.eclipse.org/index.php?title=User:Juergen.schumacher.attensity.com&amp;action=edit" class="new" title="User:Juergen.schumacher.attensity.com">Juergen Schumacher</a> and <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation_for_5_Minutes_to_Success&amp;action=credits" title="SMILA/Documentation for 5 Minutes to Success">others</a>.</p>
<p id="footerviews">This page has been accessed 17,642 times.</p>
</div>
<script type="text/javascript">
// Choose the Google Analytics host that matches the page's protocol, then
// write a script tag for ga.js into the document. The following script block
// expects the _gat object that ga.js provides.
var gaJsHost = "http://www.";
if (document.location.protocol == "https:") {
  gaJsHost = "https://ssl.";
}
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
// Report this page view to Google Analytics. _gat is not defined in this
// block; it is expected to come from ga.js, requested by the preceding script.
// If that request was blocked or failed, _gat is undefined, so the calls are
// guarded to avoid an uncaught ReferenceError on the page.
try {
  var pageTracker = _gat._getTracker("UA-910670-4");
  pageTracker._trackPageview();
} catch (err) {
  // Analytics is best-effort: a tracking failure must not surface as a page error.
}
</script>
<!-- <div class="visualClear"></div> -->
<script type="text/javascript">
// Invoke runOnloadHook only when it is defined on window.
if (window.runOnloadHook) {
  runOnloadHook();
}
</script>
</div>
<!-- Served in 0.081 secs. --></body></html>