blob: 59a367ae3e22d938866fe48c7eff9a47c522d4cc [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" dir="ltr" class="client-nojs">
<head>
<meta charset="UTF-8" />
<title>SMILA/Documentation/5 more minutes to change the workflow - Eclipsepedia</title>
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="generator" content="MediaWiki 1.23.2" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/eclipse.org-common/themes/solstice/public/images/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (en)" />
<link rel="EditURI" type="application/rsd+xml" href="http://wiki.eclipse.org/api.php?action=rsd" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom feed" href="http://wiki.eclipse.org/index.php?title=Special:RecentChanges&amp;feed=atom" />
<link rel="stylesheet" href="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=mediawiki.legacy.commonPrint%2Cshared%7Cmediawiki.ui.button&amp;only=styles&amp;skin=solstice&amp;*" />
<link rel="stylesheet" href="http://wiki.eclipse.org/skins/solstice/public/stylesheets/styles.min.css?303" media="screen, print" /><meta name="ResourceLoaderDynamicStyles" content="" />
<style>a:lang(ar),a:lang(kk-arab),a:lang(mzn),a:lang(ps),a:lang(ur){text-decoration:none}
/* cache key: my_wiki:resourceloader:filter:minify-css:7:14ece53a42aa314864e5fd8c57f0d98f */</style>
<script src="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=startup&amp;only=scripts&amp;skin=solstice&amp;*"></script>
<script>if(window.mw){
mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"SMILA/Documentation/5_more_minutes_to_change_the_workflow","wgTitle":"SMILA/Documentation/5 more minutes to change the workflow","wgCurRevisionId":381597,"wgRevisionId":381597,"wgArticleId":35503,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["SMILA","HowTo"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRelevantPageName":"SMILA/Documentation/5_more_minutes_to_change_the_workflow","wgIsProbablyEditable":false,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgWikiEditorEnabledModules":{"toolbar":false,"dialogs":false,"hidesig":true,"preview":false,"previewDialog":false,"publish":false},"wgCategoryTreePageCategoryOptions":"{\"mode\":0,\"hideprefix\":20,\"showcount\":true,\"namespaces\":false}"});
}</script><script>if(window.mw){
mw.loader.implement("user.options",function($,jQuery){mw.user.options.set({"ccmeonemails":0,"cols":80,"date":"default","diffonly":0,"disablemail":0,"editfont":"default","editondblclick":0,"editsectiononrightclick":0,"enotifminoredits":0,"enotifrevealaddr":0,"enotifusertalkpages":1,"enotifwatchlistpages":1,"extendwatchlist":0,"fancysig":0,"forceeditsummary":0,"gender":"unknown","hideminor":0,"hidepatrolled":0,"imagesize":2,"math":1,"minordefault":0,"newpageshidepatrolled":0,"nickname":"","norollbackdiff":0,"numberheadings":0,"previewonfirst":0,"previewontop":1,"rcdays":7,"rclimit":50,"rows":25,"showhiddencats":0,"shownumberswatching":1,"showtoolbar":1,"skin":"solstice","stubthreshold":0,"thumbsize":2,"underline":2,"uselivepreview":0,"usenewrc":0,"watchcreations":1,"watchdefault":1,"watchdeletion":0,"watchlistdays":3,"watchlisthideanons":0,"watchlisthidebots":0,"watchlisthideliu":0,"watchlisthideminor":0,"watchlisthideown":0,"watchlisthidepatrolled":0,"watchmoves":0,"wllimit":250,
"useeditwarning":1,"prefershttps":1,"language":"en","variant-gan":"gan","variant-iu":"iu","variant-kk":"kk","variant-ku":"ku","variant-shi":"shi","variant-sr":"sr","variant-tg":"tg","variant-uz":"uz","variant-zh":"zh","searchNs0":true,"searchNs1":false,"searchNs2":false,"searchNs3":false,"searchNs4":false,"searchNs5":false,"searchNs6":false,"searchNs7":false,"searchNs8":false,"searchNs9":false,"searchNs10":false,"searchNs11":false,"searchNs12":false,"searchNs13":false,"searchNs14":false,"searchNs15":false,"variant":"en"});},{},{});mw.loader.implement("user.tokens",function($,jQuery){mw.user.tokens.set({"editToken":"+\\","patrolToken":false,"watchToken":false});},{},{});
/* cache key: my_wiki:resourceloader:filter:minify-js:7:70d74423d3fc1e1c18fa9a1ff645a84a */
}</script>
<script>if(window.mw){
mw.loader.load(["mediawiki.page.startup","mediawiki.legacy.wikibits","mediawiki.legacy.ajax"]);
}</script>
<style type="text/css">/*<![CDATA[*/
.source-xml {line-height: normal;}
.source-xml li, .source-xml pre {
line-height: normal; border: 0px none white;
}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for xml
* CSS class: source-xml, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann
* (http://qbnz.com/highlighter/ and http://geshi.org/)
* --------------------------------------
*/
.xml.source-xml .de1, .xml.source-xml .de2 {font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;}
.xml.source-xml {font-family:monospace;}
.xml.source-xml .imp {font-weight: bold; color: red;}
.xml.source-xml li, .xml.source-xml .li1 {font-weight: normal; vertical-align:top;}
.xml.source-xml .ln {width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;}
.xml.source-xml .li2 {font-weight: bold; vertical-align:top;}
.xml.source-xml .es0 {color: #000099; font-weight: bold;}
.xml.source-xml .br0 {color: #66cc66;}
.xml.source-xml .sy0 {color: #66cc66;}
.xml.source-xml .st0 {color: #ff0000;}
.xml.source-xml .nu0 {color: #cc66cc;}
.xml.source-xml .sc-1 {color: #808080; font-style: italic;}
.xml.source-xml .sc0 {color: #00bbdd;}
.xml.source-xml .sc1 {color: #ddbb00;}
.xml.source-xml .sc2 {color: #339933;}
.xml.source-xml .sc3 {color: #009900;}
.xml.source-xml .re0 {color: #000066;}
.xml.source-xml .re1 {color: #000000; font-weight: bold;}
.xml.source-xml .re2 {color: #000000; font-weight: bold;}
.xml.source-xml .ln-xtra, .xml.source-xml li.ln-xtra, .xml.source-xml div.ln-xtra {background-color: #ffc;}
.xml.source-xml span.xtra { display:block; }
/*]]>*/
</style><meta name="viewport" content="width=device-width, initial-scale=1.0"></head>
<body class="mediawiki ltr sitedir-ltr ns-0 ns-subject page-SMILA_Documentation_5_more_minutes_to_change_the_workflow skin-solstice action-view" id="solstice">
<a class="sr-only" href="5_more_minutes_to_change_the_workflow.html#content">Skip to main content</a>
<div class="thin-header">
<header role="banner" class="hidden-print noprint">
<div class="container-fluid">
<div id="row-logo-search">
<div id="header-left">
<div class="row">
<div class="hidden-xs col-sm-6 logo-container">
<a href="https://www.eclipse.org/" ><img class="logo-eclipse-default" src="http://wiki.eclipse.org/skins/solstice/public/images/logo/eclipse-800x188.png" alt="Eclipsepedia"></a>
</div>
<div class="navbar col-sm-18 yamm" id="main-menu">
<div class="navbar-collapse collapse" id="navbar-collapse-1">
<ul class="nav navbar-nav">
<li><a target="_self" href="https://eclipse.org/downloads/">Download</a></li>
<li><a target="_self" href="https://eclipse.org/users/">Getting Started </a></li>
<li><a target="_self" href="https://eclipse.org/membership/">Members</a></li>
<li><a target="_self" href="https://eclipse.org/projects/">Projects</a></li>
<li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="5_more_minutes_to_change_the_workflow.html#">Community <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="http://marketplace.eclipse.org">Marketplace</a></li><li><a href="http://events.eclipse.org">Events</a></li><li><a href="http://www.planeteclipse.org/">Planet Eclipse</a></li><li><a href="https://eclipse.org/community/eclipse_newsletter/">Newsletter</a></li><li><a href="https://www.youtube.com/user/EclipseFdn">Videos</a></li></ul></li><li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="5_more_minutes_to_change_the_workflow.html#">Participate <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li><li><a href="https://eclipse.org/forums/">Forums</a></li><li><a href="https://eclipse.org/mail/">Mailing Lists</a></li><li><a href="https://wiki.eclipse.org/">Wiki</a></li><li><a href="https://wiki.eclipse.org/IRC">IRC</a></li><li><a href="https://eclipse.org/contribute/">How to Contribute</a></li></ul></li><li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="5_more_minutes_to_change_the_workflow.html#">Working Groups <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="http://wiki.eclipse.org/Auto_IWG">Automotive</a></li><li><a href="http://iot.eclipse.org">Internet of Things</a></li><li><a href="http://locationtech.org">LocationTech</a></li><li><a href="http://lts.eclipse.org">Long-Term Support</a></li><li><a href="http://polarsys.org">PolarSys</a></li><li><a href="http://science.eclipse.org">Science</a></li><li><a href="http://openmdm.org">OpenMDM</a></li></ul></li><!-- More -->
<li class="dropdown hidden-xs"><a class="dropdown-toggle" data-toggle="dropdown">More<b class="caret"></b></a>
<ul class="dropdown-menu">
<li>
<!-- Content container to add padding -->
<div class="yamm-content">
<div class="row">
<ul class="col-sm-8 list-unstyled"><li><p><strong>Community</strong></p></li><li><a href="http://marketplace.eclipse.org">Marketplace</a></li><li><a href="http://events.eclipse.org">Events</a></li><li><a href="http://www.planeteclipse.org/">Planet Eclipse</a></li><li><a href="https://eclipse.org/community/eclipse_newsletter/">Newsletter</a></li><li><a href="https://www.youtube.com/user/EclipseFdn">Videos</a></li></ul><ul class="col-sm-8 list-unstyled"><li><p><strong>Participate</strong></p></li><li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li><li><a href="https://eclipse.org/forums/">Forums</a></li><li><a href="https://eclipse.org/mail/">Mailing Lists</a></li><li><a href="https://wiki.eclipse.org/">Wiki</a></li><li><a href="https://wiki.eclipse.org/IRC">IRC</a></li><li><a href="https://eclipse.org/contribute/">How to Contribute</a></li></ul><ul class="col-sm-8 list-unstyled"><li><p><strong>Working Groups</strong></p></li><li><a href="http://wiki.eclipse.org/Auto_IWG">Automotive</a></li><li><a href="http://iot.eclipse.org">Internet of Things</a></li><li><a href="http://locationtech.org">LocationTech</a></li><li><a href="http://lts.eclipse.org">Long-Term Support</a></li><li><a href="http://polarsys.org">PolarSys</a></li><li><a href="http://science.eclipse.org">Science</a></li><li><a href="http://openmdm.org">OpenMDM</a></li></ul> </div>
</div>
</li>
</ul>
</li>
</ul>
</div>
<div class="navbar-header">
<button data-target="#navbar-collapse-1" data-toggle="collapse" class="navbar-toggle" type="button">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="https://www.eclipse.org/" class="visible-xs navbar-brand"><img class="logo-eclipse-default" src="http://wiki.eclipse.org/skins/solstice/public/images/logo/eclipse-800x188.png" alt="Eclipsepedia" width="174"></a>
</div>
</div>
</div>
</div>
</div>
</div>
</header>
<section class="defaut-breadcrumbs hidden-print noprint hidden-print clearfix" id="breadcrumb">
<div>
<ol class="breadcrumb">
<li><a href="https://www.eclipse.org/">Home</a></li>
<li><a href="http://wiki.eclipse.org/Main_Page">Eclipse Wiki</a></li>
<li class="active">SMILA/Documentation/5 more minutes to change the workflow</li></ol>
</div>
</section>
</div>
<div class="toolbar-menu breadcrumbs-offset noprint hidden-print margin-bottom-0 clearfix">
<div class="col-md-24">
<ol class="breadcrumb" role="navigation">
<li id="pt-login">
<a href="http://wiki.eclipse.org/index.php?title=Special:UserLogin&amp;returnto=SMILA%2FDocumentation%2F5+more+minutes+to+change+the+workflow">
<i class="fa fa-sign-in fa-fw orange"></i> Log in </a>
</li>
</ol>
</div>
</div>
<main role="main" class="background-grey">
<div class="container-full padding-top-25">
<!-- content -->
<section id="content" class="mw-body container-full clearfix 0">
<div id="mw-js-message" style="display:none;"></div>
<!-- bodyContent -->
<div id="bodyContent">
<!-- jumpto -->
<div id="jump-to-nav" class="mw-jump">
Jump to: <a href="5_more_minutes_to_change_the_workflow.html#mw-head">navigation</a>,
<a href="5_more_minutes_to_change_the_workflow.html#p-search">search</a>
</div>
<!-- /jumpto -->
<!-- leftcol -->
<aside class="col-md-4 noprint hidden-print" id="leftcol">
<form class="input-group" role="form" id="form-eclipse-search" action="http://wiki.eclipse.org/index.php">
<input id="searchInput" class="search-query form-control" type="search" accesskey="f" title="Special:Search" placeholder="Search" name="search" value="">
<span class="input-group-btn">
<button value="search" id="mw-searchButton" type="submit" class="btn btn-default" title="Search the pages for this text" name="fulltext">
<i class="fa fa-search"></i>
</button>
</span>
</form>
<select class="form-control margin-top-10 margin-bottom-10 visible-xs visible-sm" onchange="this.options[this.selectedIndex].value && (window.location = this.options[this.selectedIndex].value);"><option class="fw-700 "><span class="fw-700">---Navigation---</span></option><option value="/Main_Page">Main Page</option><option value="/Eclipsepedia:Community_portal">Community portal</option><option value="/Eclipsepedia:Current_events">Current events</option><option value="/Special:RecentChanges">Recent changes</option><option value="/Special:Random">Random page</option><option value="https://www.mediawiki.org/wiki/Special:MyLanguage/Help:Contents">Help</option></select><ul class="ul-left-nav fa-ul hidden-print leftnav hidden-xs hidden-sm"><li class="separator"><span class="separator">Navigation</span></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Main_Page" id="n-mainpage" title="Visit the main page [z]" accesskey="z">Main Page</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Eclipsepedia:Community_portal" id="n-portal" title="About the project, what you can do, where to find things">Community portal</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Eclipsepedia:Current_events" id="n-currentevents" title="Find background information on current events">Current events</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:RecentChanges" id="n-recentchanges" title="A list of recent changes in the wiki [r]" accesskey="r">Recent changes</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:Random" id="n-randompage" title="Load a random page [x]" accesskey="x">Random page</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a 
href="https://www.mediawiki.org/wiki/Special:MyLanguage/Help:Contents" id="n-help" title="The place to find out">Help</a></li></ul> <select class="form-control margin-top-10 margin-bottom-10 visible-xs visible-sm" onchange="this.options[this.selectedIndex].value && (window.location = this.options[this.selectedIndex].value);"><option class="fw-700 "><span class="fw-700">---Toolbox---</span></option><option value="/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;action=info">Page information</option><option value="/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;oldid=381597">Permanent link</option><option value="/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;printable=yes">Printable version</option><option value="/Special:SpecialPages">Special pages</option><option value="/Special:RecentChangesLinked/SMILA/Documentation/5_more_minutes_to_change_the_workflow">Related changes</option><option value="/Special:WhatLinksHere/SMILA/Documentation/5_more_minutes_to_change_the_workflow">What links here</option></select><ul class="ul-left-nav fa-ul hidden-print leftnav hidden-xs hidden-sm"><li class="separator"><span class="separator">Toolbox</span></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;action=info" id="t-info">Page information</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;oldid=381597" id="t-permalink" title="Permanent link to this revision of the page">Permanent link</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;printable=yes" id="t-print" rel="alternate" title="Printable version of this page 
[p]" accesskey="p">Printable version</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:SpecialPages" id="t-specialpages" title="A list of all special pages [q]" accesskey="q">Special pages</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:RecentChangesLinked/SMILA/Documentation/5_more_minutes_to_change_the_workflow" id="t-recentchangeslinked" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:WhatLinksHere/SMILA/Documentation/5_more_minutes_to_change_the_workflow" id="t-whatlinkshere" title="A list of all wiki pages that link here [j]" accesskey="j">What links here</a></li></ul> </aside>
<!-- /leftcol -->
<!-- mainContent -->
<div id="mainContent" class="col-md-20">
<ul class="nav nav-tabs noprint hidden-print" role="tablist">
<li id="ca-nstab-main" class="active"><a href="5_more_minutes_to_change_the_workflow.html" title="View the content page [c]" accesskey="c" tabindex="-1">Page</a></li>
<li id="ca-talk" class="new"><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;action=edit&amp;redlink=1" title="Discussion about the content page [t]" accesskey="t" tabindex="-1">Discussion</a></li>
<li id="ca-viewsource"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;action=edit" title="This page is protected.&#10;You can view its source [e]" accesskey="e" tabindex="-1">View source</a></li>
<li id="ca-history" class="collapsible"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;action=history" title="Past revisions of this page [h]" accesskey="h" tabindex="-1">History</a></li>
</ul> <div class="tab-content background-white">
<div id="tab-pane-main-page-content" class="tab-pane active">
<h1 id="firstHeading" class="firstHeading page-header">
<span dir="auto">SMILA/Documentation/5 more minutes to change the workflow</span>
</h1>
<div id="main-page-content">
<!-- subtitle -->
<div id="contentSub" class="alert alert-small alert-warning"><span class="subpages">&lt; <a href="../../SMILA.html" title="SMILA">SMILA</a>&lrm; | <a href="../Documentation.1.html" title="SMILA/Documentation">Documentation</a></span></div>
<!-- /subtitle -->
<div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr"><p><br />
</p>
<div id="toc" class="toc"><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1 tocsection-1"><a href="5_more_minutes_to_change_the_workflow.html#Just_another_5_minutes_to_change_the_workflow"><span class="tocnumber">1</span> <span class="toctext">Just another 5 minutes to change the workflow</span></a>
<ul>
<li class="toclevel-2 tocsection-2"><a href="5_more_minutes_to_change_the_workflow.html#Configure_new_solr_index"><span class="tocnumber">1.1</span> <span class="toctext">Configure new solr index</span></a></li>
<li class="toclevel-2 tocsection-3"><a href="5_more_minutes_to_change_the_workflow.html#Create_a_new_indexing_script"><span class="tocnumber">1.2</span> <span class="toctext">Create a new indexing script</span></a></li>
<li class="toclevel-2 tocsection-4"><a href="5_more_minutes_to_change_the_workflow.html#Create_and_start_a_new_indexing_job"><span class="tocnumber">1.3</span> <span class="toctext">Create and start a new indexing job</span></a></li>
<li class="toclevel-2 tocsection-5"><a href="5_more_minutes_to_change_the_workflow.html#Create_a_new_web_crawl_job"><span class="tocnumber">1.4</span> <span class="toctext">Create a new web crawl job</span></a></li>
<li class="toclevel-2 tocsection-6"><a href="5_more_minutes_to_change_the_workflow.html#Put_it_all_together"><span class="tocnumber">1.5</span> <span class="toctext">Put it all together</span></a></li>
</ul>
</li>
<li class="toclevel-1 tocsection-7"><a href="5_more_minutes_to_change_the_workflow.html#Configuration_overview"><span class="tocnumber">2</span> <span class="toctext">Configuration overview</span></a></li>
</ul>
</div>
<h1><span class="mw-headline" id="Just_another_5_minutes_to_change_the_workflow">Just another 5 minutes to change the workflow</span></h1>
<p>In the <a href="../5_Minutes_Tutorial.html" title="SMILA/5 Minutes Tutorial">5 minutes tutorial</a> all data collected by crawlers was processed with the same asynchronous "indexUpdate" workflow using the script "add.js". All data was indexed into the same solr/lucene index "DefaultCore".
It is possible, however, to configure SMILA so that data from different data sources will go through different workflows and pipelines and will be indexed into different indices. This will require more advanced configuration features than before, but they are still quite simple.
</p><p>In the following sections we are going to use the generic asynchronous "indexWithScript" workflow, which lets you specify the JavaScript script to process the data. We create an additional script for webcrawler records so that webcrawler data will be indexed into a separate search index named "webCollection".
</p>
<h2><span class="mw-headline" id="Configure_new_solr_index">Configure new solr index</span></h2>
<table width="100%" style="background-color:#d8e4f1; padding-left:30px;">
<tr>
<td>
<p>Please shut down SMILA now if it's still running.
</p>
</td></tr></table>
<p>To configure your own search index "webCollection", copy the <tt>collection1</tt> configuration folder (see <tt>SMILA/configuration/org.eclipse.smila.solr/solr_home</tt>) under the name <tt>webCollection</tt> in the same directory, delete the <tt>data</tt> folder in the copy and adapt the <tt>core.properties</tt> file.
</p><p>Afterwards add your new core to the file <tt>SMILA.application/configuration/org.eclipse.smila.solr/solr-config.json</tt>:
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="xml source-xml"><pre class="de1">{
&quot;mode&quot;:&quot;embedded&quot;,
&quot;idFields&quot;:{
&quot;collection1&quot;:&quot;_recordid&quot;,
&quot;webCollection&quot;:&quot;_recordid&quot;
},
...
}</pre></div></div>
<table width="100%" style="background-color:#d8e4f1; padding-left:30px;">
<tr>
<td>
<p>Please restart SMILA now.
</p>
</td></tr></table>
<p><b>Further information:</b> For more information about Solr indexing, please see the <a href="Solr_4.x.html" title="SMILA/Documentation/Solr 4.x">SMILA Solr 4.x documentation</a>.
</p>
<h2><span class="mw-headline" id="Create_a_new_indexing_script">Create a new indexing script</span></h2>
<p>We need to add a new script for adding the imported webcrawler records.
Predefined scripts are contained in the <tt>configuration/org.eclipse.smila.scripting/js</tt> directory. We can add new scripts by just adding them there.
</p><p>Copy the script "add.js", name the copy "addWeb.js" and change the index name "indexname" in there from "collection1" to "webCollection":
</p>
<pre>
...
var solrIndexPipelet = pipelets.create(&quot;org.eclipse.smila.solr.update.SolrUpdatePipelet&quot;, {
&quot;indexname&quot;&#160;: &quot;webCollection&quot;,
...
</pre>
<p><b>Further information:</b> For more information about Scripting please check the <a href="Scripting.html" title="SMILA/Documentation/Scripting">Scripting</a> documentation.
</p>
<h2><span class="mw-headline" id="Create_and_start_a_new_indexing_job">Create and start a new indexing job</span></h2>
<p>We define a new indexing job based on the predefined asynchronous workflow "indexWithScript" (see <tt>SMILA/configuration/org.eclipse.smila.jobmanager/workflows.json</tt>). This indexing job will process the imported data by using our new script "addWeb.js".
</p><p>The "indexWithScript" workflow contains a <a href="Worker/ScriptProcessorWorker.html" title="SMILA/Documentation/Worker/ScriptProcessorWorker">ScriptProcessorWorker worker</a> which is not configured for a dedicated script, so the scripts handling adds and deletes have to be set via job parameters.
</p><p>Use your favourite REST Client to create an appropriate job definition:
</p>
<pre>
POST http://localhost:8080/smila/jobmanager/jobs/
{
&quot;name&quot;:&quot;indexWebJob&quot;,
&quot;parameters&quot;:{
&quot;tempStore&quot;: &quot;temp&quot;,
&quot;addScript&quot;: &quot;addWeb&quot;,
&quot;deleteScript&quot;: &quot;delete&quot;
},
&quot;workflow&quot;:&quot;indexWithScript&quot;
}
</pre>
<p>Notes:
</p>
<ul>
<li> the "deleteScript" is not needed for our test scenario here, but we must fulfill all undefined workflow parameters.
</li>
<li> in the add and the delete script we use the standard function ("process"), so we don't have to set/change this via parameter.
</li>
</ul>
<p>Afterwards, start a job run for the defined job:
</p>
<pre>
POST http://localhost:8080/smila/jobmanager/jobs/indexWebJob
</pre>
<h2><span class="mw-headline" id="Create_a_new_web_crawl_job">Create a new web crawl job</span></h2>
<p>Since the predefined web crawl job pushes the crawled records to the <tt>indexUpdate</tt> job, we create a new job here using our new indexing job.
</p>
<pre>
POST http://localhost:8080/smila/jobmanager/jobs/
{
&quot;name&quot;:&quot;crawlWikiToWebCore&quot;,
&quot;workflow&quot;:&quot;webCrawling&quot;,
&quot;parameters&quot;:{
&quot;tempStore&quot;:&quot;temp&quot;,
&quot;dataSource&quot;:&quot;web&quot;,
&quot;jobToPushTo&quot;:&quot;indexWebJob&quot;,
&quot;startUrl&quot;:&quot;http://wiki.eclipse.org/SMILA&quot;,
&quot;linksPerBulk&quot;: 100,
&quot;filters&quot;:{
&quot;urlPatterns&quot;: {
&quot;include&quot;: [&quot;http://wiki\\.eclipse\\.org/SMILA.*&quot;,
&quot;http://wiki\\.eclipse\\.org/Image:.*&quot;,
&quot;http://wiki\\.eclipse\\.org/images/.*&quot;],
&quot;exclude&quot;: [&quot;.*\\?.*&quot;,
&quot;http://wiki\\.eclipse\\.org/images/archive/.*&quot;,
&quot;.*\\.java&quot;]
}
},
&quot;mapping&quot;: {
&quot;httpCharset&quot;: &quot;Charset&quot;,
&quot;httpContenttype&quot;: &quot;ContentType&quot;,
&quot;httpLastModified&quot;: &quot;LastModifiedDate&quot;,
&quot;httpMimetype&quot;: &quot;MimeType&quot;,
&quot;httpSize&quot;: &quot;Size&quot;,
&quot;httpUrl&quot;: &quot;Url&quot;,
&quot;httpContent&quot;: &quot;Content&quot;
}
}
}</pre>
<p>Please note that we used the following line to let the crawl job push the records to our new indexing job:
</p>
<pre>
&quot;jobToPushTo&quot;:&quot;indexWebJob&quot;
</pre>
<p>Now start the crawl job:
</p>
<pre>
POST http://localhost:8080/smila/jobmanager/jobs/crawlWikiToWebCore
</pre>
<p>After some time to crawl, process and commit the data, you can have another look at the <a rel="nofollow" class="external text" href="http://localhost:8080/SMILA/search">SMILA search page</a> to find your new core listed among the available cores, and if you choose it, you can search for e.g. "SMILA" in the new webCollection.
</p>
<h2><span class="mw-headline" id="Put_it_all_together">Put it all together</span></h2>
<p>OK, now it seems that we have finally finished configuring SMILA to use separate scripts for file system and web crawling and to index data from these crawlers into different indices.
Here is what we have done so far:
</p>
<ol>
<li> We added the <tt>webCollection</tt> index to the Solr configuration.
</li>
<li> We created a new JavaScript script for Web crawler data referencing the new Solr index.
</li>
<li> We used a separate job for web indexing that references the new script.
</li>
<li> We used a separate web crawl job to push the records to the new indexing job.
</li>
</ol>
<h1><span class="mw-headline" id="Configuration_overview">Configuration overview</span></h1>
<p>SMILA configuration files are located in the <tt>configuration</tt> directory of the SMILA application.
The following lists the configuration files and documentation links relevant to this tutorial, regarding SMILA components:
</p><p><b>Jobmanager</b>
</p>
<ul>
<li> configuration folder: <tt>org.eclipse.smila.jobmanager</tt>
<ul>
<li> <tt>workflows.json</tt> (Predefined asynchronous workflows)
</li>
<li> <tt>jobs.json</tt> (Predefined jobs)
</li>
</ul>
</li>
<li> Documentation
<ul>
<li> <a href="JobManager.html" title="SMILA/Documentation/JobManager">JobManager</a>
</li>
</ul>
</li>
<li> REST API: <a rel="nofollow" class="external free" href="http://localhost:8080/smila/jobmanager">http://localhost:8080/smila/jobmanager</a>
</li>
</ul>
<p><b>Scripting</b>
</p>
<ul>
<li> configuration folder: <tt>org.eclipse.smila.scripting</tt>
<ul>
<li> <tt>js/</tt> (Predefined JavaScript scripts)
</li>
</ul>
</li>
<li> Documentation
<ul>
<li> <a href="Scripting.html" title="SMILA/Documentation/Scripting">Scripting</a>
</li>
<li> <a href="Scripting.html#ScriptProcessorWorker" title="SMILA/Documentation/Scripting">ScriptProcessorWorker</a>
</li>
</ul>
</li>
</ul>
<ul>
<li> REST API: <a rel="nofollow" class="external free" href="http://localhost:8080/smila/script">http://localhost:8080/smila/script</a>
</li>
</ul>
<p><b>Solr</b>
</p>
<ul>
<li> DataDictionary
<ul>
<li> configuration folder: <tt>org.eclipse.smila.solr</tt>
</li>
</ul>
</li>
<li> Documentation
<ul>
<li> <a href="Solr_4.x.html" title="SMILA/Documentation/Solr 4.x">SMILA/Documentation/Solr_4.x</a>
</li>
</ul>
</li>
</ul>
<!--
NewPP limit report
CPU time usage: 0.060 seconds
Real time usage: 0.068 seconds
Preprocessor visited node count: 90/1000000
Preprocessor generated node count: 172/1000000
Post‐expand include size: 0/2097152 bytes
Template argument size: 0/2097152 bytes
Highest expansion depth: 2/40
Expensive parser function count: 0/100
-->
<!-- Saved in parser cache with key my_wiki:pcache:idhash:35503-0!*!0!!en!*!* and timestamp 20150414084524 and revision id 381597
-->
</div>
<!-- catlinks -->
<div id='catlinks' class='catlinks'><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Categories</a>: <ul><li><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></li><li><a href="http://wiki.eclipse.org/Category:HowTo" title="Category:HowTo">HowTo</a></li></ul></div></div> <!-- /catlinks -->
</div>
</div>
</div>
</div>
<!-- /maincontent -->
<!-- printfooter -->
<div class="printfooter">
Retrieved from "<a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;oldid=381597">http://wiki.eclipse.org/index.php?title=SMILA/Documentation/5_more_minutes_to_change_the_workflow&amp;oldid=381597</a>" </div>
<!-- /printfooter -->
<!-- debughtml -->
<!-- /debughtml -->
</div>
<!-- /bodyContent -->
</section>
<!-- /content -->
<!-- footer -->
</div> <section id="footer-contribution-info" style="border-top:1px solid #ccc;" class="footer-offset background-white margin-top-25"><div class="container text-center padding-top-10 padding-bottom-10"><p id="footercredit">This page was last modified 06:30, 9 April 2015 by <a href="http://wiki.eclipse.org/index.php?title=User:Julia.kudrin.empolis.com&amp;action=edit&amp;redlink=1" class="new" title="User:Julia.kudrin.empolis.com (page does not exist)">Julia Kudrin</a>. Based on work by <a href="http://wiki.eclipse.org/User:Juergen.schumacher.empolis.com" title="User:Juergen.schumacher.empolis.com">Juergen Schumacher</a>, <a href="http://wiki.eclipse.org/User:Andreas.weber.empolis.com" title="User:Andreas.weber.empolis.com">Andreas Weber</a> and <a href="http://wiki.eclipse.org/User:Andreas.schank.attensity.com" title="User:Andreas.schank.attensity.com">A. Schank</a>.</p><p id="footerviews">This page has been accessed 2,724 times.</p></div></section> </main> <!-- /#main-content-container-row -->
<p id="back-to-top" class="noprint hidden-print">
<a class="visible-xs" href="5_more_minutes_to_change_the_workflow.html#top">Back to the top</a>
</p>
<footer role="contentinfo" class="noprint hidden-print">
<div class="container">
<div class="row">
<section id="footer-eclipse-foundation" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Eclipse Foundation</h2>
<ul class="nav">
<li><a href="https://eclipse.org/org/">About us</a></li>
<li><a href="https://eclipse.org/org/foundation/contact.php">Contact Us</a></li>
<li><a href="https://eclipse.org/donate">Donate</a></li>
<li><a href="https://eclipse.org/org/documents/">Governance</a></li>
<li><a href="https://eclipse.org/artwork/">Logo and Artwork</a></li>
<li><a href="https://eclipse.org/org/foundation/directors.php">Board of Directors</a></li>
</ul>
</section>
<section id="footer-legal" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Legal</h2>
<ul class="nav">
<li><a href="https://eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="https://eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="https://eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="https://eclipse.org/org/documents/epl-v10.php">Eclipse Public License</a></li>
<li><a href="https://eclipse.org/legal/">Legal Resources</a></li>
</ul>
</section>
<section id="footer-useful-links" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Useful Links</h2>
<ul class="nav">
<li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li>
<li><a href="http://help.eclipse.org/">Documentation</a></li>
<li><a href="https://eclipse.org/contribute/">How to Contribute</a></li>
<li><a href="https://eclipse.org/mail/">Mailing Lists</a></li>
<li><a href="https://eclipse.org/forums/">Forums</a></li>
<li><a href="http://marketplace.eclipse.org/">Marketplace</a></li>
</ul>
</section>
<section id="footer-other" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Other</h2>
<ul class="nav">
<li><a href="https://eclipse.org/ide/">IDE and Tools</a></li>
<li><a href="https://eclipse.org/projects">Community of Projects</a></li>
<li><a href="https://eclipse.org/org/workinggroups/">Working Groups</a></li>
</ul>
<!-- Icon-only social links: each anchor carries an aria-label so it has an accessible name, and the purely decorative icon element is hidden from assistive technology. -->
<ul class="list-inline social-media">
<li><a href="https://twitter.com/EclipseFdn" aria-label="Eclipse Foundation on Twitter"><i class="fa fa-twitter-square" aria-hidden="true"></i></a></li>
<li><a href="https://plus.google.com/+Eclipse" aria-label="Eclipse on Google+"><i class="fa fa-google-plus-square" aria-hidden="true"></i></a></li>
<li><a href="https://www.facebook.com/eclipse.org" aria-label="Eclipse on Facebook"><i class="fa fa-facebook-square" aria-hidden="true"></i></a></li>
<li><a href="https://www.youtube.com/user/EclipseFdn" aria-label="Eclipse Foundation on YouTube"><i class="fa fa-youtube-square" aria-hidden="true"></i></a></li>
</ul>
</section>
<div id="copyright" class="col-xs-offset-1 col-sm-14 col-md-24 col-md-offset-0">
<div>
<!-- The logo is requested over HTTPS, matching the other eclipse.org footer URLs, so it is not treated as mixed content when the page is served securely. -->
<span><img src="https://eclipse.org/eclipse.org-common/themes/solstice/public/images/logo/eclipse-logo-bw-800x188.png" alt="Eclipse.org black and white logo" width="166" height="39" id="logo-eclipse-white"/></span>
<p id="copyright-text">Copyright &copy; 2014 The Eclipse Foundation. All Rights Reserved.</p>
</div>
</div>
<a href="5_more_minutes_to_change_the_workflow.html#" class="scrollup">Back to the top</a>
</div>
</div>
</footer>
<script src="http://wiki.eclipse.org/skins/solstice/public/javascript/main.min.js"></script>
<!-- Placed at the end of the document so the pages load faster -->
<script type="text/javascript">
// Google Analytics command queue: reuse an existing _gaq if one is already
// defined, then enqueue the account id and a page-view command.
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-910670-2']);
_gaq.push(['_trackPageview']);
(function() {
  // Choose the ga.js host prefix that matches the page's own protocol.
  var hostPrefix;
  if ('https:' == document.location.protocol) {
    hostPrefix = 'https://ssl';
  } else {
    hostPrefix = 'http://www';
  }

  // Build the async <script> element pointing at ga.js.
  var tracker = document.createElement('script');
  tracker.type = 'text/javascript';
  tracker.async = true;
  tracker.src = hostPrefix + '.google-analytics.com/ga.js';

  // Insert it immediately before the first <script> element in the document.
  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(tracker, firstScript);
})();
</script> <!-- /footer -->
<script>/*<![CDATA[*/
// Call jQuery.ready() only when jQuery is present on window.
if (window.jQuery) {
  jQuery.ready();
}
/*]]>*/</script><script>
// Hand a module-name -> state map to mw.loader.state; skipped when the
// mw global is absent.
if (window.mw) {
  mw.loader.state({
    "skins.solstice": "loading",
    "site": "ready",
    "user": "ready",
    "user.groups": "ready"
  });
}
</script>
<script src="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=skins.solstice&amp;only=scripts&amp;skin=solstice&amp;*"></script>
<script>
// Request the listed modules through mw.loader.load; skipped when the
// mw global is absent.
if (window.mw) {
  mw.loader.load(
    [
      "mediawiki.action.view.postEdit",
      "mediawiki.user",
      "mediawiki.hidpi",
      "mediawiki.page.ready",
      "mediawiki.searchSuggest"
    ],
    null,
    true
  );
}
</script>
<script>
// Store the wgBackendResponseTime value in mw.config; skipped when the
// mw global is absent.
if (window.mw) {
  mw.config.set({
    "wgBackendResponseTime": 425
  });
}
</script> </body>
</html>