blob: ba50d375729f580d23899f452dd922445a316e58 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="SMILA/Documentation/Importing/UpdatePusher,SMILA/Documentation/Bulkbuilder,SMILA/Documentation/HowTo/How to access the REST API with the RestClient,SMILA/Documentation/Importing/Concept" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/Documentation/Importing/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" />
<link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=rss" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=atom" />
<title>SMILA/Documentation/Importing/UpdatePusher - Eclipsepedia</title>
<style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "http://wiki.eclipse.org/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style>
<link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" />
<link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" />
<!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]-->
<!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]-->
<!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]-->
<!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]-->
<!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]-->
<!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<script type= "text/javascript">/*<![CDATA[*/
var skin = "eclipsenova";
var stylepath = "/skins";
var wgArticlePath = "/$1";
var wgScriptPath = "";
var wgScript = "/index.php";
var wgServer = "http://wiki.eclipse.org";
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "SMILA/Documentation/Importing/UpdatePusher";
var wgTitle = "SMILA/Documentation/Importing/UpdatePusher";
var wgAction = "view";
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
var wgArticleId = "34873";
var wgIsArticle = true;
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
var wgCurRevisionId = "332667";
var wgVersion = "1.12.0";
var wgEnableAPI = true;
var wgEnableWriteAPI = false;
/*]]>*/</script>
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script>
<!-- Performance mods similar to those for bug 166401 -->
<script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=eclipsenova"><!-- site js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script>
<style type="text/css">/*<![CDATA[*/
.source-javascript {line-height: normal; font-size: medium;}
.source-javascript li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for javascript
* CSS class: source-javascript, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-javascript .de1, .source-javascript .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-javascript {}
.source-javascript .head {}
.source-javascript .foot {}
.source-javascript .imp {font-weight: bold; color: red;}
.source-javascript .ln-xtra {color: #cc0; background-color: #ffc;}
.source-javascript li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-javascript li.li2 {font-weight: bold;}
.source-javascript .kw1 {color: #000066; font-weight: bold;}
.source-javascript .kw2 {color: #003366; font-weight: bold;}
.source-javascript .kw3 {color: #000066;}
.source-javascript .co1 {color: #009900; font-style: italic;}
.source-javascript .coMULTI {color: #009900; font-style: italic;}
.source-javascript .es0 {color: #000099; font-weight: bold;}
.source-javascript .br0 {color: #66cc66;}
.source-javascript .st0 {color: #3366CC;}
.source-javascript .nu0 {color: #CC0000;}
.source-javascript .me1 {color: #006600;}
.source-javascript .sc0 {}
.source-javascript .sc1 {}
.source-javascript .sc2 {}
.source-javascript .sc3 {}
.source-javascript .re0 {color: #0066FF;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "http://wiki.eclipse.org/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><link rel="stylesheet" type="text/css" href="UpdatePusher.html" /> </head>
<body class="mediawiki ns-0 ltr page-SMILA_Documentation_Importing_UpdatePusher">
<div id="globalWrapper">
<div id="column-one">
<!-- Eclipse Additions for the Top Nav start here M. Ward-->
<div id="header">
<div id="header-graphic">
<img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki" />
</div>
<!-- Pulled 101409 Mward -->
<div class="portlet" id="p-personal">
<div class="pBody">
<ul>
<li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/Importing/UpdatePusher">Log in</a></li>
</ul>
</div>
</div>
<div id="header-icons">
<div id="sites">
<ul id="sitesUL">
<li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li>
<li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li>
<li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li>
<li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li>
<li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li>
<li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li>
</ul>
</div>
</div>
</div>
<!-- NEW HEADER STUFF HERE -->
<div id="header-menu">
<div id="header-nav">
<ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li>
<li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li>
<li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li>
<li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li>
<li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li>
<li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li>
<li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li>
</ul>
</div>
<div id="header-utils">
<!-- moved the search window here -->
<form action="http://wiki.eclipse.org/Special:Search" >
<input class="input" name="search" type="text" accesskey="f" value="" />
<input type='submit' name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" />&nbsp;
<input type='submit' name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" />
</form>
</div>
</div>
<!-- Eclipse Additions for the Header stop here -->
<!-- Additions and mods for leftside nav Start here -->
<!--Started nav rip here-->
<!-- these are the nav controls main page, changes etc -->
<div id="novaContent" class="faux">
<div id="leftcol">
<ul id="leftnav">
<!-- these are the page controls, edit history etc -->
<li class="separator"><a class="separator">Navigation &#160;&#160;</a></li>
<li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li>
<li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li>
<li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li>
<li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li>
<li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li>
<li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li>
<li class="separator"><a class="separator">Toolbox &#160;&#160;</a></li>
<li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Documentation/Importing/UpdatePusher">What links here</a></li>
<li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Documentation/Importing/UpdatePusher">Related changes</a></li>
<!-- This is the toolbox section -->
<li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li>
<li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li>
<li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/UpdatePusher&amp;printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/UpdatePusher&amp;oldid=332667">Permanent link</a></li> </ul>
</div>
<!-- Additions and mods for leftside nav End here -->
<div id="column-content">
<div id="content">
<a name="top" id="top"></a>
<div id="tabs">
<ul class="primary">
<li class="active"><a href="UpdatePusher.html"><span class="tab">Page</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Documentation/Importing/UpdatePusher&amp;action=edit"><span class="tab">Discussion</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/UpdatePusher&amp;action=edit"><span class="tab">View source</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/UpdatePusher&amp;action=history"><span class="tab">History</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/Importing/UpdatePusher"><span class="tab">Edit</span></a></li>
</ul>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<h1 class="firstHeading">SMILA/Documentation/Importing/UpdatePusher</h1>
<div id="bodyContent">
<h3 id="siteSub">From Eclipsepedia</h3>
<div id="contentSub"><span class="subpages">&lt; <a href="../../../SMILA.html" title="SMILA">SMILA</a> | <a href="../../Documentation.1.html" title="SMILA/Documentation">Documentation</a></span></div>
<div id="jump-to-nav">Jump to: <a href="UpdatePusher.html#column-one">navigation</a>, <a href="UpdatePusher.html#searchInput">search</a></div> <!-- start content -->
<a name="Worker_description"></a><h3> <span class="mw-headline"> Worker description </span></h3>
<ul><li> Name: <tt>updatePusher</tt>
</li><li> Parameters:
<ul><li> <tt>jobToPushTo</tt>: The job to push the crawled records to. If set, a job with this worker will not be started unless the <tt>jobToPushTo</tt> job is running in the same SMILA instance.
</li><li> <tt>remote</tt>: A description of a SMILA REST API to push the records to. This parameter must hold a map that contains:
<ul><li> <tt>endpoints</tt>: A list of strings with the hosts and ports of the SMILA servers to push to. Usually the UpdatePusher will try, for each task, to send the records to the first endpoint until it can no longer be reached. In that case it will fail over to the second host, and so on. It will not return to the first host as long as the failover host is working. If for one record none of the endpoint hosts can be reached, the task will fail with a recoverable error so it can be retried later, until the maximum number of retries has been reached. The UpdatePusher uses the <a href="../HowTo/How_to_access_the_REST_API_with_the_RestClient.html#Interfaces_and_default_implementations" title="SMILA/Documentation/HowTo/How to access the REST API with the RestClient">SMILA FailoverRestClient</a> for sending the records and handling the failover.
</li><li> <tt>urlPath</tt>: The REST API to talk to. Usually this will be something like <tt>/smila/job/indexUpdate/record</tt>, i.e. the <a href="../Bulkbuilder.html#Record_push_REST_API" title="SMILA/Documentation/Bulkbuilder">BulkBuilder REST API</a>. But it's possible to send added and updated records to any REST API that accepts POST requests with a JSON record in the request body. Deleted records are sent as DELETE requests to <tt>&lt;urlPath&gt;?_recordid=&lt;recordid&gt;</tt>, so to be able to do delta importing including deletes, the target resource must accept such requests, too. The <a href="../Bulkbuilder.html#Record_push_REST_API" title="SMILA/Documentation/Bulkbuilder">BulkBuilder REST API</a> is currently the only API that accepts such requests. There is no check if the URL path specifies a valid resource or a running job, so you can start the crawl job with an invalid URI and it will fail in an unspecified way during execution.
</li></ul>
</li></ul>
</li></ul>
<div class="messagebox" style="background-color: #def3fe; border: 1px solid #c5d7e0; color: black; padding: 5px; margin: 1ex 0; min-height: 35px; padding-left: 45px;">
<div style="float: left; margin-left: -40px;"><a href="http://wiki.eclipse.org/Image:Idea.png" class="image" title="Idea.png"><img alt="" src="http://wiki.eclipse.org/images/a/a4/Idea.png" width="35" height="35" border="0" /></a></div>
<div><b>You need to specify </b>either<b> the <tt>jobToPushTo</tt> parameter </b>or<b> the <tt>remote</tt> section or the job definition will be rejected. For example, these two fragments would be OK for a valid job definition:</b>
<div dir="ltr" style="text-align: left;"><pre class="source-javascript"><span class="br0">&#123;</span>
<span class="st0">&quot;parameters&quot;</span>: <span class="br0">&#123;</span>
...
<span class="st0">&quot;jobToPushTo&quot;</span>: <span class="st0">&quot;indexUpdate&quot;</span>,
...
<span class="br0">&#125;</span>
<span class="br0">&#125;</span></pre></div>
<p><b>OR</b>
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-javascript"><span class="br0">&#123;</span>
<span class="st0">&quot;parameters&quot;</span>: <span class="br0">&#123;</span>
...
<span class="st0">&quot;remote&quot;</span>: <span class="br0">&#123;</span>
<span class="st0">&quot;endpoints&quot;</span>: <span class="br0">&#91;</span> <span class="st0">&quot;smila-host-1:8080&quot;</span>, <span class="st0">&quot;smila-host-2:8080&quot;</span> <span class="br0">&#93;</span>,
<span class="st0">&quot;urlPath&quot;</span>: <span class="st0">&quot;/smila/job/indexUpdate/record&quot;</span>
<span class="br0">&#125;</span>,
...
<span class="br0">&#125;</span>
<span class="br0">&#125;</span></pre></div>
<b><br /></b></div>
</div>
<ul><li><ul><li> <tt>deltaImportStrategy</tt>: Configure usage of the DeltaService by this worker. There are four possible values, two of them have the same effect on this worker (see <a href="Concept.html#Delta_Delete" title="SMILA/Documentation/Importing/Concept">DeltaDelete</a> for an overview):
<ul><li> <tt>none</tt>: do <b>not</b> record delta information, do <b>not</b> perform delta-delete in the completion phase of the job.
</li><li> <tt>initial</tt> or <tt>additive</tt>: record delta information, but do <b>not</b> perform delta-delete in the completion phase of the job.
</li><li> <tt>full</tt>: default mode, record delta information <b>and</b> perform delta-delete in the completion phase of the job.
</li></ul>
</li></ul>
</li><li> Input Slots:
<ul><li> <tt>recordToPush</tt>: a bucket of type <tt>recordBulks</tt> containing the records produced by the crawl workflow.
</li></ul>
</li><li> Output Slots:
<ul><li> <tt>pushedRecords</tt>: (optional) the records that could be successfully submitted to the destination job. Usually not set, but may be used to trigger further actions on submitted records.
</li></ul>
</li></ul>
<p><br />
The UpdatePusher takes each record from the input and sends it to a bulkbuilder service. If an output bucket is connected, the record is written to it. If the record contains a <tt>_deltaHash</tt> attribute value, the worker first checks with the DeltaService whether the record has already been pushed, to prevent duplicates, and marks it as updated afterwards (if enabled, see above). If the <tt>_deltaHash</tt> attribute is empty, the record is always pushed and is not marked as updated in the DeltaService.
</p><p>Exception handling of bulkbuilder errors:
</p>
<ul><li> If an InvalidRecordException is thrown by Bulkbuilder it is logged and the record is skipped (and is also not added to the output bulk, if set).
</li><li> Other BulkbuilderExceptions are not caught. If they are marked as recoverable, they should lead to a retry of the task; otherwise the task will fail fatally.
</li></ul>
<p>If enabled (parameter <tt>deltaImportStrategy="full"</tt> or not set), the worker scans the DeltaService in the completion phase of the job run for records that must be sent to the BulkBuilder as "deleted records" and removes these entries from the DeltaService afterwards. See <a href="Concept.html#Delta_Delete" title="SMILA/Documentation/Importing/Concept">DeltaDelete</a> for details.
</p>
<!--
NewPP limit report
Preprocessor node count: 64/1000000
Post-expand include size: 1497/2097152 bytes
Template argument size: 965/2097152 bytes
#ifexist count: 0/100
-->
<!-- Saved in parser cache with key wikidb:pcache:idhash:34873-0!1!0!!en!2!edit=0 and timestamp 20130416060954 -->
<div class="printfooter">
Retrieved from "<a href="UpdatePusher.html">http://wiki.eclipse.org/SMILA/Documentation/Importing/UpdatePusher</a>"</div>
<div id="catlinks"><p class='catlinks'><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Category</a>: <span dir='ltr'><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></span></p></div> <!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<!-- Yoink of toolbox for phoenix moved up -->
</div>
</div>
<div id="clearFooter"></div>
<div id="footer" >
<ul id="footernav">
<li class="first"><a href="http://www.eclipse.org/">Home</a></li>
<li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li>
<li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li>
</ul>
<span id="copyright">Copyright &copy; 2013 The Eclipse Foundation. All Rights Reserved</span>
<p id="footercredit">This page was last modified 14:55, 3 April 2013 by <a href="http://wiki.eclipse.org/User:Juergen.schumacher.empolis.com" title="User:Juergen.schumacher.empolis.com">Juergen Schumacher</a>. Based on work by <a href="http://wiki.eclipse.org/index.php?title=User:Dhaenssgen.brox.de&amp;action=edit" class="new" title="User:Dhaenssgen.brox.de">Daniel Hänßgen</a> and <a href="http://wiki.eclipse.org/index.php?title=User:Juergen.schumacher.attensity.com&amp;action=edit" class="new" title="User:Juergen.schumacher.attensity.com">Juergen Schumacher</a>.</p>
<p id="footerviews">This page has been accessed 4,052 times.</p>
</div>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
var pageTracker = _gat._getTracker("UA-910670-4");
pageTracker._trackPageview();
</script>
<!-- <div class="visualClear"></div> -->
<script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
</div>
<!-- Served in 0.121 secs. --></body></html>