blob: 50435cab00668fcdb94e9aceb621cc57a6232dcd [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="SMILA/Documentation/JobManager,SMILA/Documentation/Bulkbuilder,SMILA/Documentation/DataObjectTypesAndBuckets,SMILA/Documentation/Importing/Concept,SMILA/Documentation/JobDefinitions,SMILA/Documentation/JobManagerConfiguration,SMILA/Documentation/JobManagerFirstExample,SMILA/Documentation/JobParameters,SMILA/Documentation/JobRuns,SMILA/Documentation/ObjectStore/Bundle org.eclipse.smila.objectstore,SMILA/Documentation/TaskGenerators" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/Documentation/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" />
<link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=rss" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=atom" />
<title>SMILA/Documentation/JobManager - Eclipsepedia</title>
<style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "http://wiki.eclipse.org/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style>
<link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" />
<link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" />
<!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]-->
<!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]-->
<!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]-->
<!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]-->
<!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]-->
<!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<script type= "text/javascript">/*<![CDATA[*/
/* Page-level globals declared inline ahead of the external scripts loaded below:
   skin and path settings, the identity of the current page (name, title,
   article and revision ids), the viewing user/language, and API flags.
   NOTE(review): presumably read by wikibits.js / ajax.js - confirm. */
var skin = "eclipsenova";
var stylepath = "/skins";
var wgArticlePath = "/$1";
var wgScriptPath = "";
var wgScript = "/index.php";
var wgServer = "http://wiki.eclipse.org";
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "SMILA/Documentation/JobManager";
var wgTitle = "SMILA/Documentation/JobManager";
var wgAction = "view";
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
var wgArticleId = "32414";
var wgIsArticle = true;
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
var wgCurRevisionId = "325790";
var wgVersion = "1.12.0";
var wgEnableAPI = true;
var wgEnableWriteAPI = false;
/*]]>*/</script>
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script>
<!-- Performance mods similar to those for bug 166401 -->
<script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=eclipsenova"><!-- site js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script>
</head>
<body class="mediawiki ns-0 ltr page-SMILA_Documentation_JobManager">
<div id="globalWrapper">
<div id="column-one">
<!-- Eclipse Additions for the Top Nav start here M. Ward-->
<div id="header">
<div id="header-graphic">
<img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki" />
</div>
<!-- Pulled 101409 Mward -->
<div class="portlet" id="p-personal">
<div class="pBody">
<ul>
<li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/JobManager">Log in</a></li>
</ul>
</div>
</div>
<div id="header-icons">
<div id="sites">
<ul id="sitesUL">
<li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li>
<li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li>
<li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li>
<li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li>
<li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li>
<li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li>
</ul>
</div>
</div>
</div>
<!-- NEW HEADER STUFF HERE -->
<div id="header-menu">
<div id="header-nav">
<ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li>
<li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li>
<li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li>
<li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li>
<li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li>
<li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li>
<li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li>
</ul>
</div>
<div id="header-utils">
<!-- moved the search window here -->
<!-- Site search form. Sends the query to Special:Search with GET.
     "Go" opens the page whose name matches exactly, "Search" runs a full-text search.
     Both buttons are native submit buttons, so no click handler is needed; the former
     onclick="this.submit();" called a method that does not exist on an input element.
     id="searchInput" is the target of the "Jump to: search" link in the page body. -->
<form action="http://wiki.eclipse.org/Special:Search" method="get">
<input class="input" id="searchInput" name="search" type="text" accesskey="f" value="" title="Search Eclipsepedia" />
<input type="submit" name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" />&nbsp;
<input type="submit" name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" />
</form>
</div>
</div>
<!-- Eclipse Additions for the Header stop here -->
<!-- Additions and mods for leftside nav Start here -->
<!--Started nav rip here-->
<!-- these are the nav controls main page, changes etc -->
<div id="novaContent" class="faux">
<div id="leftcol">
<ul id="leftnav">
<!-- these are the page controls, edit history etc -->
<li class="separator"><a class="separator">Navigation &#160;&#160;</a></li>
<li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li>
<li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li>
<li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li>
<li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li>
<li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li>
<li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li>
<li class="separator"><a class="separator">Toolbox &#160;&#160;</a></li>
<li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Documentation/JobManager">What links here</a></li>
<li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Documentation/JobManager">Related changes</a></li>
<!-- This is the toolbox section -->
<li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li>
<li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li>
<li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JobManager&amp;printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JobManager&amp;oldid=325790">Permanent link</a></li> </ul>
</div>
<!-- Additions and mods for leftside nav End here -->
<div id="column-content">
<div id="content">
<a name="top" id="top"></a>
<div id="tabs">
<ul class="primary">
<li class="active"><a href="JobManager.html"><span class="tab">Page</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Documentation/JobManager&amp;action=edit"><span class="tab">Discussion</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JobManager&amp;action=edit"><span class="tab">View source</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JobManager&amp;action=history"><span class="tab">History</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/JobManager"><span class="tab">Edit</span></a></li>
</ul>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<h1 class="firstHeading">SMILA/Documentation/JobManager</h1>
<div id="bodyContent">
<h3 id="siteSub">From Eclipsepedia</h3>
<div id="contentSub"><span class="subpages">&lt; <a href="../../SMILA.html" title="SMILA">SMILA</a> | <a href="../Documentation.1.html" title="SMILA/Documentation">Documentation</a></span></div>
<div id="jump-to-nav">Jump to: <a href="JobManager.html#column-one">navigation</a>, <a href="JobManager.html#searchInput">search</a></div> <!-- start content -->
<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1"><a href="JobManager.html#Job_Manager"><span class="tocnumber">1</span> <span class="toctext">Job Manager</span></a>
<ul>
<li class="toclevel-2"><a href="JobManager.html#What_are_Asynchronous_Workflows.3F"><span class="tocnumber">1.1</span> <span class="toctext">What are Asynchronous Workflows?</span></a></li>
<li class="toclevel-2"><a href="JobManager.html#Common_Behavior_of_JobManager_definition_APIs"><span class="tocnumber">1.2</span> <span class="toctext">Common Behavior of JobManager definition APIs</span></a></li>
<li class="toclevel-2"><a href="JobManager.html#Using_the_Job_Manager"><span class="tocnumber">1.3</span> <span class="toctext">Using the Job Manager</span></a></li>
</ul>
</li>
</ul>
</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
<a name="Job_Manager"></a><h1> <span class="mw-headline"> Job Manager </span></h1>
<p>The Job Manager controls the processing logic of <a href="../Glossary.html#W" title="SMILA/Glossary">asynchronous workflows</a> in SMILA by regulating the Task Manager, which in turn generates tasks and decides which task should be processed by which <a href="../Glossary.html#W" title="SMILA/Glossary">worker</a> and when.
</p>
<a name="What_are_Asynchronous_Workflows.3F"></a><h2> <span class="mw-headline"> What are Asynchronous Workflows? </span></h2>
<p>An <i>asynchronous workflow</i> consists of a set of <i><a href="../Glossary.html#A" title="SMILA/Glossary">actions</a></i>. Each action connects the input and output <i><a href="../Glossary.html#S" title="SMILA/Glossary">slots</a></i> of a <i><a href="../Glossary.html#W" title="SMILA/Glossary">worker</a></i> to appropriate <i><a href="../Glossary.html#B" title="SMILA/Glossary">buckets</a></i>. A bucket is a virtual container of <i>data objects</i> of the same type. The most common data object type in SMILA is the <i>record bulk</i>, which is just a concatenated sequence of records (including attachments) stored in the <a href="ObjectStore/Bundle_org.eclipse.smila.objectstore.html" title="SMILA/Documentation/ObjectStore/Bundle org.eclipse.smila.objectstore">ObjectStore service</a>. When a new data object arrives in a bucket connected to the input slot of a worker (usually created by a worker that has the bucket connected to its output slot), a task is created for the worker to process this object and to produce data objects with the results in the buckets connected to the output slots. Thus the workflow (consisting of actions reading from and writing to buckets) describes a data flow of the data objects through the workers. The workflow usually starts with a worker that creates data objects from data sent to a SMILA API (e.g. the <a href="Bulkbuilder.html" title="SMILA/Documentation/Bulkbuilder">Bulkbuilder</a> creates bulks of records sent by external or internal clients) or from the data which have been extracted from an external data source (e.g. a <a href="Importing/Concept.html" title="SMILA/Documentation/Importing/Concept">Crawler</a> worker). The workflow ends either when workers do not have output buckets, or the output buckets are not connected to input slots of other workers. Then all temporary data objects created during the workflow are deleted and only the data objects in buckets marked as <i>persistent</i> will remain.
</p><p>A workflow definition is usually still somewhat generic because it does not define all the parameters needed by workers (e.g. the name of an index to build) and buckets (e.g. the name of the store for temporary data objects) used in the actions. To execute a workflow, a <i><a href="../Glossary.html#J" title="SMILA/Glossary">job</a></i> must be defined that sets all these parameters to appropriate values. Then the job can be started which initiates a <i><a href="../Glossary.html#J" title="SMILA/Glossary">job run</a></i>. As long as the job run is active, new data can be submitted to it and the JobManager will take care that it is processed by the workflow. Finally, after receiving a finish command, the job run will not accept any new data, but the job will finish processing the already submitted data (<i><a href="../Glossary.html#W" title="SMILA/Glossary">workflow runs</a></i>). Then the job can be started again and repeated an arbitrary number of times. At any time it is possible to monitor the job run and see the amount of data being processed by a worker during some time period and how many errors have occurred and how much work is still to be done. After the job run has finally finished the monitoring data is persisted for later review.
</p><p>Two main components are responsible for making this work: The JobManager knows the workflow and job definitions; it controls the creation of initial and follow-up tasks and accumulates the monitoring data measured with each task being finished. The TaskManager knows which tasks are to be done by which worker and which tasks are currently in progress; it also delivers tasks to workers which are currently available and ensures that a task will be repeated if a worker has died while working on it. All this works in a cluster of SMILA nodes as well, so the work can easily and reliably be distributed and parallelized across all nodes.
</p><p>Check out this <a href="JobManagerFirstExample.html" title="SMILA/Documentation/JobManagerFirstExample">very simple first example</a> for all of this.
</p>
<a name="Common_Behavior_of_JobManager_definition_APIs"></a><h2> <span class="mw-headline"> Common Behavior of JobManager definition APIs </span></h2>
<p>SMILA provides APIs to read and write JobManager configuration elements. (Currently you can only write buckets, workflows and job definitions). The pages linked below describe the specific APIs to do this. However, they have some common properties:
</p>
<ul><li> Elements can be defined either in the system configuration, or using the APIs. System-defined elements cannot be changed by API calls. Therefore, when reading such system-defined elements using the API, they will contain a <code>readOnly</code> flag set to <code>true</code>. Requests to update those elements will result in an error. You cannot set this flag when you create your own elements to protect them from being overwritten. The API will remove it.
</li><li> User-defined elements, on the other hand, will contain a timestamp attribute holding the information about when an element was last changed. This can be used by modeling tools to ensure that they do not overwrite changes made by other users. You cannot set this timestamp yourself in an update request; it will be overwritten by the API.
</li><li> Additionally, when an update request for an element is performed successfully, the response object will also contain the <code>timestamp</code> attribute generated for this update action.
</li><li> Apart from the required and optional structure and content of the job manager elements as specified in the pages linked below, each element can contain additional information as needed by the user. This makes it possible to add comments, descriptions, author information, etc. However, the read APIs show this additional information in the result objects only if invoked with <code>...?returnDetails=true</code>. Otherwise the response will contain only the basic information.
</li></ul>
<p>See the following pages for examples of all this behavior.
</p>
<a name="Using_the_Job_Manager"></a><h2> <span class="mw-headline"> Using the Job Manager </span></h2>
<ul><li><a href="JobManagerConfiguration.html" title="SMILA/Documentation/JobManagerConfiguration">Configuration</a>
</li><li><a href="DataObjectTypesAndBuckets.html" title="SMILA/Documentation/DataObjectTypesAndBuckets">Creating and managing buckets</a>
</li><li><a href="WorkerAndWorkflows.html" title="SMILA/Documentation/WorkerAndWorkflows">Modeling workflows</a>
</li><li><a href="TaskGenerators.html" title="SMILA/Documentation/TaskGenerators">Task Generators</a>
</li><li><a href="JobDefinitions.html" title="SMILA/Documentation/JobDefinitions">Creating jobs</a>
</li><li><a href="JobRuns.html" title="SMILA/Documentation/JobRuns">Running and monitoring jobs</a>
</li><li><a href="JobParameters.html" title="SMILA/Documentation/JobParameters">Setting parameters</a>
</li><li><a href="JobManagerFirstExample.html" title="SMILA/Documentation/JobManagerFirstExample">A first example</a>
</li></ul>
<!--
NewPP limit report
Preprocessor node count: 9/1000000
Post-expand include size: 0/2097152 bytes
Template argument size: 0/2097152 bytes
#ifexist count: 0/100
-->
<!-- Saved in parser cache with key wikidb:pcache:idhash:32414-0!1!0!!en!2!edit=0 and timestamp 20130416060932 -->
<div class="printfooter">
Retrieved from "<a href="JobManager.html">http://wiki.eclipse.org/SMILA/Documentation/JobManager</a>"</div>
<!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<!-- Yoink of toolbox for phoenix moved up -->
</div>
</div>
<div id="clearFooter"></div>
<div id="footer" >
<ul id="footernav">
<li class="first"><a href="http://www.eclipse.org/">Home</a></li>
<li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li>
<li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li>
</ul>
<span id="copyright">Copyright &copy; 2013 The Eclipse Foundation. All Rights Reserved</span>
<p id="footercredit">This page was last modified 11:52, 8 January 2013 by <a href="http://wiki.eclipse.org/index.php?title=User:Dhaenssgen.brox.de&amp;action=edit" class="new" title="User:Dhaenssgen.brox.de">Daniel Hänßgen</a>. Based on work by <a href="http://wiki.eclipse.org/User:Igor.novakovic.attensity.com" title="User:Igor.novakovic.attensity.com">Igor Novakovic</a>, <a href="http://wiki.eclipse.org/User:Andreas.schank.attensity.com" title="User:Andreas.schank.attensity.com">A. Schank</a> and <a href="http://wiki.eclipse.org/index.php?title=User:Juergen.schumacher.attensity.com&amp;action=edit" class="new" title="User:Juergen.schumacher.attensity.com">Juergen Schumacher</a> and <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JobManager&amp;action=credits" title="SMILA/Documentation/JobManager">others</a>.</p>
<p id="footerviews">This page has been accessed 1,888 times.</p>
</div>
<script type="text/javascript">
// Load ga.js from google-analytics.com. The host prefix is "https://ssl." when
// the page itself was loaded over https and "http://www." otherwise.
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
// The script tag is percent-encoded (%3C and %3E are the angle brackets) and
// decoded with unescape() before being written into the document.
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
// Record a page view for this page with the ga.js tracker (account UA-910670-4).
// _gat is only defined once ga.js has loaded; if the library is blocked or
// fails to load, referencing it throws. Tracking is best-effort, so the error
// is deliberately swallowed instead of surfacing as a script error on the page.
try {
var pageTracker = _gat._getTracker("UA-910670-4");
pageTracker._trackPageview();
} catch (err) {}
</script>
<!-- <div class="visualClear"></div> -->
<script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
</div>
<!-- Served in 0.123 secs. --></body></html>