| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| <meta name="keywords" content="SMILA/Default configuration workflow overview,SMILA/Documentation/Agent,SMILA/Documentation/AgentController,SMILA/Documentation/Binary Storage,SMILA/Documentation/Bulkbuilder,SMILA/Documentation/ConnectivityManager,SMILA/Documentation/Crawler,SMILA/Documentation/CrawlerController,SMILA/Documentation/DeltaIndexingManager,SMILA/Documentation/JobManager,SMILA/Documentation/LuceneIndexPipelet" /> |
| <link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/favicon.ico" /> |
| <link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" /> |
| <link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&feed=rss" /> |
| <link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&feed=atom" /> |
| |
| |
| <title>SMILA/Default configuration workflow overview - Eclipsepedia</title> |
| |
| <style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style> |
| <link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" /> |
| <link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" /> |
| <!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]--> |
| <!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]--> |
| <!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]--> |
| <!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]--> |
| <!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]--> |
| <!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script> |
| <meta http-equiv="imagetoolbar" content="no" /><![endif]--> |
| <script type= "text/javascript">/*<![CDATA[*/ |
| var skin = "eclipsenova"; |
| var stylepath = "/skins"; |
| var wgArticlePath = "/$1"; |
| var wgScriptPath = ""; |
| var wgScript = "/index.php"; |
| var wgServer = "http://wiki.eclipse.org"; |
| var wgCanonicalNamespace = ""; |
| var wgCanonicalSpecialPageName = false; |
| var wgNamespaceNumber = 0; |
| var wgPageName = "SMILA/Default_configuration_workflow_overview"; |
| var wgTitle = "SMILA/Default configuration workflow overview"; |
| var wgAction = "view"; |
| var wgRestrictionEdit = []; |
| var wgRestrictionMove = []; |
| var wgArticleId = "16371"; |
| var wgIsArticle = true; |
| var wgUserName = null; |
| var wgUserGroups = null; |
| var wgUserLanguage = "en"; |
| var wgContentLanguage = "en"; |
| var wgBreakFrames = false; |
| var wgCurRevisionId = "269342"; |
| var wgVersion = "1.12.0"; |
| var wgEnableAPI = true; |
| var wgEnableWriteAPI = false; |
| /*]]>*/</script> |
| |
| <script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script> |
| |
| <!-- Performance mods similar to those for bug 166401 --> |
| <script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&action=raw&gen=js&useskin=eclipsenova"><!-- site js --></script> |
| |
| <!-- Head Scripts --> |
| <script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script> |
| <link rel="stylesheet" type="text/css" href="Workflow_Overview.html" /> </head> |
| <body class="mediawiki ns-0 ltr page-SMILA_Default_configuration_workflow_overview"> |
| <div id="globalWrapper"> |
| |
| |
| <div id="column-one"> |
| <!-- Eclipse Additions for the Top Nav start here M. Ward--> |
| |
| <div id="header"> |
| <div id="header-graphic"> |
| <img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki" /> |
| </div> |
| <!-- Pulled 101409 Mward --> |
| |
| <div class="portlet" id="p-personal"> |
| <div class="pBody"> |
| <ul> |
| <li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&returnto=SMILA/Default_configuration_workflow_overview">Log in</a></li> |
| </ul> |
| </div> |
| </div> |
| |
| <div id="header-icons"> |
| <div id="sites"> |
| <ul id="sitesUL"> |
| <li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li> |
| <li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li> |
| <li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li> |
| <li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li> |
| <li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li> |
| <li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| <!-- NEW HEADER STUFF HERE --> |
| <div id="header-menu"> |
| <div id="header-nav"> |
| <ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li> |
| <li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li> |
| <li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li> |
| <li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li> |
| <li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li> |
| <li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li> |
| <li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li> |
| </ul> |
| </div> |
| <div id="header-utils"> |
| <!-- moved the search window here --> |
| <form action="http://wiki.eclipse.org/Special:Search" > |
| <input class="input" id="searchInput" name="search" type="text" accesskey="f" value="" /> |
| <input type='submit' onclick="this.submit();" name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" /> |
| <input type='submit' onclick="this.submit();" name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" /> |
| </form> |
| </div> |
| </div> |
| |
| |
| <!-- Eclipse Additions for the Header stop here --> |
| <!-- Additions and mods for leftside nav Start here --> |
| |
| <!--Started nav rip here--> |
| <!-- these are the nav controls main page, changes etc --> |
| <div id="novaContent" class="faux"> |
| <div id="leftcol"> |
| <ul id="leftnav"> |
| <!-- these are the page controls, edit history etc --> |
| <li class="separator"><a class="separator">Navigation   </a></li> |
| <li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li> |
| <li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li> |
| <li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li> |
| <li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li> |
| <li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li> |
| <li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li> |
| <li class="separator"><a class="separator">Toolbox   </a></li> |
| |
| <li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Default_configuration_workflow_overview">What links here</a></li> |
| <li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Default_configuration_workflow_overview">Related changes</a></li> |
| <!-- This is the toolbox section --> |
| <li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li> |
| <li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li> |
| <li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Default_configuration_workflow_overview&printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Default_configuration_workflow_overview&oldid=269342">Permanent link</a></li> </ul> |
| </div> |
| |
| |
| <!-- Additions and mods for leftside nav End here --> |
| |
| |
| <div id="column-content"> |
| <div id="content"> |
| <a name="top" id="top"></a> |
| |
| <div id="tabs"> |
| <ul class="primary"> |
| <li class="active"><a href="Default_configuration_workflow_overview.html"><span class="tab">Page</span></a></li> |
| <li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Default_configuration_workflow_overview&action=edit"><span class="tab">Discussion</span></a></li> |
| <li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Default_configuration_workflow_overview&action=edit"><span class="tab">View source</span></a></li> |
| <li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Default_configuration_workflow_overview&action=history"><span class="tab">History</span></a></li> |
| <li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&returnto=SMILA/Default%20configuration%20workflow%20overview"><span class="tab">Edit</span></a></li> |
| </ul> |
| </div> |
| |
| |
| <script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script> |
| <h1 class="firstHeading">SMILA/Default configuration workflow overview</h1> |
| <div id="bodyContent"> |
| <h3 id="siteSub">From Eclipsepedia</h3> |
| <div id="contentSub"><span class="subpages">&lt; <a href="../SMILA.html" title="SMILA">SMILA</a></span>(Redirected from <a href="http://wiki.eclipse.org/index.php?title=SMILA/Workflow_Overview&redirect=no" title="SMILA/Workflow Overview">SMILA/Workflow Overview</a>)</div> |
| <div id="jump-to-nav">Jump to: <a href="Workflow_Overview.html#column-one">navigation</a>, <a href="Workflow_Overview.html#searchInput">search</a></div> <!-- start content --> |
| <p><a href="http://wiki.eclipse.org/Image:DefaultConfigurationWorkflow.png" class="image" title="Image:DefaultConfigurationWorkflow.png"><img alt="Image:DefaultConfigurationWorkflow.png" src="http://wiki.eclipse.org/images/3/3d/DefaultConfigurationWorkflow.png" width="884" height="650" border="0" /></a> |
| </p><p>(original slides can be found here: <a href="http://wiki.eclipse.org/images/d/d0/DefaultConfigurationWorkflow.zip" class="internal" title="DefaultConfigurationWorkflow.zip">DefaultConfigurationWorkflow.zip</a>) |
| </p> |
| <a name="The_diagram_description"></a><h2> <span class="mw-headline"> The diagram description</span></h2> |
| <ul><li> 1. Data is imported via <a href="Documentation/Crawler.html" title="SMILA/Documentation/Crawler">Crawler</a> (or <a href="Documentation/Agent.html" title="SMILA/Documentation/Agent">Agent</a>) by configuring a data source and a <a href="Glossary.html#J" title="SMILA/Glossary">job</a> name via the <a href="Documentation/CrawlerController.html" title="SMILA/Documentation/CrawlerController">Crawler Controller</a> (resp. <a href="Documentation/AgentController.html" title="SMILA/Documentation/AgentController">Agent Controller</a>) JMX API. |
| </li><li> 2. The <a href="Documentation/CrawlerController.html" title="SMILA/Documentation/CrawlerController">Crawler Controller</a> initializes the <a href="Documentation/Crawler.html" title="SMILA/Documentation/Crawler">Crawler</a> by assigning a data source and starting the import |
| </li><li> 3. The <a href="Documentation/Crawler.html" title="SMILA/Documentation/Crawler">Crawler</a> retrieves data references from the <b>Data Source</b> and returns them to the <a href="Documentation/CrawlerController.html" title="SMILA/Documentation/CrawlerController">Crawler Controller</a>. |
| </li><li> 4. The <a href="Documentation/CrawlerController.html" title="SMILA/Documentation/CrawlerController">Crawler Controller</a> determines whether this particular data is new/modified or was already indexed by querying the <a href="Documentation/DeltaIndexingManager.html" title="SMILA/Documentation/DeltaIndexingManager">Delta Indexing Service</a>. |
| </li><li> 5. If the data was not previously indexed, the <a href="Documentation/CrawlerController.html" title="SMILA/Documentation/CrawlerController">Crawler Controller</a> instructs the <a href="Documentation/Crawler.html" title="SMILA/Documentation/Crawler">Crawler</a> to retrieve the full data plus content as <a href="Glossary.html#R" title="SMILA/Glossary">Record</a> (metadata + attachment). |
| </li><li> 6. The <a href="Documentation/Crawler.html" title="SMILA/Documentation/Crawler">Crawler</a> fetches the complete record from the <b>Data Source</b>. Each record has an ID and can contain metadata and attachments (binary content). |
| </li><li> 7. The <a href="Documentation/CrawlerController.html" title="SMILA/Documentation/CrawlerController">Crawler Controller</a> sends the complete retrieved records to the <a href="Documentation/ConnectivityManager.html" title="SMILA/Documentation/ConnectivityManager">Connectivity Manager</a>. |
| </li><li> 8. The <a href="Documentation/ConnectivityManager.html" title="SMILA/Documentation/ConnectivityManager">Connectivity Manager</a> routes the records to the configured job by pushing them to the <a href="Documentation/Bulkbuilder.html" title="SMILA/Documentation/Bulkbuilder">Bulkbuilder</a>. |
| </li><li> 9. The <a href="Documentation/Bulkbuilder.html" title="SMILA/Documentation/Bulkbuilder">Bulkbuilder</a> persists the record's attachment content via the <a href="Documentation/Usage_of_Blackboard_Service.html" title="SMILA/Documentation/Usage of Blackboard Service">Blackboard</a> in the <a href="Documentation/Binary_Storage.html" title="SMILA/Documentation/Binary Storage">Binary Storage</a>. Only attachment references remain in the records. Should any subsequent processes require the record’s full content, they can access it via the <a href="Documentation/Usage_of_Blackboard_Service.html" title="SMILA/Documentation/Usage of Blackboard Service">Blackboard</a>. |
| </li><li> 10. Records are accumulated in <a href="Glossary.html#B" title="SMILA/Glossary">bulks</a> for asynchronous workflow processing. <a href="Glossary.html#R" title="SMILA/Glossary">Record bulks</a> are stored in <b>ObjectStore</b>. |
| </li><li> 11. An <a href="Glossary.html#W" title="SMILA/Glossary">asynchronous workflow</a> is executed, triggered by the <a href="Documentation/Bulkbuilder.html" title="SMILA/Documentation/Bulkbuilder">Bulkbuilder</a>-generated record bulk. This is managed by the <a href="Documentation/JobManager.html" title="SMILA/Documentation/JobManager">Jobmanager</a> and <a href="Documentation/TaskManager.html" title="SMILA/Documentation/TaskManager">Taskmanager</a> components. Runtime/Synchronization data is stored in <b>Zookeeper</b>; persistent data is stored in <b>ObjectStore</b>. |
| </li><li> 12. The predefined asynchronous workflow <i>indexUpdate</i> contains a <a href="Documentation/Worker/PipelineProcessorWorker.html" title="SMILA/Documentation/Worker/PipelineProcessorWorker">BPEL worker</a> for embedding (resp. executing) synchronous BPEL pipelines in the asynchronous workflow. Added records are passed to the predefined BPEL pipeline <i>AddPipeline</i>, deleted records to the <i>DeletePipeline</i>. |
| </li><li> 13. A BPEL pipeline uses a set of <i>Pipelets</i> to process a record's data (e.g. extracting text from various document or image file types). After processing the records the pipelets can store the gathered additional data via the <a href="Documentation/Usage_of_Blackboard_Service.html" title="SMILA/Documentation/Usage of Blackboard Service">Blackboard</a> service. |
| </li><li> 14. The Add- and DeletePipeline contain a <a href="Documentation/LuceneIndexPipelet.html" title="SMILA/Documentation/LuceneIndexPipelet">LuceneIndexPipelet</a> which is finally invoked to update the <b>Lucene Index</b>. |
| </li></ul> |
| |
| <!-- |
| NewPP limit report |
| Preprocessor node count: 2/1000000 |
| Post-expand include size: 0/2097152 bytes |
| Template argument size: 0/2097152 bytes |
| #ifexist count: 0/100 |
| --> |
| |
| <!-- Saved in parser cache with key wikidb:pcache:idhash:16371-0!1!0!!en!2!edit=0 and timestamp 20111027085602 --> |
| <div class="printfooter"> |
| Retrieved from "<a href="Default_configuration_workflow_overview.html">http://wiki.eclipse.org/SMILA/Default_configuration_workflow_overview</a>"</div> |
| <!-- end content --> |
| <div class="visualClear"></div> |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| |
| <!-- Yoink of toolbox for phoenix moved up --> |
| |
| |
| </div> |
| </div> |
| <div id="clearFooter"></div> |
| <div id="footer" > |
| <ul id="footernav"> |
| <li class="first"><a href="http://www.eclipse.org/">Home</a></li> |
| <li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li> |
| <li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li> |
| <li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li> |
| <li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li> |
| <li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li> |
| </ul> |
| <span id="copyright">Copyright © 2011 The Eclipse Foundation. All Rights Reserved</span> |
| <p id="footercredit">This page was last modified 09:26, 21 September 2011 by <a href="http://wiki.eclipse.org/User:Drazen.cindric.attensity.com" title="User:Drazen.cindric.attensity.com">Drazen Cindric</a>. Based on work by <a href="http://wiki.eclipse.org/User:Andreas.Weber.empolis.com" title="User:Andreas.Weber.empolis.com">Andreas Weber</a>, <a href="http://wiki.eclipse.org/index.php?title=User:Juergen.schumacher.attensity.com&action=edit" class="new" title="User:Juergen.schumacher.attensity.com">Juergen Schumacher</a> and <a href="http://wiki.eclipse.org/User:Igor.novakovic.empolis.com" title="User:Igor.novakovic.empolis.com">Igor Novakovic</a> and <a href="http://wiki.eclipse.org/index.php?title=SMILA/Default_configuration_workflow_overview&action=credits" title="SMILA/Default configuration workflow overview">others</a>.</p> |
| <p id="footerviews">This page has been accessed 4,924 times.</p> |
| </div> |
| |
| <script type="text/javascript"> |
| var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www."); |
| document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E")); |
| </script> |
| <script type="text/javascript"> |
| var pageTracker = _gat._getTracker("UA-910670-4"); |
| pageTracker._trackPageview(); |
| </script> |
| |
| |
| |
| |
| |
| |
| |
| <!-- <div class="visualClear"></div> --> |
| |
| <script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script> |
| </div> |
| |
| <!-- Served in 0.116 secs. --></body></html> |