| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| <meta name="keywords" content="SMILA/Architecture Overview,SMILA/Attic/Architecture Overview,SMILA/Documentation" /> |
| <link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/favicon.ico" /> |
| <link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" /> |
| <link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&feed=rss" /> |
| <link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&feed=atom" /> |
| |
| |
| <title>SMILA/Architecture Overview - Eclipsepedia</title> |
| |
| <style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style> |
| <link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" /> |
| <link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" /> |
| <link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" /> |
| <!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]--> |
| <!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]--> |
| <!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]--> |
| <!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]--> |
| <!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]--> |
| <!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script> |
| <meta http-equiv="imagetoolbar" content="no" /><![endif]--> |
| <script type= "text/javascript">/*<![CDATA[*/ |
| var skin = "eclipsenova"; |
| var stylepath = "/skins"; |
| var wgArticlePath = "/$1"; |
| var wgScriptPath = ""; |
| var wgScript = "/index.php"; |
| var wgServer = "http://wiki.eclipse.org"; |
| var wgCanonicalNamespace = ""; |
| var wgCanonicalSpecialPageName = false; |
| var wgNamespaceNumber = 0; |
| var wgPageName = "SMILA/Architecture_Overview"; |
| var wgTitle = "SMILA/Architecture Overview"; |
| var wgAction = "view"; |
| var wgRestrictionEdit = []; |
| var wgRestrictionMove = []; |
| var wgArticleId = "19260"; |
| var wgIsArticle = true; |
| var wgUserName = null; |
| var wgUserGroups = null; |
| var wgUserLanguage = "en"; |
| var wgContentLanguage = "en"; |
| var wgBreakFrames = false; |
| var wgCurRevisionId = "269344"; |
| var wgVersion = "1.12.0"; |
| var wgEnableAPI = true; |
| var wgEnableWriteAPI = false; |
| /*]]>*/</script> |
| |
| <script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script> |
| |
| <!-- Performance mods similar to those for bug 166401 --> |
| <script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&action=raw&gen=js&useskin=eclipsenova"><!-- site js --></script> |
| |
| <!-- Head Scripts --> |
| <script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script> |
| <link rel="stylesheet" type="text/css" href="Architecture_Overview.html" /> </head> |
| <body class="mediawiki ns-0 ltr page-SMILA_Architecture_Overview"> |
| <div id="globalWrapper"> |
| |
| |
| <div id="column-one"> |
| <!-- Eclipse Additions for the Top Nav start here M. Ward--> |
| |
| <div id="header"> |
| <div id="header-graphic"> |
| <img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki"> |
| </div> |
| <!-- Pulled 101409 Mward --> |
| |
| <div class="portlet" id="p-personal"> |
| <div class="pBody"> |
| <ul> |
| <li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&returnto=SMILA/Architecture_Overview">Log in</a></li> |
| </ul> |
| </div> |
| </div> |
| |
| <div id="header-icons"> |
| <div id="sites"> |
| <ul id="sitesUL"> |
| <li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li> |
| <li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li> |
| <li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li> |
| <li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li> |
| <li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li> |
| <li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| <!-- NEW HEADER STUFF HERE --> |
| <div id="header-menu"> |
| <div id="header-nav"> |
| <ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li> |
| <li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li> |
| <li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li> |
| <li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li> |
| <li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li> |
| <li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li> |
| <li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li> |
| </ul> |
| </div> |
| <div id="header-utils"> |
| <!-- moved the search window here --> |
| <form action="http://wiki.eclipse.org/Special:Search" > |
| <input class="input" name="search" type="text" accesskey="f" value="" /> |
| <input type='submit' onclick="this.submit();" name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" /> |
| <input type='submit' onclick="this.submit();" name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" /> |
| </form> |
| </div> |
| </div> |
| |
| |
| <!-- Eclipse Additions for the Header stop here --> |
| <!-- Additions and mods for leftside nav Start here --> |
| |
| <!--Started nav rip here--> |
| <!-- these are the nav controls main page, changes etc --> |
| <div id="novaContent" class="faux"> |
| <div id="leftcol"> |
| <ul id="leftnav"> |
| <!-- these are the page controls, edit history etc --> |
| <li class="separator"><a class="separator">Navigation   </li> |
| <li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li> |
| <li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li> |
| <li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li> |
| <li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li> |
| <li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li> |
| <li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li> |
| <li class="separator"><a class="separator">Toolbox   </a></li> |
| |
| <li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Architecture_Overview">What links here</a></li> |
| <li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Architecture_Overview">Related changes</a></li> |
| <!-- This is the toolbox section --> |
| <li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li> |
| <li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li> |
| <li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Architecture_Overview&printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Architecture_Overview&oldid=269344">Permanent link</a></li> </ul> |
| </div> |
| |
| |
| <!-- Additions and mods for leftside nav End here --> |
| |
| |
| <div id="column-content"> |
| <div id="content"> |
| <a name="top" id="top"></a> |
| |
| <div id="tabs"> |
| <ul class="primary"> |
| <li class="active"><a href="Architecture_Overview.html"><span class="tab">Page</span></a></li> |
| <li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Architecture_Overview&action=edit"><span class="tab">Discussion</span></a></li> |
| <li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Architecture_Overview&action=edit"><span class="tab">View source</span></a></li> |
| <li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Architecture_Overview&action=history"><span class="tab">History</span></a></li> |
| <li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&returnto=SMILA/Architecture%20Overview"><span class="tab">Edit</span></a></li> |
| </ul> |
| </div> |
| |
| |
| <script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script> |
| <h1 class="firstHeading">SMILA/Architecture Overview</h1> |
| <div id="bodyContent"> |
| <h3 id="siteSub">From Eclipsepedia</h3> |
| <div id="contentSub"><span class="subpages">< <a href="../SMILA.html" title="SMILA">SMILA</a></span></div> |
| <div id="jump-to-nav">Jump to: <a href="Architecture_Overview.html#column-one">navigation</a>, <a href="Architecture_Overview.html#searchInput">search</a></div> <!-- start content --> |
| <p>This page describes the short overview of SMILA's current architecture. |
| </p> |
| <table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div> |
| <ul> |
| <li class="toclevel-1"><a href="Architecture_Overview.html#Introduction"><span class="tocnumber">1</span> <span class="toctext">Introduction</span></a></li> |
| <li class="toclevel-1"><a href="Architecture_Overview.html#Architecture_Overview"><span class="tocnumber">2</span> <span class="toctext">Architecture Overview</span></a> |
| <ul> |
| <li class="toclevel-2"><a href="Architecture_Overview.html#Description"><span class="tocnumber">2.1</span> <span class="toctext">Description</span></a></li> |
| <li class="toclevel-2"><a href="Architecture_Overview.html#Component_Documentation"><span class="tocnumber">2.2</span> <span class="toctext">Component Documentation</span></a></li> |
| </ul> |
| </li> |
| </ul> |
| </td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script> |
| <a name="Introduction"></a><h2> <span class="mw-headline"> Introduction </span></h2> |
| <p>SMILA is a framework that runs on top of OSGi runtime and therefore follows its component model. |
| </p> |
| <a name="Architecture_Overview"></a><h2> <span class="mw-headline"> Architecture Overview </span></h2> |
| <p><a href="http://wiki.eclipse.org/Image:SMILA Architecture Overview_0.9.png" class="image" title="Image:SMILA Architecture Overview_0.9.png"><img alt="Image:SMILA Architecture Overview_0.9.png" src="http://wiki.eclipse.org/images/5/55/SMILA_Architecture_Overview_0.9.png" width="960" height="720" border="0" /></a> |
| </p> |
| <a name="Description"></a><h3> <span class="mw-headline"> Description </span></h3> |
| <p>This architecture overview depicts generally two processes: preprocessing and information retrieval. |
| </p><p><b>Note:</b> <i>In case where SMILA is used for building a search application, we talk about indexing and search process.</i> |
| </p> |
| <ul> |
| <li> |
| <p> |
| The <i><b>preprocessing</b></i> process generally includes the interaction with the data source either by pushing (updated) data by agents or by pulling data by crawlers and pushing it into the system via the connectivity module. The information that can be pushed into the framework is in general document's metadata, content and diverse security relevant information i.e. access rights. |
| </p> |
| <p> |
| The connectivity module pushes the data into the bulkbuilder, that is the entry point to the asynchronous job management and persists the data in dedicated stores for further processing. The connectivity module has to provide a job name for which the record should be processed. |
| </p> |
| <p> |
| An indexing client can also use the REST API to push JSON objects (i.e. a document's metadata) into a running job directly without interacting with an agent or the connectivity module. |
| </p> |
| <p> |
| The bulkbuilder collects the records that has been pushed and stores them in bulks (storage entities comprising one to many records for effective bulk processing by the workers of the preprocessing workflow). |
| </p> |
| <p> |
| Metadata and access rights are stored in the object store. Content i.e. (large) binary data is stored in the binary store. Beside these two storages, SMILA also offers Delta Indexing store for keeping information about visited objects/documents during a crawling of a data source. |
| Ontology store is a dedicated store for persisting and managing ontologies. The Blackboard service represents a high level API for accessing record information by BPEL pipelines. |
| </p> |
| <p> |
| After one bulk has been completed by matching configured time or size constraints, the bulk is released and the JobManager will determine follow up tasks for the next worker(s) as defined by the workflow of the active job. |
| </p> |
| <p> |
| The WorkerManager listens for available tasks from the TaskManager and let them be processed by its workers. |
| These include PipelineProcessorWorkers that execute synchronous BPEL workflows. These workers initialize a Blackboard with the records to be processed and start a BPEL engine which executes desired workflow. The workflow again is defined by the order of execution of some services either provided by the framework itself or implemented by application's developer. |
| </p> |
| <p> |
| Since the job processing synchronizes itself via ZooKeeper across the whole cluster, the tasks can be executed on different nodes in the cluster, so the preprocessing can easily be spread and therefore parallelized across the whole cluster (provided that the storages are accessible from each node in the cluster). Thus the asynchonous job processing components are the central framework components which enable horizontal scaling of the preprocessing process in the framework. Workers can also be configured to process multiple tasks in parallel on one single node. |
| </p> |
| </li> |
| <li> |
| <p> |
| The <i><b>information retrieval</b></i> provides a swift access to previously preprocessed and stored information. Since this process is synchronous there has to be some external component responsible for distributing the load and therefore enabling the horizontal scalability of the information retrieval process. The flexible definition and execution of application's business logic is provided here also by calling a BPEL engine with a desired workflow. |
| </p> |
| </li> |
| <p> |
| <b>Hint:</b> |
| For initial architecture proposal please see the <a href="Attic/Architecture_Overview.html" title="SMILA/Attic/Architecture Overview">archived version</a>. |
| </p> |
| <p> |
| Original slides can be found here: <a href="http://wiki.eclipse.org/images/8/8e/SMILA_Architecture.zip" class="internal" title="SMILA Architecture.zip">SMILA Architecture.zip</a> |
| </p> |
| <p><br /> |
| </p> |
| <hr /> |
| <a name="Component_Documentation"></a><h3> <span class="mw-headline"> Component Documentation </span></h3> |
| <p>For further up to date documentation of all implemented components please see: |
| </p> |
| <table class="table.gallery" border="0"> |
| |
| <tr> |
| <td> <a href="Documentation.html" title="SMILA/Documentation">Component Documentation</a> |
| </td></tr></table></ul> |
| |
| <!-- |
| NewPP limit report |
| Preprocessor node count: 19/1000000 |
| Post-expand include size: 0/2097152 bytes |
| Template argument size: 0/2097152 bytes |
| #ifexist count: 0/100 |
| --> |
| |
| <!-- Saved in parser cache with key wikidb:pcache:idhash:19260-0!1!0!!en!2!edit=0 and timestamp 20111026205841 --> |
| <div class="printfooter"> |
| Retrieved from "<a href="Architecture_Overview.html">http://wiki.eclipse.org/SMILA/Architecture_Overview</a>"</div> |
| <div id="catlinks"><p class='catlinks'><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Category</a>: <span dir='ltr'><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></span></p></div> <!-- end content --> |
| <div class="visualClear"></div> |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| |
| <!-- Yoink of toolbox for phoenix moved up --> |
| |
| |
| </div> |
| </div> |
| <div id="clearFooter"/> |
| <div id="footer" > |
| <ul id="footernav"> |
| <li class="first"><a href="http://www.eclipse.org/">Home</a></li> |
| <li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li> |
| <li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li> |
| <li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li> |
| <li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li> |
| <li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li> |
| </ul> |
| <span id="copyright">Copyright © 2011 The Eclipse Foundation. All Rights Reserved</span> |
| <p id="footercredit">This page was last modified 09:28, 21 September 2011 by <a href="http://wiki.eclipse.org/User:Drazen.cindric.attensity.com" title="User:Drazen.cindric.attensity.com">Drazen Cindric</a>. Based on work by <a href="http://wiki.eclipse.org/User:Andreas.schank.attensity.com" title="User:Andreas.schank.attensity.com">A. Schank</a>, <a href="http://wiki.eclipse.org/index.php?title=User:Igor.novakovic.attensity.com&action=edit" class="new" title="User:Igor.novakovic.attensity.com">Igor Novakovic</a> and <a href="http://wiki.eclipse.org/User:Igor.novakovic.empolis.com" title="User:Igor.novakovic.empolis.com">Igor Novakovic</a>.</p> |
| <p id="footerviews">This page has been accessed 6,337 times.</p> |
| </div> |
| |
| <script type="text/javascript"> |
| var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www."); |
| document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E")); |
| </script> |
| <script type="text/javascript"> |
| var pageTracker = _gat._getTracker("UA-910670-4"); |
| pageTracker._trackPageview(); |
| </script> |
| |
| |
| |
| |
| |
| |
| |
| <!-- <div class="visualClear"></div> --> |
| |
| <script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script> |
| </div> |
| |
| <!-- Served in 0.077 secs. --></body></html> |