blob: 3d88676b30a3869c1b27a6c52dac0a758eeec4e0 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="SMILA/Project Concepts/Connectivity,SMILA/Project Concepts/ConnectivityMessageInterface,SMILA/Project Concepts/ID Concept,SMILA/Project Concepts/IRM,Daniel.stucky.empolis.com" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/Project_Concepts/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" />
<link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=rss" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=atom" />
<title>SMILA/Project Concepts/Connectivity - Eclipsepedia</title>
<style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style>
<link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" />
<link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" />
<!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]-->
<!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]-->
<!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]-->
<!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]-->
<!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]-->
<!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<!-- Page-level JavaScript globals for this view: skin and path settings, the
     page's name, title, article and revision ids, the viewer's user and
     language settings, and API flags. Presumably read by the scripts loaded
     after this block (confirm against wikibits.js). -->
<script type= "text/javascript">/*<![CDATA[*/
var skin = "eclipsenova";
var stylepath = "/skins";
var wgArticlePath = "/$1";
var wgScriptPath = "";
var wgScript = "/index.php";
var wgServer = "http://wiki.eclipse.org";
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "SMILA/Project_Concepts/Connectivity";
var wgTitle = "SMILA/Project Concepts/Connectivity";
var wgAction = "view";
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
var wgArticleId = "15224";
var wgIsArticle = true;
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
var wgCurRevisionId = "113296";
var wgVersion = "1.12.0";
var wgEnableAPI = true;
var wgEnableWriteAPI = false;
/*]]>*/</script>
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script>
<!-- Performance mods similar to those for bug 166401 -->
<script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=eclipsenova"><!-- site js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script>
<!-- Inline stylesheet for elements with class "source-java" (used by the
     highlighted Java listings in the page body). The first two rules reset
     line height and font size; the remainder is the GeSHi-generated set of
     per-token classes (kw*, co*, st0, br0, ...). -->
<style type="text/css">/*<![CDATA[*/
.source-java {line-height: normal; font-size: medium;}
.source-java li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for java
* CSS class: source-java, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-java .de1, .source-java .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-java {}
.source-java .head {}
.source-java .foot {}
.source-java .imp {font-weight: bold; color: red;}
.source-java .ln-xtra {color: #cc0; background-color: #ffc;}
.source-java li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-java li.li2 {font-weight: bold;}
.source-java .kw1 {color: #7F0055; font-weight: bold;}
.source-java .kw2 {color: #7F0055; font-weight: bold;}
.source-java .kw3 {color: #000000; font-weight: normal}
.source-java .kw4 {color: #7F0055; font-weight: bold;}
.source-java .co1 {color: #3F7F5F; font-style: italic;}
.source-java .co2 {color: #3F7F5F;}
.source-java .co3 {color: #3F7F5F; font-style: italic; font-weight: bold;}
.source-java .coMULTI {color: #3F5FBF; font-style: italic;}
.source-java .es0 {color: #000000;}
.source-java .br0 {color: #000000;}
.source-java .st0 {color: #2A00ff;}
.source-java .nu0 {color: #000000;}
.source-java .me1 {color: #000000;}
.source-java .me2 {color: #000000;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><link rel="stylesheet" type="text/css" href="Connectivity.html" /> </head>
<body class="mediawiki ns-0 ltr page-SMILA_Project_Concepts_Connectivity">
<div id="globalWrapper">
<div id="column-one">
<!-- Eclipse Additions for the Top Nav start here M. Ward-->
<div id="header">
<div id="header-graphic">
<img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki" />
</div>
<!-- Pulled 101409 Mward -->
<div class="portlet" id="p-personal">
<div class="pBody">
<ul>
<li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Project_Concepts/Connectivity">Log in</a></li>
</ul>
</div>
</div>
<div id="header-icons">
<div id="sites">
<ul id="sitesUL">
<li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li>
<li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li>
<li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li>
<li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li>
<li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li>
<li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li>
</ul>
</div>
</div>
</div>
<!-- NEW HEADER STUFF HERE -->
<div id="header-menu">
<div id="header-nav">
<ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li>
<li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li>
<li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li>
<li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li>
<li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li>
<li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li>
<li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li>
</ul>
</div>
<div id="header-utils">
<!-- moved the search window here -->
<!-- Site search form. Both buttons are native submit controls, so the browser
     submits the form itself; no click handler is needed (input elements have
     no submit() method, so calling it only raises a script error). The "go"
     and "fulltext" field names tell the server which button was pressed. -->
<form action="http://wiki.eclipse.org/Special:Search">
<input class="input" name="search" type="text" accesskey="f" value="" />
<input type="submit" name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" />&nbsp;
<input type="submit" name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" />
</form>
</div>
</div>
<!-- Eclipse Additions for the Header stop here -->
<!-- Additions and mods for leftside nav Start here -->
<!--Started nav rip here-->
<!-- these are the nav controls main page, changes etc -->
<div id="novaContent" class="faux">
<div id="leftcol">
<ul id="leftnav">
<!-- these are the page controls, edit history etc -->
<li class="separator"><a class="separator">Navigation &#160;&#160;</a></li>
<li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li>
<li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li>
<li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li>
<li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li>
<li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li>
<li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li>
<li class="separator"><a class="separator">Toolbox &#160;&#160;</a></li>
<li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Project_Concepts/Connectivity">What links here</a></li>
<li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Project_Concepts/Connectivity">Related changes</a></li>
<!-- This is the toolbox section -->
<li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li>
<li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li>
<li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Project_Concepts/Connectivity&amp;printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Project_Concepts/Connectivity&amp;oldid=113296">Permanent link</a></li> </ul>
</div>
<!-- Additions and mods for leftside nav End here -->
<div id="column-content">
<div id="content">
<a name="top" id="top"></a>
<div id="tabs">
<ul class="primary">
<li class="active"><a href="Connectivity.html"><span class="tab">Page</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Project_Concepts/Connectivity&amp;action=edit"><span class="tab">Discussion</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Project_Concepts/Connectivity&amp;action=edit"><span class="tab">View source</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Project_Concepts/Connectivity&amp;action=history"><span class="tab">History</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Project%20Concepts/Connectivity"><span class="tab">Edit</span></a></li>
</ul>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<h1 class="firstHeading">SMILA/Project Concepts/Connectivity</h1>
<div id="bodyContent">
<h3 id="siteSub">From Eclipsepedia</h3>
<div id="contentSub"><span class="subpages">&lt; <a href="../../SMILA.html" title="SMILA">SMILA</a> | <a href="../Project_Concepts.1.html" title="SMILA/Project Concepts">Project Concepts</a></span></div>
<div id="jump-to-nav">Jump to: <a href="Connectivity.html#column-one">navigation</a>, <a href="Connectivity.html#searchInput">search</a></div> <!-- start content -->
<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1"><a href="Connectivity.html#Description"><span class="tocnumber">1</span> <span class="toctext">Description</span></a></li>
<li class="toclevel-1"><a href="Connectivity.html#Discussion"><span class="tocnumber">2</span> <span class="toctext">Discussion</span></a></li>
<li class="toclevel-1"><a href="Connectivity.html#Technical_proposal"><span class="tocnumber">3</span> <span class="toctext">Technical proposal</span></a>
<ul>
<li class="toclevel-2"><a href="Connectivity.html#Overview"><span class="tocnumber">3.1</span> <span class="toctext">Overview</span></a></li>
<li class="toclevel-2"><a href="Connectivity.html#Sub-Components"><span class="tocnumber">3.2</span> <span class="toctext">Sub-Components</span></a>
<ul>
<li class="toclevel-3"><a href="Connectivity.html#APIs"><span class="tocnumber">3.2.1</span> <span class="toctext">APIs</span></a></li>
<li class="toclevel-3"><a href="Connectivity.html#Processor"><span class="tocnumber">3.2.2</span> <span class="toctext">Processor</span></a></li>
<li class="toclevel-3"><a href="Connectivity.html#Buffer_.28P2.29"><span class="tocnumber">3.2.3</span> <span class="toctext">Buffer (P2)</span></a></li>
<li class="toclevel-3"><a href="Connectivity.html#Router"><span class="tocnumber">3.2.4</span> <span class="toctext">Router</span></a></li>
<li class="toclevel-3"><a href="Connectivity.html#Delta_Indexing_Manager"><span class="tocnumber">3.2.5</span> <span class="toctext">Delta Indexing Manager</span></a></li>
</ul>
</li>
<li class="toclevel-2"><a href="Connectivity.html#Interfaces"><span class="tocnumber">3.3</span> <span class="toctext">Interfaces</span></a></li>
<li class="toclevel-2"><a href="Connectivity.html#Workflow"><span class="tocnumber">3.4</span> <span class="toctext">Workflow</span></a></li>
</ul>
</li>
</ul>
</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
<a name="Description"></a><h2> <span class="mw-headline"> Description </span></h2>
<p>The Connectivity Manager is the entry point for external data. It is a single point of entry - on information level. The Connectivity Manager normalizes incoming information to an internally used message format. Large sets of incoming data (binary data) should also be persisted into an external storage to reduce the queue load. It also includes functionality for buffering and routing of the incoming information.
</p>
<a name="Discussion"></a><h2> <span class="mw-headline"> Discussion </span></h2>
<ul><li> <a href="http://wiki.eclipse.org/User:Daniel.stucky.empolis.com" title="User:Daniel.stucky.empolis.com">Daniel Stucky</a>: While implementing and testing the LucenePipelet, I found out that there may be a need to send special actions after a DataSource was indexed (e.g. for Lucene: send a flush action so that all changes are visible in the index to IndexReaders). I already discussed this with Igor and we had the following idea: As the whole system is asynchronous, we don't know exactly when an index job has finished completely. ConnectivityManager knows when a Crawler has finished crawling. Thereafter it could create a special Queue Message to execute a pipeline (e.g. to flush Lucene index). Of course there may still be messages of the crawled DataSource either in the Buffer or in the Queue. Therefore, before sending this special message, it should be checked that the Buffer and the Queue don't contain any more messages belonging to the datasource.
</li></ul>
<p>As Lucene IndexReaders only have the index state of the time the index was opened available, they have to reopen the index from time to time to be up to date. The only possibility is to poll the index on a regular basis. This could be done in the Lucene Query Service or more generally using some scheduling service of SMILA.
</p>
<a name="Technical_proposal"></a><h2> <span class="mw-headline"> Technical proposal </span></h2>
<p>The Connectivity Manager is the single point of entry for information (data) in the SMILA. Its functionality is divided into several Sub-Components for better modularization. The Connectivity Manager, and its Sub-Components, should all be implemented in Java. The external interfaces should also support SCA.
</p>
<a name="Overview"></a><h3> <span class="mw-headline"> Overview </span></h3>
<p>This chart shows the Connectivity Manager, its Sub-Components and their relationship as well as the relationship to other components&nbsp;:
<a href="http://wiki.eclipse.org/Image:Connectivity_Module.png" class="image" title="Image:Connectivity_Module.png"><img alt="Image:Connectivity_Module.png" src="http://wiki.eclipse.org/images/c/cc/Connectivity_Module.png" width="960" height="720" border="0" /></a>
The connections using arrowheads represent the actual flow of data to/in/from the Connectivity Manager. See section <a href="Connectivity.html#Workflow" title="">Workflow</a> for detailed information.
The XML and binary storages are global storages also accessible by BPEL.
</p>
<a name="Sub-Components"></a><h3> <span class="mw-headline"> Sub-Components </span></h3>
<a name="APIs"></a><h4> <span class="mw-headline"> APIs </span></h4>
<p>Probably the Connectivity Manager has to provide more than one interface/technologies for access. The main interface is used by IRMs to provide crawled data objects. But it may also be used from within BPEL processes or from the Publish/Subscribe Module. This concept focuses on the interfaces used by IRMs. I also decided to integrate the Delta Indexing Manager functionality as a Sub-Component in the Connectivity Manager. But parts of its API are accessible via the Connectivity Manager interface. The Connectivity Manager's APIs should be available via SCA, but there is no such need for the Sub-Components.
</p>
<a name="Processor"></a><h4> <span class="mw-headline"> Processor </span></h4>
<p>The Processor is the core of the Connectivity Manager, it does the actual processing of the incoming data objects. The incoming data is stored depending on its type:
</p>
<ul><li> large or binary data is stored in a binary store (e.g. distributed filesystem)
</li><li> all other data is stored in an XML store (e.g. XML database)
</li></ul>
<p>The Processor also creates the message object to be enqueued. A message contains the unique ID of the object, the Delta Indexing hash, routing information and any additional needed information. It should be configurable what information is part of a message.
The Processor should also be able to standardize incoming objects (either Records and/or MessageObjects of the 2nd alternative interface design) to the latest version (internal representation) or to reject them.
</p>
<a name="Buffer_.28P2.29"></a><h4> <span class="mw-headline"> Buffer (P2) </span></h4>
<p>The Buffer delays the enqueueing of outgoing messages. Therefore it needs a separate Queue mechanism to temporarily store the messages. This must not be mistaken for the Queue Servers! The Buffer provides functionality to detect and resolve competing messages (add/update and delete of the same document).
</p><p>For a first release the Buffer functionality is of low priority (P2).
</p>
<a name="Router"></a><h4> <span class="mw-headline"> Router </span></h4>
<p>The Router routes messages to the appropriate Queues and/or BPEL workflows. The routing information (what goes where) has to be provided by configuration. The Router also has to update the Delta Indexing information accordingly. Neither the IRM nor the Connectivity Manager gets any feedback if/how a message was processed (successfully or if some error occurred). The only feedback the Router (and so the Connectivity Manager) gets is if a message was enqueued or not. Therefore after a message was successfully enqueued one of the following actions must be triggered by the Router:
</p>
<ul><li> add: create the Delta Indexing entry and mark as processed (visited)
</li><li> update: update the Delta Indexing entry and mark as processed (visited)
</li><li> delete: remove the Delta Indexing entry
</li></ul>
<p>It may be necessary to directly access the Router after a BPEL workflow has finished to route a message to another Queue and therefore expand the API.
</p>
<a name="Delta_Indexing_Manager"></a><h4> <span class="mw-headline"> Delta Indexing Manager </span></h4>
<p>The Delta Indexing Manager stores information about last modification of each document (even compound elements) and can determine if a document has changed. The information about last modification should be some kind of Hash computed by the Crawler (see <a href="IRM.html" title="SMILA/Project Concepts/IRM">IRM</a> for further information.) It provides functionality to manage this information, to determine if documents have changed, to mark documents that have not changed (visited flag) and to determine documents that are indexed but no longer exist in the data source. The Delta Indexing Manager was moved inside the Connectivity Manager for these reasons:
</p>
<ul><li> some of its functionality is used within the Connectivity Manager
</li><li> the Connectivity Manager, as a single point of access, should "know" about the delta indexing information
</li><li> in a distributed system we only need one connection from an IRM to the Connectivity Manager and not a second one to access Delta Indexing Manager (this seems not to be a big gain, but may prove valid in high volume distributed scenarios)
</li></ul>
<p>Despite being a part of the Connectivity Manager, the implementation of Delta Indexing Manager is still replaceable to provide different stores for the delta indexing information (e.g. database or even a search index).
</p><p>Here is a list of the information that needs to be stored by the Delta Indexing Manager:
</p>
<ul><li> ID: the id of the document
</li><li> Hash: the hash of the document to determine modifications
</li><li> DataSourceID: the id of the data source from where the document was provided. This is already part of the document's ID, but we need it as a separate value to clear by source
</li><li> IsCompound: flag, if the document is a compound object. This is needed to clean up recursively
</li><li> ParentID or ChildIDs: a reference to the parent document (if any exists) or references to child documents. This is needed to clean up recursively.
</li><li> VisitedFlag: flag that is temporarily set during processing of a data source, to mark documents as visited. At the end all unmarked documents of a data source are deleted.
</li></ul>
<p>If this information is stored in a database we have to provide an efficient table schema.
</p><p><b>Improvement:</b>
A further feature improvement of DeltaIndexing is to not only store information about data objects but also store information about hierarchy nodes (e.g. folders in filesystem or exchange). Assuming that hierarchy nodes know if any of their sub elements (data objects or hierarchy nodes) have changed, delta indexing performs faster as complete hierarchy levels can be skipped during crawling. A hierarchy does not have to be a tree but may be a graph as well. So data objects and hierarchy nodes may be referenced by more than one hierarchy node, it's not a 1:1 child - parent relationship. Special care has to be taken when deleting hierarchy nodes to not delete elements that are referenced by other hierarchy nodes. Crawlers need to be adapted so that hierarchy nodes are returned, too.
I suggest to make this feature P2, as such an optimization is not necessary for a first release.
</p>
<a name="Interfaces"></a><h3> <span class="mw-headline"> Interfaces </span></h3>
<p>The following data types are used in the Interfaces:
</p>
<ul><li> DataObject: contains the object's ID, the hash value used for Delta Indexing and all information (xml and bin)
</li><li> ID: a unique id of a DataObject. See <a href="ID_Concept.html" title="SMILA/Project Concepts/ID Concept">ID Concept</a> for details about IDs.
</li></ul>
<p>The Sub-Component Processor has no interface itself, but it implements the Connectivity Manager Interface.
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">interface</span> ConnectivityManager
<span class="br0">&#123;</span>
<span class="kw4">void</span> add<span class="br0">&#40;</span>DataObject data<span class="br0">&#41;</span>;
<span class="kw4">void</span> update<span class="br0">&#40;</span>DataObject data<span class="br0">&#41;</span>;
<span class="kw4">void</span> delete<span class="br0">&#40;</span>ID id<span class="br0">&#41;</span>;
&nbsp;
<span class="co1">// external interface for DeltaIndexing</span>
<span class="kw4">void</span> clear<span class="br0">&#40;</span><span class="br0">&#41;</span>;
<span class="kw4">void</span> clear<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span>;
<span class="kw4">void</span> init<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span>;
<span class="kw4">boolean</span> checkForUpdate<span class="br0">&#40;</span>ID id, <span class="kw3">String</span> hash<span class="br0">&#41;</span>;
<span class="kw4">void</span> deleteDelta<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span>;
<span class="kw4">void</span> deleteDelta<span class="br0">&#40;</span>ID id<span class="br0">&#41;</span>; <span class="co1">// to handle delta deletion for a single compound object and it's elements (recursion&nbsp;!)</span>
<span class="br0">&#125;</span></pre></div>
<p>An alternative to optimize network overhead is to provide methods supporting Lists of objects.
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">interface</span> ConnectivityManager
<span class="br0">&#123;</span>
&nbsp;
<span class="kw4">void</span> add<span class="br0">&#40;</span>List&lt;DataObject&gt; data<span class="br0">&#41;</span>;
<span class="kw4">void</span> update<span class="br0">&#40;</span>List&lt;DataObject&gt; data<span class="br0">&#41;</span>;
<span class="kw4">void</span> delete<span class="br0">&#40;</span>List&lt;ID&gt; ids<span class="br0">&#41;</span>;
List&lt;boolean&gt; checkForUpdate<span class="br0">&#40;</span>List&lt;DeltaIndexInfo&gt; infos<span class="br0">&#41;</span>;
<span class="br0">&#125;</span>
&nbsp;
<span class="kw1">class</span> DeltaIndexInfo
<span class="br0">&#123;</span>
ID id;
<span class="kw3">String</span> hash;
<span class="br0">&#125;</span></pre></div>
<p><br />
It makes sense especially for checkForUpdate() and delete(), as the parameters of those methods do not contain much data. The size of the DataObject used in add() and update() varies greatly.
For smaller objects it makes sense to provide Lists, for larger objects not. Of course the maximum List size should be configurable but who decides what List size to use (1 vs. N)? This has to be done dynamically to avoid memory errors!
Another problem is that the Iterator concept used in Crawlers does not fit to methods with Lists. Therefore the iteration over DeltaIndexInfo must be separated from getting the DataObjects. For some Crawlers/DataSources it may be difficult or even impossible to support direct access on objects outside of an iteration. All of this complicates the logic!
</p><p><b>Note (2nd Alternative Interface):</b>
On page <a href="ConnectivityMessageInterface.html" title="SMILA/Project Concepts/ConnectivityMessageInterface">ConnectivityMessageInterface</a> you will find another alternative for the Connectivity Manager's interface based on messages.
</p><p><br />
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">interface</span> DeltaIndexingManager
<span class="br0">&#123;</span>
<span class="kw4">void</span> clear<span class="br0">&#40;</span><span class="br0">&#41;</span>;
<span class="kw4">void</span> clear<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span>;
<span class="kw4">void</span> init<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span>;
<span class="kw4">void</span> finish<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span>;
<span class="kw4">boolean</span> checkForUpdate<span class="br0">&#40;</span>ID id, <span class="kw3">String</span> hash<span class="br0">&#41;</span>; <span class="co1">// to reduce method calls mark entry as visited on return value false</span>
<span class="kw4">void</span> visit<span class="br0">&#40;</span>ID id, <span class="kw3">String</span> hash<span class="br0">&#41;</span>;
<span class="kw4">void</span> delete<span class="br0">&#40;</span>ID id<span class="br0">&#41;</span>;
List&lt;Identity&gt; getObsoleteIDs<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span>;
List&lt;Identity&gt; getObsoleteIDs<span class="br0">&#40;</span>ID id<span class="br0">&#41;</span>; <span class="co1">// for compounds</span>
<span class="br0">&#125;</span></pre></div>
<ul><li> clear: clears the complete state information
</li><li> clear: clears the state information for one dataSourceID
</li><li> init: initializes the internal state for an import of a dataSourceID and establishes a lock to avoid that the same dataSourceID is initialized multiple times concurrently
</li><li> checkForUpdate: checks whether the hash of the current id is new or has changed (true) or not (false)
</li><li> visit: updates the hash and marks this id as visited
</li><li> getObsoleteIDs: returns the entries that have not been marked as visited
</li><li> finish: removes the lock
</li></ul>
<p><br />
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">interface</span> Buffer
<span class="br0">&#123;</span>
<span class="kw4">void</span> store<span class="br0">&#40;</span>Message msg<span class="br0">&#41;</span>;
<span class="br0">&#125;</span></pre></div>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">interface</span> Router
<span class="br0">&#123;</span>
<span class="kw4">void</span> route<span class="br0">&#40;</span>Message msg<span class="br0">&#41;</span>;
<span class="br0">&#125;</span></pre></div>
<p><b>Conclusion:</b>
During the team meeting on April 15 we agreed to implement the "1st alternative interface", employing lists to reduce communication overhead. In addition, the Crawler interface will also support lists. A Message-like interface or a pure XML interface may be added later on.
</p>
<a name="Workflow"></a><h3> <span class="mw-headline"> Workflow </span></h3>
<p>Here follows a description of the workflow when used by an IRM:
</p>
<ul><li><b>Crawler</b>
</li><li> the CrawlerController initializes an import by calling init() to reset Delta Indexing Manager visited flags. It should not be allowed to concurrently import the same DataSource, Connectivity and DeltaIndexing Manager have to ensure this.
</li><li> for each Record (only ID and hash) the CrawlerController receives from the Crawler, it asks the Connectivity Manager (internally the DeltaIndexing Manager) if it needs to be added/updated
<ul><li> no:
<ul><li> mark the Delta Indexing entry as visited
</li></ul>
</li><li> yes:
<ul><li> the Record (now with all data) is sent to the Connectivity Manager. The Connectivity Manager should not request the data object via callback. Crawlers should be the active components that send/request information.
</li><li> the Processor stores the Record data in the external stores (Bin and XML) and creates an add/update Message and sends it to the Buffer
</li><li> the Buffer applies its logic (holding back the message some time, checking for conflicts) and sends the add/update message to the Router
</li><li> the Router routes the add/update Message to the appropriate Queue/BPEL workflow
<ul><li> add: creates a Delta Indexing entry for the object and marks it as visited
</li><li> update: updates the Delta Indexing entry for the object and marks it as visited
</li></ul>
</li></ul>
</li></ul>
</li><li> after the iteration has finished the CrawlerController tells the Connectivity Manager to perform Delta Indexing Manager Delete
<ul><li> the Processor checks the Buffer, until no more messages belonging to this DataSource are on hold
</li><li> the Processor gets the list of objects to be deleted from the Delta Indexing Manager
</li><li> the Processor creates N delete messages and adds each to the Buffer and calls Delta Index Manager finish() (cleans up the visited flags)
</li><li> the Buffer applies its logic and sends each delete message to the Router
</li><li> the Router routes the delete Message to the appropriate Queue/BPEL workflow and removes the corresponding entry from the Delta Indexing Manager. <b>Useful Information:</b> For better performance it may be preferable to not create a single delete Message for each object but to create one delete Message with a list of IDs to be deleted. DeltaIndexing should then support a delete(List&lt;ID&gt;) method. But this will complicate the Buffer logic. Is it possible to pack a list of IDs in a message?
</li></ul>
</li></ul>
<ul><li><b>Agent</b>
</li><li> the Agent sends an add Record (all data) to the AgentController, which in turn calls add on the ConnectivityManager. No DeltaIndex is done (init() is not called) so no component applies DeltaIndexing logic.
<ul><li> the Processor stores the Record data in the external stores (Bin and XML) and creates an add Message and sends it to the Buffer
</li><li> the Buffer applies its logic (holding back the message some time, checking for conflicts) and sends the add message to the Router
</li><li> the Router routes the add Message to the appropriate Queue/BPEL workflow
</li><li> add: creates a Delta Indexing entry for the object and marks it as visited
</li></ul>
</li><li> the Agent sends an update Record (all data) to the AgentController, which in turn calls update on the ConnectivityManager
<ul><li> the Processor stores the Record data in the external stores (Bin and XML) and creates an update Message and sends it to the Buffer
</li><li> the Buffer applies its logic (holding back the message some time, checking for conflicts) and sends the update message to the Router
</li><li> the Router routes the update Message to the appropriate Queue/BPEL workflow
</li></ul>
</li><li> the Agent sends a delete Record (just ID) to the AgentController, which in turn calls delete on the ConnectivityManager
<ul><li> the Processor creates a delete Message and sends it to the Buffer
</li><li> the Buffer applies its logic (holding back the message some time, checking for conflicts) and sends the delete message to the Router
</li><li> the Router routes the delete Message to the appropriate Queue/BPEL workflow
</li></ul>
</li></ul>
<!--
NewPP limit report
Preprocessor node count: 28/1000000
Post-expand include size: 0/2097152 bytes
Template argument size: 0/2097152 bytes
#ifexist count: 0/100
-->
<!-- Saved in parser cache with key wikidb:pcache:idhash:15224-0!1!0!!en!2!edit=0 and timestamp 20120203101507 -->
<div class="printfooter">
Retrieved from "<a href="Connectivity.html">http://wiki.eclipse.org/SMILA/Project_Concepts/Connectivity</a>"</div>
<!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<!-- Yoink of toolbox for phoenix moved up -->
</div>
</div>
<div id="clearFooter"></div>
<div id="footer" >
<ul id="footernav">
<li class="first"><a href="http://www.eclipse.org/">Home</a></li>
<li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li>
<li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li>
</ul>
<span id="copyright">Copyright &copy; 2012 The Eclipse Foundation. All Rights Reserved</span>
<p id="footercredit">This page was last modified 12:17, 13 August 2008 by <a href="http://wiki.eclipse.org/User:Daniel.stucky.empolis.com" title="User:Daniel.stucky.empolis.com">Daniel Stucky</a>. </p>
<p id="footerviews">This page has been accessed 1,925 times.</p>
</div>
<!-- Google Analytics (legacy ga.js): the first script writes a script tag that
     loads ga.js from the host matching the page protocol; the second reports
     a page view once the library is available. -->
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
// ga.js may be blocked or fail to load; only track when its _gat global exists,
// so a missing library does not raise a ReferenceError on every page view.
if (typeof _gat !== "undefined") {
var pageTracker = _gat._getTracker("UA-910670-4");
pageTracker._trackPageview();
}
</script>
<!-- <div class="visualClear"></div> -->
<script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
</div>
<!-- Served in 0.218 secs. --></body></html>