blob: f4bb777a3b6163588ea7b54460ec434faf28d1e1 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09,SMILA/Documentation/DeltaIndexingManager,SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09/Separate Interfaces for ConnectivityManager and DeltaIndexingManager,SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09/Turning DeltaIndexing On or Off,SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09/Usage of DeltaIndexingManager by CrawlerControler alone,Daniel.stucky.empolis.com" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/Specifications/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" />
<link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=rss" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=atom" />
<title>SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09 - Eclipsepedia</title>
<style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style>
<link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" />
<link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" />
<!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]-->
<!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]-->
<!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]-->
<!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]-->
<!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]-->
<!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<script type= "text/javascript">/*<![CDATA[*/
var skin = "eclipsenova";
var stylepath = "/skins";
var wgArticlePath = "/$1";
var wgScriptPath = "";
var wgScript = "/index.php";
var wgServer = "http://wiki.eclipse.org";
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09";
var wgTitle = "SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09";
var wgAction = "view";
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
var wgArticleId = "15436";
var wgIsArticle = true;
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
var wgCurRevisionId = "175156";
var wgVersion = "1.12.0";
var wgEnableAPI = true;
var wgEnableWriteAPI = false;
/*]]>*/</script>
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script>
<!-- Performance mods similar to those for bug 166401 -->
<script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=eclipsenova"><!-- site js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script>
<style type="text/css">/*<![CDATA[*/
.source-java {line-height: normal; font-size: medium;}
.source-java li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for java
* CSS class: source-java, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-java .de1, .source-java .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-java {}
.source-java .head {}
.source-java .foot {}
.source-java .imp {font-weight: bold; color: red;}
.source-java .ln-xtra {color: #cc0; background-color: #ffc;}
.source-java li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-java li.li2 {font-weight: bold;}
.source-java .kw1 {color: #7F0055; font-weight: bold;}
.source-java .kw2 {color: #7F0055; font-weight: bold;}
.source-java .kw3 {color: #000000; font-weight: normal}
.source-java .kw4 {color: #7F0055; font-weight: bold;}
.source-java .co1 {color: #3F7F5F; font-style: italic;}
.source-java .co2 {color: #3F7F5F;}
.source-java .co3 {color: #3F7F5F; font-style: italic; font-weight: bold;}
.source-java .coMULTI {color: #3F5FBF; font-style: italic;}
.source-java .es0 {color: #000000;}
.source-java .br0 {color: #000000;}
.source-java .st0 {color: #2A00ff;}
.source-java .nu0 {color: #000000;}
.source-java .me1 {color: #000000;}
.source-java .me2 {color: #000000;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><link rel="stylesheet" type="text/css" href="DeltaIndexingAndConnectivtyDiscussion09.html" /> </head>
<body class="mediawiki ns-0 ltr page-SMILA_Specifications_DeltaIndexingAndConnectivtyDiscussion09">
<div id="globalWrapper">
<div id="column-one">
<!-- Eclipse Additions for the Top Nav start here M. Ward-->
<div id="header">
<div id="header-graphic">
<img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki" />
</div>
<!-- Pulled 101409 Mward -->
<div class="portlet" id="p-personal">
<div class="pBody">
<ul>
<li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09">Log in</a></li>
</ul>
</div>
</div>
<div id="header-icons">
<div id="sites">
<ul id="sitesUL">
<li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li>
<li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li>
<li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li>
<li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li>
<li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li>
<li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li>
</ul>
</div>
</div>
</div>
<!-- NEW HEADER STUFF HERE -->
<div id="header-menu">
<div id="header-nav">
<ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li>
<li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li>
<li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li>
<li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li>
<li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li>
<li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li>
<li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li>
</ul>
</div>
<div id="header-utils">
<!-- moved the search window here -->
<form action="http://wiki.eclipse.org/Special:Search" >
<input class="input" id="searchInput" name="search" type="text" accesskey="f" value="" />
<input type="submit" name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" />&nbsp;
<input type="submit" name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" />
</form>
</div>
</div>
<!-- Eclipse Additions for the Header stop here -->
<!-- Additions and mods for leftside nav Start here -->
<!--Started nav rip here-->
<!-- these are the nav controls main page, changes etc -->
<div id="novaContent" class="faux">
<div id="leftcol">
<ul id="leftnav">
<!-- these are the page controls, edit history etc -->
<li class="separator"><a class="separator">Navigation &#160;&#160;</a></li>
<li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li>
<li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li>
<li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li>
<li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li>
<li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li>
<li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li>
<li class="separator"><a class="separator">Toolbox &#160;&#160;</a></li>
<li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09">What links here</a></li>
<li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09">Related changes</a></li>
<!-- This is the toolbox section -->
<li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li>
<li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li>
<li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09&amp;printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09&amp;oldid=175156">Permanent link</a></li> </ul>
</div>
<!-- Additions and mods for leftside nav End here -->
<div id="column-content">
<div id="content">
<a name="top" id="top"></a>
<div id="tabs">
<ul class="primary">
<li class="active"><a href="DeltaIndexingAndConnectivtyDiscussion09.html"><span class="tab">Page</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09&amp;action=edit"><span class="tab">Discussion</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09&amp;action=edit"><span class="tab">View source</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09&amp;action=history"><span class="tab">History</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09"><span class="tab">Edit</span></a></li>
</ul>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<h1 class="firstHeading">SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09</h1>
<div id="bodyContent">
<h3 id="siteSub">From Eclipsepedia</h3>
<div id="contentSub"><span class="subpages">&lt; <a href="../../SMILA.html" title="SMILA">SMILA</a> | <a href="../Specifications.1.html" title="SMILA/Specifications">Specifications</a></span></div>
<div id="jump-to-nav">Jump to: <a href="DeltaIndexingAndConnectivtyDiscussion09.html#column-one">navigation</a>, <a href="DeltaIndexingAndConnectivtyDiscussion09.html#searchInput">search</a></div> <!-- start content -->
<p>
</p>
<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1"><a href="DeltaIndexingAndConnectivtyDiscussion09.html#Motivation_for_this_page_and_usage"><span class="tocnumber">1</span> <span class="toctext">Motivation for this page and usage</span></a></li>
<li class="toclevel-1"><a href="DeltaIndexingAndConnectivtyDiscussion09.html#Ideas__and_Problems_.28under_discussion.29"><span class="tocnumber">2</span> <span class="toctext">Ideas and Problems (under discussion)</span></a>
<ul>
<li class="toclevel-2"><a href="DeltaIndexingAndConnectivtyDiscussion09.html#DeltaIndexing_reflects_crawl_state_rather_than_index_state"><span class="tocnumber">2.1</span> <span class="toctext">DeltaIndexing reflects crawl state rather than index state</span></a></li>
<li class="toclevel-2"><a href="DeltaIndexingAndConnectivtyDiscussion09.html#Extract_Session_Interface_from_DeltaIndexingManager"><span class="tocnumber">2.2</span> <span class="toctext">Extract Session Interface from DeltaIndexingManager</span></a>
<ul>
<li class="toclevel-3"><a href="DeltaIndexingAndConnectivtyDiscussion09.html#Discussion"><span class="tocnumber">2.2.1</span> <span class="toctext">Discussion</span></a>
<ul>
<li class="toclevel-4"><a href="DeltaIndexingAndConnectivtyDiscussion09.html#modifications_to_the_interfaces"><span class="tocnumber">2.2.1.1</span> <span class="toctext">modifications to the interfaces</span></a></li>
</ul>
</li>
<li class="toclevel-3"><a href="DeltaIndexingAndConnectivtyDiscussion09.html#Usage_of_DeltaIndexingManager_by_CrawlerControler_alone"><span class="tocnumber">2.2.2</span> <span class="toctext">Usage of DeltaIndexingManager by CrawlerControler alone</span></a></li>
</ul>
</li>
</ul>
</li>
<li class="toclevel-1"><a href="DeltaIndexingAndConnectivtyDiscussion09.html#Implemented_Changes"><span class="tocnumber">3</span> <span class="toctext">Implemented Changes</span></a></li>
</ul>
</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
<a name="Motivation_for_this_page_and_usage"></a><h2> <span class="mw-headline"> Motivation for this page and usage </span></h2>
<p>The current implementation for the <a href="../Documentation/DeltaIndexingManager.html" title="SMILA/Documentation/DeltaIndexingManager"> DeltaIndexingManager</a> has several problems or shortcomings which are listed under the section <a href="DeltaIndexingAndConnectivtyDiscussion09.html#Ideas__and_Problems_.28under_discussion.29" title="">Ideas and Problems (under discussion)</a>. If an idea is rather large, a separate page is usually better and should be created as a child of this page. It should still have its own section here that contains at least a link to that page.
</p><p>The initiating authors should edit only their own sections and not those of others.
</p><p>each subsection/page should state:
</p>
<ul><li> context such as: author, date, based on SVN revision
</li><li> motivation/problem
</li><li> a solution proposal
</li></ul>
<p>ideas that have been implemented are moved to their own page and referenced in <a href="DeltaIndexingAndConnectivtyDiscussion09.html#Implemented_Changes" title="">Implemented Changes</a>.
</p>
<a name="Ideas__and_Problems_.28under_discussion.29"></a><h2> <span class="mw-headline"> Ideas and Problems (under discussion) </span></h2>
<a name="DeltaIndexing_reflects_crawl_state_rather_than_index_state"></a><h3> <span class="mw-headline"> DeltaIndexing reflects crawl state rather than index state </span></h3>
<p>One Problem at the moment is, that because SMILA's processing of incoming Records is asynchronous, DeltaIndexing does NOT really reflect the state of a Record in the index, as there is no guarantee that a Record is indexed after it was successfully added to the Queue. This could be achieved by implementing Notifications that update the DeltaIndexing state using this information. If this is done, then the computation of DeltaIndexing-Delete has to wait for all Queue entries to pass the workflow. This is a complex process which seems to be error-prone. Is it really necessary to reflect the index state or is it enough to reflect the last crawl state&nbsp;?
</p>
<a name="Extract_Session_Interface_from_DeltaIndexingManager"></a><h3> <span class="mw-headline"> Extract Session Interface from DeltaIndexingManager </span></h3>
<p>For a better separation of tasks and an easy handling of locks on data sources during a delta indexing run, we could introduce the following interfaces. The implementations should only be proxies using the same DeltaIndexingManager service implementation, so that a DeltaIndexingSession may internally use another service if the initial one becomes unavailable.
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">interface</span> DeltaIndexingManager
<span class="br0">&#123;</span>
<span class="coMULTI">/**
* Initializes a new DeltaIndexingSession if the datasource is not locked.
*/</span>
DeltaIndexingSession init<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Clear all data sources that are not locked.
*/</span>
<span class="kw4">void</span> clear<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Clears the data source if not locked.
*/</span>
<span class="kw4">void</span> clear<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Unlocks all data sources by force.
*/</span>
<span class="kw4">void</span> unlockDatasources<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Checks if a data source exists.
*/</span>
<span class="kw4">boolean</span> exists<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceId<span class="br0">&#41;</span>;
<span class="br0">&#125;</span></pre></div>
<p><br />
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">interface</span> DeltaIndexingSession
<span class="br0">&#123;</span>
<span class="coMULTI">/**
* Checks if the id needs to be updated.
*/</span>
<span class="kw4">boolean</span> checkForUpdate<span class="br0">&#40;</span>Id id, <span class="kw3">String</span> hash<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Marks the id as visited.
*/</span>
<span class="kw4">void</span> visit<span class="br0">&#40;</span>Id id, <span class="kw3">String</span> hash<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Returns an iterator over all unvisited ids of the data source
*/</span>
Iterator&lt;Id&gt; obsoleteIdIterator<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Returns an iterator over all unvisited ids of a parent id (compound objects)
*/</span>
Iterator&lt;Id&gt; obsoleteIdIterator<span class="br0">&#40;</span>Id id<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Deletes the id.
*/</span>
<span class="kw4">void</span> delete<span class="br0">&#40;</span>Id id<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Finishes the deltaindexing run and unlocks the data source.
*/</span>
<span class="kw4">void</span> finish<span class="br0">&#40;</span><span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
<span class="br0">&#125;</span></pre></div>
<p><b>This approach was not realized.</b>
But a sessionId was introduced to distinguish between different sessions without relying on thread ids. See <a href="https://bugs.eclipse.org/bugs/show_bug.cgi?id=279243" class="external free" title="https://bugs.eclipse.org/bugs/show_bug.cgi?id=279243" rel="nofollow">https://bugs.eclipse.org/bugs/show_bug.cgi?id=279243</a>
</p><p><br />
</p>
<a name="Discussion"></a><h4> <span class="mw-headline"> Discussion </span></h4>
<a name="modifications_to_the_interfaces"></a><h5> <span class="mw-headline"> modifications to the interfaces </span></h5>
<p>TM 2009 10 15:
i second the notion to extract a session interface. but i also would do a few renames and changes like so:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">public</span> <span class="kw1">interface</span> IDeltaIndexingManager <span class="br0">&#123;</span>
&nbsp;
<span class="coMULTI">/**
* Initializes the internal state for an import of a dataSourceID and creates a session wherein it establishes a lock
* to avoid that the same dataSourceID is initialized multiple times concurrently. It returns an object for the session
* that a client has to use to gain access to the locked data source.
*
* @param dataSourceID
* dataSourceID
*
* @return the i delta indexing session
*
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
IDeltaIndexingSession createSession<span class="br0">&#40;</span><span class="kw1">final</span> <span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/* methods that don't need a session */</span>
&nbsp;
<span class="coMULTI">/**
* Clears all entries of the DeltaIndexingManager including sessions.
*
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">void</span> clear<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Unlock the given data source and removes the sessions.
*
* @param dataSourceID
* the data source id
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">void</span> unlockDatasource<span class="br0">&#40;</span><span class="kw1">final</span> <span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Unlock all data sources and removes all sessions.
*
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">void</span> unlockDatasources<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Gets an overview what data sources are locked or unlocked.
*
* @return a map containing the dataSourceId and the LockState
*/</span>
Map&lt;String, LockState&gt; getLockStates<span class="br0">&#40;</span><span class="br0">&#41;</span>;
&nbsp;
<span class="coMULTI">/**
* Checks if the entries for the given dataSourceId exist.
*
* @param dataSourceId
* the data source id
*
* @return true, if successful
*/</span>
<span class="kw4">boolean</span> dataSourceExists<span class="br0">&#40;</span><span class="kw1">final</span> <span class="kw3">String</span> dataSourceId<span class="br0">&#41;</span>;
&nbsp;
<span class="coMULTI">/**
* Get the number of delta indexing entries for the given dataSourceID.
*
* @param dataSourceID
* the data source id
* @return the number of entries
*/</span>
<span class="kw4">long</span> getEntryCount<span class="br0">&#40;</span><span class="kw1">final</span> <span class="kw3">String</span> dataSourceID<span class="br0">&#41;</span>;
&nbsp;
<span class="coMULTI">/**
* Get the number of delta indexing entries for all data sources.
*
* @return a map of dataSourceIds and the entry counts
*/</span>
Map&lt;String, Long&gt; getEntryCounts<span class="br0">&#40;</span><span class="br0">&#41;</span>;
&nbsp;
<span class="coMULTI">/**
* An enumeration defining the lock states of a data source in the DeltaIndexingManager.
*/</span>
<span class="kw1">public</span> <span class="kw1">enum</span> LockState <span class="br0">&#123;</span>
<span class="coMULTI">/**
* The lock states.
*/</span>
LOCKED, UNLOCKED;
<span class="br0">&#125;</span>
<span class="br0">&#125;</span>
&nbsp;
<span class="coMULTI">/**
* The Interface IDeltaIndexingSession.
*
* @author tmenzel
*/</span>
<span class="kw1">public</span> <span class="kw1">interface</span> IDeltaIndexingSession <span class="br0">&#123;</span>
&nbsp;
<span class="coMULTI">/**
* Clear all entries of the given sessionId.
*
* @throws DeltaIndexingSessionException
* if the sessionId is invalid
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">void</span> clear<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingSessionException, DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Finish this delta indexing session and remove the lock.
*
* @throws DeltaIndexingSessionException
* if the sessionId is invalid
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">void</span> commit<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingSessionException, DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Delete.
*
* @param id
* the id
*
* @throws DeltaIndexingSessionException
* if the sessionId is invalid
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">void</span> delete<span class="br0">&#40;</span><span class="kw1">final</span> Id id<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingSessionException, DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Delete untouched ids. rather than calling {@link #delete(Id)} by the controller when iterating thru the ids, the
* implementation may do so internally for all untouched ids in one go more efficiently.
*
* @param id
* the id
*
* @return the number of deleted ids
*
* @throws DeltaIndexingSessionException
* the delta indexing session exception
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">long</span> deleteUntouchedIds<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingSessionException, DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Obsolete id iterator.
*
*
* @return the iterator&lt; id&gt;
*
* @throws DeltaIndexingSessionException
* if the sessionId is invalid
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
Iterator&lt;Id&gt; getUntouchedIds<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingSessionException, DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Obsolete id iterator for id fragments.
*
* @param id
* the id
*
* @return the iterator&lt; id&gt;
*
* @throws DeltaIndexingSessionException
* if the sessionId is invalid
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
Iterator&lt;Id&gt; getUntouchedIds<span class="br0">&#40;</span><span class="kw1">final</span> Id id<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingSessionException, DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* checks if the hash of the current id is new or has changed (true) or not (false). //
*
* to reduce method calls mark entry as visited on return value false
*
* @param id
* the id
* @param hash
* the hash
*
* @return true, if checks for changed
*
* @throws DeltaIndexingSessionException
* the delta indexing session exception
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">boolean</span> hasChanged<span class="br0">&#40;</span><span class="kw1">final</span> Id id, <span class="kw1">final</span> <span class="kw3">String</span> hash<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingSessionException, DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* rolls back changes that were made in the current session between init() and finish(), it should be called before
* finishing process.
*
* @throws DeltaIndexingSessionException
* if the sessionId is invalid
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">void</span> rollback<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingSessionException, DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* Creates or updates the delta indexing entry. this is THE method to make the record known to DI. It sets the hash,
* the isCompound flag and marks this id as visited.
*
* @param id
* the id
* @param hash
* the hash
* @param isCompound
* boolean flag if the record identified by id is a compound record (true) or not (false)
*
* @throws DeltaIndexingSessionException
* if the sessionId is invalid
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">void</span> touch<span class="br0">&#40;</span><span class="kw1">final</span> Id id, <span class="kw1">final</span> <span class="kw3">String</span> hash, <span class="kw1">final</span> <span class="kw4">boolean</span> isCompound<span class="br0">&#41;</span> <span class="kw1">throws</span> DeltaIndexingSessionException,
DeltaIndexingException;
&nbsp;
<span class="coMULTI">/**
* this is a combination of {@link #hasChanged(Id, String)} and {@link #touch(Id, String, boolean)} in one step.
* &lt;p&gt;
* It has a perf. gain over calling the methods separately but has the drawback, that the record is always touched
* independently of an exception that occurs before putting the record into the Q. on the other hand, this matters not
* much as the subsequent processing may also cause errors which aren't reflected in the &quot;touch&quot; state.
*
* @param id
* the id
* @param hash
* the hash
* @param isCompound
* the is compound
*
* @return true, if successful
*
* @throws DeltaIndexingSessionException
* the delta indexing session exception
* @throws DeltaIndexingException
* the delta indexing exception
*/</span>
<span class="kw4">boolean</span> checkAndTouch<span class="br0">&#40;</span><span class="kw1">final</span> Id id, <span class="kw1">final</span> <span class="kw3">String</span> hash, <span class="kw1">final</span> <span class="kw4">boolean</span> isCompound<span class="br0">&#41;</span>
<span class="kw1">throws</span> DeltaIndexingSessionException, DeltaIndexingException;
&nbsp;
<span class="br0">&#125;</span></pre></div>
<p><br />
</p>
<a name="Usage_of_DeltaIndexingManager_by_CrawlerControler_alone"></a><h4> <span class="mw-headline"> Usage of DeltaIndexingManager by CrawlerControler alone </span></h4>
<p>Here is another idea based on the changes introduced with <a href="DeltaIndexingAndConnectivtyDiscussion09/Separate_Interfaces_for_ConnectivityManager_and_DeltaIndexingManager.html" title="SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09/Separate Interfaces for ConnectivityManager and DeltaIndexingManager">SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09/Separate_Interfaces_for_ConnectivityManager_and_DeltaIndexingManager</a>, but taking it further so that each Crawler, rather than the CrawlerController, communicates with the DeltaIndexingManager:
</p><p>This is a radical change as it also affects the Crawler interface. Crawlers could directly communicate with the DeltaIndexingManager and provide only those Records that pass DeltaIndexing (are new, need an update). CrawlerController and Crawler could implement a Consumer/Producer pattern which should improve performance. No more sending of arrays with DIInformation and thereafter retrieving the Record objects. DeltaIndexing-Delete information is computed in the Crawler and can be passed to the CrawlerController as regular Records (only the ID is set) and a delete flag to notify the CrawlerController that this Record is to be deleted. This should reduce communication overhead, as the DIInformation does not have to be passed between multiple components and the whole process can work multithreaded. Of course this adds a lot more logic to the Crawler and demands more knowledge from a Crawler developer. It would also mean that ID and HASH are generated in the Crawler. The downside is that each Crawler has to implement the DeltaIndexing workflow itself. <br />We could even move all execution logic to the Crawler. CrawlerController would become obsolete. Then Crawlers would handle everything themselves - communication with DeltaIndexingManager, CompoundHandlers and ConnectivityManager. I think in this way the best performance can be achieved, as the setup is very simple. No unnecessary passing of data between components. But a lot of logic has to be re-implemented in every Crawler. I wonder if there is a chance to minimize this.
</p><p>(an <a href="DeltaIndexingAndConnectivtyDiscussion09/Usage_of_DeltaIndexingManager_by_CrawlerControler_alone.html" title="SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09/Usage of DeltaIndexingManager by CrawlerControler alone"> empty page</a> exists for this already)
</p>
<a name="Implemented_Changes"></a><h2> <span class="mw-headline"> Implemented Changes </span></h2>
<table width="100%" style="font-family: Trebuchet MS;vertical-align:text-top;" border="1">
<tr style="font-weight:bold;font-size:larger;color:white;background:gray;text-align:center;">
<td> Page </td><td> Date </td><td> Bug </td><td> Author(s)
</td></tr>
<tr>
<td> <a href="DeltaIndexingAndConnectivtyDiscussion09/Turning_DeltaIndexing_On_or_Off.html" title="SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09/Turning DeltaIndexing On or Off">New Feature: DeltaIndexing On/Off </a> </td><td> 2009-06-10 </td><td> <a href="https://bugs.eclipse.org/bugs/show_bug.cgi?id=279242" class="external text" title="https://bugs.eclipse.org/bugs/show_bug.cgi?id=279242" rel="nofollow">bug 279242</a> </td><td> <a href="http://wiki.eclipse.org/User:Daniel.stucky.empolis.com" title="User:Daniel.stucky.empolis.com">Daniel Stucky</a>
</td></tr>
<tr>
<td> <a href="DeltaIndexingAndConnectivtyDiscussion09/Separate_Interfaces_for_ConnectivityManager_and_DeltaIndexingManager.html" title="SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09/Separate Interfaces for ConnectivityManager and DeltaIndexingManager"> Separate Interfaces for ConnectivityManager and DeltaIndexingManager </a> </td><td> 2008-06&nbsp;? </td><td>&nbsp;? </td><td> <a href="http://wiki.eclipse.org/User:Daniel.stucky.empolis.com" title="User:Daniel.stucky.empolis.com">Daniel Stucky</a>
</td></tr></table>
<!--
NewPP limit report
Preprocessor node count: 28/1000000
Post-expand include size: 246/2097152 bytes
Template argument size: 12/2097152 bytes
#ifexist count: 0/100
-->
<!-- Saved in parser cache with key wikidb:pcache:idhash:15436-0!1!0!!en!2!edit=0 and timestamp 20120203101539 -->
<div class="printfooter">
Retrieved from "<a href="DeltaIndexingAndConnectivtyDiscussion09.html">http://wiki.eclipse.org/SMILA/Specifications/DeltaIndexingAndConnectivtyDiscussion09</a>"</div>
<div id="catlinks"><p class='catlinks'><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Category</a>: <span dir='ltr'><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></span></p></div> <!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<!-- Yoink of toolbox for phoenix moved up -->
</div>
</div>
<div id="clearFooter"></div>
<div id="footer" >
<ul id="footernav">
<li class="first"><a href="http://www.eclipse.org/">Home</a></li>
<li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li>
<li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li>
</ul>
<span id="copyright">Copyright &copy; 2012 The Eclipse Foundation. All Rights Reserved</span>
<p id="footercredit">This page was last modified 06:56, 17 October 2009 by <a href="http://wiki.eclipse.org/User:Tmenzel.brox.de" title="User:Tmenzel.brox.de">thomas menzel</a>. Based on work by <a href="http://wiki.eclipse.org/User:Daniel.stucky.empolis.com" title="User:Daniel.stucky.empolis.com">Daniel Stucky</a>.</p>
<p id="footerviews">This page has been accessed 2,631 times.</p>
</div>
<script type="text/javascript">
// Choose the Google Analytics host prefix that matches the scheme of the current page.
var gaJsHost;
if (document.location.protocol == "https:") {
  gaJsHost = "https://ssl.";
} else {
  gaJsHost = "http://www.";
}
// Write the ga.js script element into the document. The markup is kept percent-encoded
// and decoded at runtime (presumably so no literal script end tag appears inside this
// inline script; confirm before changing the encoding).
document.write(
  unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E")
);
</script>
<script type="text/javascript">
// Create the Google Analytics tracker for this property and record the page view.
// ga.js is injected by the preceding script block; if it did not load (blocked by the
// client, network failure), _gat is undefined and the calls below would throw an
// uncaught ReferenceError, so they are guarded.
try {
  var pageTracker = _gat._getTracker("UA-910670-4");
  pageTracker._trackPageview();
} catch (err) {
  // Tracking is best-effort: a failure here must not surface as a page script error.
}
</script>
<!-- <div class="visualClear"></div> -->
<script type="text/javascript">
// Invoke the onload hook only when one has been defined on window.
if (window.runOnloadHook) {
  runOnloadHook();
}
</script>
</div>
<!-- Served in 0.280 secs. --></body></html>