blob: 1cae0a23f0b6e7c34c417d6d524248aaaab325fb [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="SMILA/Project Concepts/ID Concept" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/Project_Concepts/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" />
<link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=rss" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=atom" />
<title>SMILA/Project Concepts/ID Concept - Eclipsepedia</title>
<style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "http://wiki.eclipse.org/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style>
<link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" />
<link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" />
<!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]-->
<!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]-->
<!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]-->
<!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]-->
<!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]-->
<!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<script type= "text/javascript">/*<![CDATA[*/
var skin = "eclipsenova";
var stylepath = "/skins";
var wgArticlePath = "/$1";
var wgScriptPath = "";
var wgScript = "/index.php";
var wgServer = "http://wiki.eclipse.org";
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "SMILA/Project_Concepts/ID_Concept";
var wgTitle = "SMILA/Project Concepts/ID Concept";
var wgAction = "view";
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
var wgArticleId = "15154";
var wgIsArticle = true;
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
var wgCurRevisionId = "159011";
var wgVersion = "1.12.0";
var wgEnableAPI = true;
var wgEnableWriteAPI = false;
/*]]>*/</script>
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script>
<!-- Performance mods similar to those for bug 166401 -->
<script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=eclipsenova"><!-- site js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script>
<style type="text/css">/*<![CDATA[*/
.source-xml {line-height: normal; font-size: medium;}
.source-xml li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for xml
* CSS class: source-xml, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-xml .de1, .source-xml .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-xml {}
.source-xml .head {}
.source-xml .foot {}
.source-xml .imp {font-weight: bold; color: red;}
.source-xml .ln-xtra {color: #cc0; background-color: #ffc;}
.source-xml li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-xml li.li2 {font-weight: bold;}
.source-xml .coMULTI {color: #808080; font-style: italic;}
.source-xml .es0 {color: #000099; font-weight: bold;}
.source-xml .br0 {color: #66cc66;}
.source-xml .st0 {color: #ff0000;}
.source-xml .nu0 {color: #cc66cc;}
.source-xml .sc0 {color: #00bbdd;}
.source-xml .sc1 {color: #ddbb00;}
.source-xml .sc2 {color: #339933;}
.source-xml .sc3 {color: #009900;}
.source-xml .re0 {color: #000066;}
.source-xml .re1 {font-weight: bold; color: black;}
.source-xml .re2 {font-weight: bold; color: black;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "http://wiki.eclipse.org/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><style type="text/css">/*<![CDATA[*/
.source-java {line-height: normal; font-size: medium;}
.source-java li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for java
* CSS class: source-java, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-java .de1, .source-java .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-java {}
.source-java .head {}
.source-java .foot {}
.source-java .imp {font-weight: bold; color: red;}
.source-java .ln-xtra {color: #cc0; background-color: #ffc;}
.source-java li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-java li.li2 {font-weight: bold;}
.source-java .kw1 {color: #7F0055; font-weight: bold;}
.source-java .kw2 {color: #7F0055; font-weight: bold;}
.source-java .kw3 {color: #000000; font-weight: normal}
.source-java .kw4 {color: #7F0055; font-weight: bold;}
.source-java .co1 {color: #3F7F5F; font-style: italic;}
.source-java .co2 {color: #3F7F5F;}
.source-java .co3 {color: #3F7F5F; font-style: italic; font-weight: bold;}
.source-java .coMULTI {color: #3F5FBF; font-style: italic;}
.source-java .es0 {color: #000000;}
.source-java .br0 {color: #000000;}
.source-java .st0 {color: #2A00ff;}
.source-java .nu0 {color: #000000;}
.source-java .me1 {color: #000000;}
.source-java .me2 {color: #000000;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "http://wiki.eclipse.org/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><link rel="stylesheet" type="text/css" href="ID_Concept.html" /> </head>
<body class="mediawiki ns-0 ltr page-SMILA_Project_Concepts_ID_Concept">
<div id="globalWrapper">
<div id="column-one">
<!-- Eclipse Additions for the Top Nav start here M. Ward-->
<div id="header">
<div id="header-graphic">
<img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki" />
</div>
<!-- Pulled 101409 Mward -->
<div class="portlet" id="p-personal">
<div class="pBody">
<ul>
<li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Project_Concepts/ID_Concept">Log in</a></li>
</ul>
</div>
</div>
<div id="header-icons">
<div id="sites">
<ul id="sitesUL">
<li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li>
<li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li>
<li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li>
<li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li>
<li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li>
<li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li>
</ul>
</div>
</div>
</div>
<!-- NEW HEADER STUFF HERE -->
<div id="header-menu">
<div id="header-nav">
<ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li>
<li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li>
<li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li>
<li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li>
<li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li>
<li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li>
<li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li>
</ul>
</div>
<div id="header-utils">
<!-- moved the search window here -->
<form action="http://wiki.eclipse.org/Special:Search" >
<input class="input" id="searchInput" name="search" type="text" accesskey="f" value="" />
<input type='submit' name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" />&nbsp;
<input type='submit' name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" />
</form>
</div>
</div>
<!-- Eclipse Additions for the Header stop here -->
<!-- Additions and mods for leftside nav Start here -->
<!--Started nav rip here-->
<!-- these are the nav controls main page, changes etc -->
<div id="novaContent" class="faux">
<div id="leftcol">
<ul id="leftnav">
<!-- these are the page controls, edit history etc -->
<li class="separator"><a class="separator">Navigation &#160;&#160;</a></li>
<li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li>
<li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li>
<li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li>
<li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li>
<li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li>
<li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li>
<li class="separator"><a class="separator">Toolbox &#160;&#160;</a></li>
<li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Project_Concepts/ID_Concept">What links here</a></li>
<li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Project_Concepts/ID_Concept">Related changes</a></li>
<!-- This is the toolbox section -->
<li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li>
<li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li>
<li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Project_Concepts/ID_Concept&amp;printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Project_Concepts/ID_Concept&amp;oldid=159011">Permanent link</a></li> </ul>
</div>
<!-- Additions and mods for leftside nav End here -->
<div id="column-content">
<div id="content">
<a name="top" id="top"></a>
<div id="tabs">
<ul class="primary">
<li class="active"><a href="ID_Concept.html"><span class="tab">Page</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Project_Concepts/ID_Concept&amp;action=edit"><span class="tab">Discussion</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Project_Concepts/ID_Concept&amp;action=edit"><span class="tab">View source</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Project_Concepts/ID_Concept&amp;action=history"><span class="tab">History</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Project&#32;Concepts/ID&#32;Concept"><span class="tab">Edit</span></a></li>
</ul>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<h1 class="firstHeading">SMILA/Project Concepts/ID Concept</h1>
<div id="bodyContent">
<h3 id="siteSub">From Eclipsepedia</h3>
<div id="contentSub"><span class="subpages">&lt; <a href="../../SMILA.html" title="SMILA">SMILA</a> | <a href="../Project_Concepts.1.html" title="SMILA/Project Concepts">Project Concepts</a></span></div>
<div id="jump-to-nav">Jump to: <a href="ID_Concept.html#column-one">navigation</a>, <a href="ID_Concept.html#searchInput">search</a></div> <!-- start content -->
<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1"><a href="ID_Concept.html#Description"><span class="tocnumber">1</span> <span class="toctext">Description</span></a></li>
<li class="toclevel-1"><a href="ID_Concept.html#Discussion"><span class="tocnumber">2</span> <span class="toctext">Discussion</span></a></li>
<li class="toclevel-1"><a href="ID_Concept.html#Technical_proposal"><span class="tocnumber">3</span> <span class="toctext">Technical proposal</span></a>
<ul>
<li class="toclevel-2"><a href="ID_Concept.html#Definition_of_concepts:"><span class="tocnumber">3.1</span> <span class="toctext">Definition of concepts:</span></a></li>
<li class="toclevel-2"><a href="ID_Concept.html#Record_ID_design"><span class="tocnumber">3.2</span> <span class="toctext">Record ID design</span></a></li>
<li class="toclevel-2"><a href="ID_Concept.html#Examples"><span class="tocnumber">3.3</span> <span class="toctext">Examples</span></a></li>
</ul>
</li>
</ul>
</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
<a name="Description"></a><h2> <span class="mw-headline"> Description </span></h2>
<p>The purpose of an ID is to identify an object in the system.
What is an object in SMILA?
</p>
<ul><li> simple case: a single document
</li><li> what about compounds?
<ul><li> archive files, e.g. ZIPs
</li><li> Big documents that should be indexed by page or by section
</li></ul>
</li></ul>
<ul><li> SMILA objects have a life cycle
<ul><li> creation in crawler or agent
</li><li> enrichment, splitting, merging (possible?) during processing in SMILA
</li><li> persisting in storages (possibly in different states of processing) or indexes (usually at the end, but also possibly multiple times).
</li><li> the process is repeated when the source object changes (index update) -&gt; the new object must have the same object ID.
</li><li> using the ID it must be possible to refer to the source object.
</li></ul>
</li></ul>
<p><br />
</p>
<a name="Discussion"></a><h2> <span class="mw-headline"> Discussion </span></h2>
<a name="Technical_proposal"></a><h2> <span class="mw-headline"> Technical proposal </span></h2>
<a name="Definition_of_concepts:"></a><h3> <span class="mw-headline"> Definition of concepts: </span></h3>
<ul><li> data source: a single location providing access to a collection of data. (web server, file system, database, CMS, ...). Data is read from a data source using crawler/agents. A data source must have a unique source ID within SMILA to refer to it without having to deal with the technical details of access.
</li></ul>
<ul><li> source object: entity in data source. A crawler/agent can create multiple SMILA objects from a single source object (e.g. by extracting files from a ZIP archive). A source object can be identified with respect to its data source using a relatively simple key (URL, path, primary key, ...)
</li></ul>
<ul><li> record: an entity representing a complete source object or a part of a source object to be processed by SMILA.
<ul><li> Can be split into multiple records.
</li><li> Multiple records referring to different parts of the same source object can be merged again? Could be useful to split really large documents, process them section by section and merge the results again.
</li><li> Can be written to storages or indexes.
</li><li> Can be read from a storage in order to redo the rest of the processing (e.g. to rebuild an index after ontology changes).
</li></ul>
</li></ul>
<p><br />
</p>
<a name="Record_ID_design"></a><h3> <span class="mw-headline"> Record ID design </span></h3>
<p>A Record ID must contain the following, and it must be possible to extract them from it:
</p>
<ul><li> data source ID
</li><li> key of source object in data source, relative to the definitions of the data source
</li></ul>
<p>These must be provided by the crawler/agent.
</p><p>Source objects can have multiple key values, e.g. in database tables with a primary key consisting of multiple columns.
</p><p>During processing, the record ID may/can be enhanced:
</p>
<ul><li> Part specification after splitting a compound
<ul><li> Element: part of a container, e.g. path in archive (what about recursion: part of part of part...), attachment index in mails, etc. The element is identified by another key which is relative to the container element.
</li><li> Fragment: identified by page number, section number, section name, etc.
</li></ul>
</li></ul>
<p>If merging is supported, multiple records belonging to the same source object can be merged into a single record. The merged ID must reflect this.
</p><p>Do we want to pack all this into a single ID string (URL, whatever)? All kinds of quoting problems may arise (remember that the source object key could be a complex URL itself already). Thus, we probably want to use a structured ID object. Something like this:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;rec:Record<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span><span class="sc3"><span class="coMULTI">&lt;!-- String: ID of data source --&gt;</span></span><span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span><span class="sc3"><span class="coMULTI">&lt;!-- String: key of source object relative to data source --&gt;</span></span><span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
&nbsp;
<span class="sc3"><span class="coMULTI">&lt;!-- the elements above are mandatory, the following is optional --&gt;</span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span><span class="sc3"><span class="coMULTI">&lt;!-- String: path in archive, attachment index --&gt;</span></span><span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="coMULTI">&lt;!-- id:Element can be repeated for recursive archives --&gt;</span></span>
<span class="sc3"><span class="re1">&lt;/id:Element<span class="re2">&gt;</span></span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;id:Fragment<span class="re2">&gt;</span></span></span><span class="sc3"><span class="coMULTI">&lt;!-- page number, section name/number --&gt;</span></span><span class="sc3"><span class="re1">&lt;/id:Fragment<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="coMULTI">&lt;!-- maybe repeated e.g. for books: Part, Chapter, Section, Subsection ... --&gt;</span></span>
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span>
&nbsp;
<span class="sc3"><span class="coMULTI">&lt;!-- other metadata and non-binary content --&gt;</span></span>
&nbsp;
<span class="sc3"><span class="re1">&lt;/rec:Record<span class="re2">&gt;</span></span></span></pre></div>
<p>For a source object with multiple key values it must be distinguishable which key
value belongs to which key "column". Therefore id:Key can be optionally annotated with a
name attribute:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;rec:Record<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span><span class="sc3"><span class="coMULTI">&lt;!-- String: ID of data source --&gt;</span></span><span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key</span> <span class="re0">name</span>=<span class="st0">&quot;column1&quot;</span><span class="re2">&gt;</span></span><span class="sc3"><span class="coMULTI">&lt;!-- key value in named column --&gt;</span></span><span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key</span> <span class="re0">name</span>=<span class="st0">&quot;column2&quot;</span><span class="re2">&gt;</span></span><span class="sc3"><span class="coMULTI">&lt;!-- key value in named column --&gt;</span></span><span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
...
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/rec:Record<span class="re2">&gt;</span></span></span></pre></div>
<p>Because id:Element uses the id:Key element to identify the element inside a compound,
it would be technically possible to support compounds that need multiple key values to
identify an element. We cannot think of an actual use case currently, though (-;
</p><p>In Java:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">public</span> <span class="kw1">interface</span> ID <span class="kw1">extends</span> <span class="kw3">Serializable</span>
<span class="br0">&#123;</span>
<span class="kw3">String</span> getSource<span class="br0">&#40;</span><span class="br0">&#41;</span>;
<span class="kw3">Key</span> getKey<span class="br0">&#40;</span><span class="br0">&#41;</span>;
&nbsp;
List&lt;Key&gt; getElements<span class="br0">&#40;</span><span class="br0">&#41;</span>;
List&lt;String&gt; getFragments<span class="br0">&#40;</span><span class="br0">&#41;</span>;
&nbsp;
ID createElementID<span class="br0">&#40;</span><span class="kw3">String</span> elementName<span class="br0">&#41;</span>;
ID createElementID<span class="br0">&#40;</span><span class="kw3">Key</span> elementKey<span class="br0">&#41;</span>;
ID createFragmentID<span class="br0">&#40;</span><span class="kw3">String</span> fragmentName<span class="br0">&#41;</span>;
&nbsp;
ID mergeWith<span class="br0">&#40;</span>Collection&lt;ID&gt; otherParts<span class="br0">&#41;</span>;
<span class="br0">&#125;</span></pre></div>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">public</span> <span class="kw1">interface</span> <span class="kw3">Key</span> <span class="kw1">extends</span> <span class="kw3">Serializable</span>
<span class="br0">&#123;</span>
<span class="kw1">static</span> <span class="kw1">final</span> <span class="kw3">String</span> NONAME = <span class="st0">&quot;__SMILA:unnamedkey__&quot;</span>;
&nbsp;
Iterator&lt;String&gt; getKeyNames<span class="br0">&#40;</span><span class="br0">&#41;</span>;
<span class="kw3">String</span> getKey<span class="br0">&#40;</span><span class="kw3">String</span> name<span class="br0">&#41;</span>;
<span class="kw3">String</span> getKey<span class="br0">&#40;</span><span class="br0">&#41;</span>; <span class="co1">// shortcut for getKey(NONAME)</span>
<span class="br0">&#125;</span></pre></div>
<div dir="ltr" style="text-align: left;"><pre class="source-java"><span class="kw1">public</span> <span class="kw1">interface</span> IDFactory
<span class="br0">&#123;</span>
ID createID<span class="br0">&#40;</span><span class="kw3">String</span> source, <span class="kw3">Key</span> key<span class="br0">&#41;</span>;
<span class="kw3">Key</span> createKey<span class="br0">&#40;</span>Map&lt;String, String&gt; keyValues<span class="br0">&#41;</span>;
&nbsp;
<span class="co1">// convenience methods:</span>
ID createID<span class="br0">&#40;</span><span class="kw3">String</span> source, <span class="kw3">String</span> key<span class="br0">&#41;</span>;
ID createID<span class="br0">&#40;</span><span class="kw3">String</span> source, Map&lt;String, String&gt; keyValues<span class="br0">&#41;</span>;
<span class="kw3">Key</span> createKey<span class="br0">&#40;</span><span class="kw3">String</span> key<span class="br0">&#41;</span>;
<span class="br0">&#125;</span></pre></div>
<p>IDs should be usable as hash keys:
</p>
<ul><li> IDs are unchangeable objects
</li><li> Provide appropriate hashCode() implementation
</li></ul>
<a name="Examples"></a><h3> <span class="mw-headline"> Examples </span></h3>
<p>Assume a file system data source named "share", referring to a shared directory on a file server (e.g. "\\fileserv\share"). It looks like this:
</p>
<pre>
\\fileserv\share
|- PDF
| \- big.pdf
\- Archive
\- oldstuff.zip
\- PDF
\- old.pdf
\- another.zip
\- another.pdf
</pre>
<p>"big.pdf" initially gets this ID:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span>share<span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>PDF/big.pdf<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span></pre></div>
<p>After splitting it by pages, the following ID refers to the first page of the document:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span>share<span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>PDF/big.pdf<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Fragment<span class="re2">&gt;</span></span></span>0<span class="sc3"><span class="re1">&lt;/id:Fragment<span class="re2">&gt;</span></span></span> <span class="sc3"><span class="coMULTI">&lt;!-- or start counting at 1? --&gt;</span></span>
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span></pre></div>
<p>Similarly for the ZIP: It starts as:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span>share<span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>Archive/oldstuff.zip<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span></pre></div>
<p>When it is expanded, the contained file is referred to as
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span>share<span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>Archive/oldstuff.zip<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>PDF/old.pdf<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span></pre></div>
<p>which in turn can be split into pages to become:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span>share<span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>Archive/oldstuff.zip<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>PDF/old.pdf<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Fragment<span class="re2">&gt;</span></span></span>0<span class="sc3"><span class="re1">&lt;/id:Fragment<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span></pre></div>
<p>And finally, the first page of the PDF in the nested archive another.zip would have this ID:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span>share<span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>Archive/oldstuff.zip<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>another.zip<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>another.pdf<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Fragment<span class="re2">&gt;</span></span></span>0<span class="sc3"><span class="re1">&lt;/id:Fragment<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span></pre></div>
<p>Similarly, for a mail server as a data source "mail" we could have the following ID
to refer to an attachment of a mail in folder INBOX. In this case, the Element name
is the index of the MIME message part in the message.
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span>mail<span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>INBOX/42<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key<span class="re2">&gt;</span></span></span>2<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:Element<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span></pre></div>
<p>A row in a database table with a primary key consisting of columns x and y would
be identified like this:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;id:ID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Source<span class="re2">&gt;</span></span></span>db<span class="sc3"><span class="re1">&lt;/id:Source<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key</span> <span class="re0">name</span>=<span class="st0">&quot;x&quot;</span><span class="re2">&gt;</span></span>0815<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;id:Key</span> <span class="re0">name</span>=<span class="st0">&quot;y&quot;</span><span class="re2">&gt;</span></span>4711<span class="sc3"><span class="re1">&lt;/id:Key<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/id:ID<span class="re2">&gt;</span></span></span></pre></div>
<!--
NewPP limit report
Preprocessor node count: 49/1000000
Post-expand include size: 0/2097152 bytes
Template argument size: 0/2097152 bytes
#ifexist count: 0/100
-->
<!-- Saved in parser cache with key wikidb:pcache:idhash:15154-0!1!0!!en!2!edit=0 and timestamp 20130416061043 -->
<div class="printfooter">
Retrieved from "<a href="ID_Concept.html">http://wiki.eclipse.org/SMILA/Project_Concepts/ID_Concept</a>"</div>
<div id="catlinks"><p class='catlinks'><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Category</a>: <span dir='ltr'><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></span></p></div> <!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<!-- Yoink of toolbox for phoenix moved up -->
</div>
</div>
<div id="clearFooter"></div>
<div id="footer" >
<ul id="footernav">
<li class="first"><a href="http://www.eclipse.org/">Home</a></li>
<li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li>
<li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li>
</ul>
<span id="copyright">Copyright &copy; 2013 The Eclipse Foundation. All Rights Reserved</span>
<p id="footercredit">This page was last modified 14:37, 17 June 2009 by <a href="http://wiki.eclipse.org/User:Igor.novakovic.empolis.com" title="User:Igor.novakovic.empolis.com">Igor Novakovic</a>. Based on work by <a href="http://wiki.eclipse.org/User:Juergen.schumacher.empolis.com" title="User:Juergen.schumacher.empolis.com">Juergen Schumacher</a>.</p>
<p id="footerviews">This page has been accessed 2,715 times.</p>
</div>
<script type="text/javascript">
// Choose the Google Analytics host prefix that matches the protocol this
// page was loaded over.
var gaJsHost;
if (document.location.protocol == "https:") {
  gaJsHost = "https://ssl.";
} else {
  gaJsHost = "http://www.";
}

// Write the ga.js script tag into the document while it is still being
// parsed. The tag is percent-encoded and passed through unescape() so that
// no literal closing script tag appears inside this script block.
var gaScriptTag = unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E");
document.write(gaScriptTag);
</script>
<script type="text/javascript">
// Record a page view with Google Analytics for account UA-910670-4.
// _gat is only defined once ga.js (requested by the preceding script block)
// has loaded. If that request is blocked or fails, e.g. by an ad blocker or
// when this page is viewed offline, _gat does not exist and dereferencing it
// would throw a ReferenceError. Tracking is skipped in that case;
// pageTracker is still declared (as undefined), exactly as before.
if (typeof _gat !== "undefined") {
  var pageTracker = _gat._getTracker("UA-910670-4");
  pageTracker._trackPageview();
}
</script>
<!-- <div class="visualClear"></div> -->
<script type="text/javascript">
// Invoke runOnloadHook only when an earlier script has defined it on window.
if (window.runOnloadHook) {
  runOnloadHook();
}
</script>
</div>
<!-- Served in 0.051 secs. --></body></html>