blob: 66173b823daaf6c757be32f7a5d7a5c61203dd9f [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="SMILA/Documentation/AperturePipelet,SMILA/Glossary" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/Documentation/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" />
<link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=rss" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=atom" />
<title>SMILA/Documentation/AperturePipelet - Eclipsepedia</title>
<style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style>
<link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" />
<link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" />
<!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]-->
<!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]-->
<!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]-->
<!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]-->
<!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]-->
<!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<script type= "text/javascript">/*<![CDATA[*/
var skin = "eclipsenova";
var stylepath = "/skins";
var wgArticlePath = "/$1";
var wgScriptPath = "";
var wgScript = "/index.php";
var wgServer = "http://wiki.eclipse.org";
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "SMILA/Documentation/AperturePipelet";
var wgTitle = "SMILA/Documentation/AperturePipelet";
var wgAction = "view";
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
var wgArticleId = "15200";
var wgIsArticle = true;
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
var wgCurRevisionId = "151788";
var wgVersion = "1.12.0";
var wgEnableAPI = true;
var wgEnableWriteAPI = false;
/*]]>*/</script>
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script>
<!-- Performance mods similar to those for bug 166401 -->
<script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=eclipsenova"><!-- site js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script>
<style type="text/css">/*<![CDATA[*/
.source-xml {line-height: normal; font-size: medium;}
.source-xml li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for xml
* CSS class: source-xml, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-xml .de1, .source-xml .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-xml {}
.source-xml .head {}
.source-xml .foot {}
.source-xml .imp {font-weight: bold; color: red;}
.source-xml .ln-xtra {color: #cc0; background-color: #ffc;}
.source-xml li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-xml li.li2 {font-weight: bold;}
.source-xml .coMULTI {color: #808080; font-style: italic;}
.source-xml .es0 {color: #000099; font-weight: bold;}
.source-xml .br0 {color: #66cc66;}
.source-xml .st0 {color: #ff0000;}
.source-xml .nu0 {color: #cc66cc;}
.source-xml .sc0 {color: #00bbdd;}
.source-xml .sc1 {color: #ddbb00;}
.source-xml .sc2 {color: #339933;}
.source-xml .sc3 {color: #009900;}
.source-xml .re0 {color: #000066;}
.source-xml .re1 {font-weight: bold; color: black;}
.source-xml .re2 {font-weight: bold; color: black;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style><link rel="stylesheet" type="text/css" href="AperturePipelet.html" /> </head>
<body class="mediawiki ns-0 ltr page-SMILA_Documentation_AperturePipelet">
<div id="globalWrapper">
<div id="column-one">
<!-- Eclipse Additions for the Top Nav start here M. Ward-->
<div id="header">
<div id="header-graphic">
<img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki">
</div>
<!-- Pulled 101409 Mward -->
<div class="portlet" id="p-personal">
<div class="pBody">
<ul>
<li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/AperturePipelet">Log in</a></li>
</ul>
</div>
</div>
<div id="header-icons">
<div id="sites">
<ul id="sitesUL">
<li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li>
<li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li>
<li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li>
<li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li>
<li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li>
<li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li>
</ul>
</div>
</div>
</div>
<!-- NEW HEADER STUFF HERE -->
<div id="header-menu">
<div id="header-nav">
<ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li>
<li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li>
<li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li>
<li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li>
<li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li>
<li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li>
<li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li>
</ul>
</div>
<div id="header-utils">
<!-- moved the search window here -->
<form action="http://wiki.eclipse.org/Special:Search" >
<input class="input" name="search" type="text" accesskey="f" value="" />
<input type='submit' onclick="this.submit();" name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" />&nbsp;
<input type='submit' onclick="this.submit();" name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" />
</form>
</div>
</div>
<!-- Eclipse Additions for the Header stop here -->
<!-- Additions and mods for leftside nav Start here -->
<!--Started nav rip here-->
<!-- these are the nav controls main page, changes etc -->
<div id="novaContent" class="faux">
<div id="leftcol">
<ul id="leftnav">
<!-- these are the page controls, edit history etc -->
<li class="separator"><a class="separator">Navigation &#160;&#160;</li>
<li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li>
<li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li>
<li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li>
<li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li>
<li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li>
<li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li>
<li class="separator"><a class="separator">Toolbox &#160;&#160;</a></li>
<li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Documentation/AperturePipelet">What links here</a></li>
<li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Documentation/AperturePipelet">Related changes</a></li>
<!-- This is the toolbox section -->
<li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li>
<li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li>
<li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;oldid=151788">Permanent link</a></li> </ul>
</div>
<!-- Additions and mods for leftside nav End here -->
<div id="column-content">
<div id="content">
<a name="top" id="top"></a>
<div id="tabs">
<ul class="primary">
<li class="active"><a href="AperturePipelet.html"><span class="tab">Page</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Documentation/AperturePipelet&amp;action=edit"><span class="tab">Discussion</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;action=edit"><span class="tab">View source</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;action=history"><span class="tab">History</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/AperturePipelet"><span class="tab">Edit</span></a></li>
</ul>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<h1 class="firstHeading">SMILA/Documentation/AperturePipelet</h1>
<div id="bodyContent">
<h3 id="siteSub">From Eclipsepedia</h3>
<div id="contentSub"><span class="subpages">&lt; <a href="../../SMILA.html" title="SMILA">SMILA</a> | <a href="../Documentation.html" title="SMILA/Documentation">Documentation</a></span></div>
<div id="jump-to-nav">Jump to: <a href="AperturePipelet.html#column-one">navigation</a>, <a href="AperturePipelet.html#searchInput">search</a></div> <!-- start content -->
<p><span style="color:#ff0000"><b>This pipelet is not yet available in our repository. As soon as the new Aperture release is available we will submit appropriate CQs and hopefully get permission to use it in our project.</b></span>
</p>
<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1"><a href="AperturePipelet.html#Bundle:_org.eclipse.smila.processing.pipelets.aperture.AperturePipelet"><span class="tocnumber">1</span> <span class="toctext">Bundle: org.eclipse.smila.processing.pipelets.aperture.AperturePipelet</span></a>
<ul>
<li class="toclevel-2"><a href="AperturePipelet.html#Description"><span class="tocnumber">1.1</span> <span class="toctext">Description</span></a></li>
<li class="toclevel-2"><a href="AperturePipelet.html#Configuration"><span class="tocnumber">1.2</span> <span class="toctext">Configuration</span></a>
<ul>
<li class="toclevel-3"><a href="AperturePipelet.html#Example"><span class="tocnumber">1.2.1</span> <span class="toctext">Example</span></a></li>
</ul>
</li>
</ul>
</li>
</ul>
</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
<a name="Bundle:_org.eclipse.smila.processing.pipelets.aperture.AperturePipelet"></a><h2> <span class="mw-headline"> Bundle: <tt>org.eclipse.smila.processing.pipelets.aperture.AperturePipelet</tt> </span></h2>
<a name="Description"></a><h3> <span class="mw-headline"> Description </span></h3>
<p>This Pipelet converts various document formats (like PDF,XLS, etc.) to plain text using <a href="../Glossary.html#Aperture" title="SMILA/Glossary">Aperture</a> technology.
It converts the document's content in <i>AttachmentContent</i> and stores the plain text result in <i>AttachmentText</i>. The optional MimeType of <i>AttachmentContent</i> in <i>AttachmentMimeType</i> is used for conversion. If no MimeType is provided a MimeType identification is done inside the Pipelet using a <b>MimeTypeIdentifier</b> service.
</p>
<a name="Configuration"></a><h3> <span class="mw-headline"> Configuration </span></h3>
<p>Configuration File: <tt>configuration/org.eclipse.smila.processing.pipelets.aperture/ConverterConfig.xml</tt>
</p>
<table border="1">
<tr>
<th>Property</th><th>Type</th><th>Description
</th></tr>
<tr>
<td>AttachmentContent</td><td>String</td><td>name of the attachment containing the original document content
</td></tr>
<tr>
<td>AttachmentText</td><td>String</td><td>name of the attachment to store the converted text in
</td></tr>
<tr>
<td>AttachmentMimeType</td><td>String</td><td>name of the attribute containing the MimeType of the original document content
</td></tr>
</table>
<p>Note that all properties are required and must be provided.
</p>
<a name="Example"></a><h4> <span class="mw-headline"> Example </span></h4>
<p>The following example was used in the SMILA example application to convert documents delivered by Filesystem- and WebCrawler to plain text.
</p><p><b>ConverterConfig.xml</b>
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;PipeletConfiguration</span> <span class="re0">xmlns</span>=<span class="st0">&quot;http://www.eclipse.org/smila/processor&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;Property</span> <span class="re0">name</span>=<span class="st0">&quot;AttachmentContent&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;Value<span class="re2">&gt;</span></span></span>Content<span class="sc3"><span class="re1">&lt;/Value<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Property<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Property</span> <span class="re0">name</span>=<span class="st0">&quot;AttachmentText&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;Value<span class="re2">&gt;</span></span></span>Text<span class="sc3"><span class="re1">&lt;/Value<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Property<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Property</span> <span class="re0">name</span>=<span class="st0">&quot;AttachmentMimeType&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;Value<span class="re2">&gt;</span></span></span>MimeType<span class="sc3"><span class="re1">&lt;/Value<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Property<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/PipeletConfiguration<span class="re2">&gt;</span></span></span></pre></div>
<!--
NewPP limit report
Preprocessor node count: 11/1000000
Post-expand include size: 0/2097152 bytes
Template argument size: 0/2097152 bytes
#ifexist count: 0/100
-->
<!-- Saved in parser cache with key wikidb:pcache:idhash:15200-0!1!0!!en!2!edit=0 and timestamp 20110617075042 -->
<div class="printfooter">
Retrieved from "<a href="AperturePipelet.html">http://wiki.eclipse.org/SMILA/Documentation/AperturePipelet</a>"</div>
<div id="catlinks"><p class='catlinks'><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Categories</a>: <span dir='ltr'><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></span> | <span dir='ltr'><a href="http://wiki.eclipse.org/index.php?title=Category:SMILA/Pipelet&amp;action=edit" class="new" title="Category:SMILA/Pipelet">SMILA/Pipelet</a></span></p></div> <!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<!-- Yoink of toolbox for phoenix moved up -->
</div>
</div>
<div id="clearFooter"/>
<div id="footer" >
<ul id="footernav">
<li class="first"><a href="http://www.eclipse.org/">Home</a></li>
<li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li>
<li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li>
</ul>
<span id="copyright">Copyright &copy; 2011 The Eclipse Foundation. All Rights Reserved</span>
<p id="footercredit">This page was last modified 14:34, 3 May 2009 by <a href="http://wiki.eclipse.org/User:Igor.novakovic.empolis.com" title="User:Igor.novakovic.empolis.com">Igor Novakovic</a>. Based on work by <a href="http://wiki.eclipse.org/User:Daniel.stucky.empolis.com" title="User:Daniel.stucky.empolis.com">Daniel Stucky</a>, <a href="http://wiki.eclipse.org/index.php?title=User:Pwissel.brox.de&amp;action=edit" class="new" title="User:Pwissel.brox.de">Peter Wissel</a> and <a href="http://wiki.eclipse.org/User:G.schmidt.brox.de" title="User:G.schmidt.brox.de">Georg Schmidt</a> and <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;action=credits" title="SMILA/Documentation/AperturePipelet">others</a>.</p>
<p id="footerviews">This page has been accessed 1,935 times.</p>
</div>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
var pageTracker = _gat._getTracker("UA-910670-4");
pageTracker._trackPageview();
</script>
<!-- <div class="visualClear"></div> -->
<script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
</div>
<!-- Served in 0.088 secs. --></body></html>