blob: fdd427bbe9ff0ac2aef14eed81f9b5aa165149cb [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" dir="ltr" class="client-nojs">
<head>
<meta charset="UTF-8" />
<title>SMILA/Documentation/AperturePipelet - Eclipsepedia</title>
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="generator" content="MediaWiki 1.23.2" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/eclipse.org-common/themes/solstice/public/images/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (en)" />
<link rel="EditURI" type="application/rsd+xml" href="http://wiki.eclipse.org/api.php?action=rsd" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom feed" href="http://wiki.eclipse.org/index.php?title=Special:RecentChanges&amp;feed=atom" />
<link rel="stylesheet" href="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=mediawiki.legacy.commonPrint%2Cshared%7Cmediawiki.ui.button&amp;only=styles&amp;skin=solstice&amp;*" />
<link rel="stylesheet" href="http://wiki.eclipse.org/skins/solstice/public/stylesheets/styles.min.css?303" media="screen, print" /><meta name="ResourceLoaderDynamicStyles" content="" />
<style>a:lang(ar),a:lang(kk-arab),a:lang(mzn),a:lang(ps),a:lang(ur){text-decoration:none}
/* cache key: my_wiki:resourceloader:filter:minify-css:7:14ece53a42aa314864e5fd8c57f0d98f */</style>
<script src="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=startup&amp;only=scripts&amp;skin=solstice&amp;*"></script>
<script>if(window.mw){
mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"SMILA/Documentation/AperturePipelet","wgTitle":"SMILA/Documentation/AperturePipelet","wgCurRevisionId":326327,"wgRevisionId":326327,"wgArticleId":15200,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["SMILA","SMILA/Pipelet"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRelevantPageName":"SMILA/Documentation/AperturePipelet","wgIsProbablyEditable":false,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgWikiEditorEnabledModules":{"toolbar":false,"dialogs":false,"hidesig":true,"preview":false,"previewDialog":false,"publish":false},"wgCategoryTreePageCategoryOptions":"{\"mode\":0,\"hideprefix\":20,\"showcount\":true,\"namespaces\":false}"});
}</script><script>if(window.mw){
mw.loader.implement("user.options",function($,jQuery){mw.user.options.set({"ccmeonemails":0,"cols":80,"date":"default","diffonly":0,"disablemail":0,"editfont":"default","editondblclick":0,"editsectiononrightclick":0,"enotifminoredits":0,"enotifrevealaddr":0,"enotifusertalkpages":1,"enotifwatchlistpages":1,"extendwatchlist":0,"fancysig":0,"forceeditsummary":0,"gender":"unknown","hideminor":0,"hidepatrolled":0,"imagesize":2,"math":1,"minordefault":0,"newpageshidepatrolled":0,"nickname":"","norollbackdiff":0,"numberheadings":0,"previewonfirst":0,"previewontop":1,"rcdays":7,"rclimit":50,"rows":25,"showhiddencats":0,"shownumberswatching":1,"showtoolbar":1,"skin":"solstice","stubthreshold":0,"thumbsize":2,"underline":2,"uselivepreview":0,"usenewrc":0,"watchcreations":1,"watchdefault":1,"watchdeletion":0,"watchlistdays":3,"watchlisthideanons":0,"watchlisthidebots":0,"watchlisthideliu":0,"watchlisthideminor":0,"watchlisthideown":0,"watchlisthidepatrolled":0,"watchmoves":0,"wllimit":250,
"useeditwarning":1,"prefershttps":1,"language":"en","variant-gan":"gan","variant-iu":"iu","variant-kk":"kk","variant-ku":"ku","variant-shi":"shi","variant-sr":"sr","variant-tg":"tg","variant-uz":"uz","variant-zh":"zh","searchNs0":true,"searchNs1":false,"searchNs2":false,"searchNs3":false,"searchNs4":false,"searchNs5":false,"searchNs6":false,"searchNs7":false,"searchNs8":false,"searchNs9":false,"searchNs10":false,"searchNs11":false,"searchNs12":false,"searchNs13":false,"searchNs14":false,"searchNs15":false,"variant":"en"});},{},{});mw.loader.implement("user.tokens",function($,jQuery){mw.user.tokens.set({"editToken":"+\\","patrolToken":false,"watchToken":false});},{},{});
/* cache key: my_wiki:resourceloader:filter:minify-js:7:70d74423d3fc1e1c18fa9a1ff645a84a */
}</script>
<script>if(window.mw){
mw.loader.load(["mediawiki.page.startup","mediawiki.legacy.wikibits","mediawiki.legacy.ajax"]);
}</script>
<style type="text/css">/*<![CDATA[*/
.source-xml {line-height: normal;}
.source-xml li, .source-xml pre {
line-height: normal; border: 0px none white;
}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for xml
* CSS class: source-xml, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann
* (http://qbnz.com/highlighter/ and http://geshi.org/)
* --------------------------------------
*/
.xml.source-xml .de1, .xml.source-xml .de2 {font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;}
.xml.source-xml {font-family:monospace;}
.xml.source-xml .imp {font-weight: bold; color: red;}
.xml.source-xml li, .xml.source-xml .li1 {font-weight: normal; vertical-align:top;}
.xml.source-xml .ln {width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;}
.xml.source-xml .li2 {font-weight: bold; vertical-align:top;}
.xml.source-xml .es0 {color: #000099; font-weight: bold;}
.xml.source-xml .br0 {color: #66cc66;}
.xml.source-xml .sy0 {color: #66cc66;}
.xml.source-xml .st0 {color: #ff0000;}
.xml.source-xml .nu0 {color: #cc66cc;}
.xml.source-xml .sc-1 {color: #808080; font-style: italic;}
.xml.source-xml .sc0 {color: #00bbdd;}
.xml.source-xml .sc1 {color: #ddbb00;}
.xml.source-xml .sc2 {color: #339933;}
.xml.source-xml .sc3 {color: #009900;}
.xml.source-xml .re0 {color: #000066;}
.xml.source-xml .re1 {color: #000000; font-weight: bold;}
.xml.source-xml .re2 {color: #000000; font-weight: bold;}
.xml.source-xml .ln-xtra, .xml.source-xml li.ln-xtra, .xml.source-xml div.ln-xtra {background-color: #ffc;}
.xml.source-xml span.xtra { display:block; }
/*]]>*/
</style><meta name="viewport" content="width=device-width, initial-scale=1.0"></head>
<body class="mediawiki ltr sitedir-ltr ns-0 ns-subject page-SMILA_Documentation_AperturePipelet skin-solstice action-view" id="solstice">
<a class="sr-only" href="AperturePipelet.html#content">Skip to main content</a>
<div class="thin-header">
<header role="banner" class="hidden-print noprint">
<div class="container-fluid">
<div id="row-logo-search">
<div id="header-left">
<div class="row">
<div class="hidden-xs col-sm-6 logo-container">
<a href="https://www.eclipse.org/" ><img class="logo-eclipse-default" src="http://wiki.eclipse.org/skins/solstice/public/images/logo/eclipse-800x188.png" alt="Eclipsepedia"></a>
</div>
<div class="navbar col-sm-18 yamm" id="main-menu">
<div class="navbar-collapse collapse" id="navbar-collapse-1">
<ul class="nav navbar-nav">
<li><a target="_self" href="https://eclipse.org/downloads/">Download</a></li>
<li><a target="_self" href="https://eclipse.org/users/">Getting Started </a></li>
<li><a target="_self" href="https://eclipse.org/membership/">Members</a></li>
<li><a target="_self" href="https://eclipse.org/projects/">Projects</a></li>
<li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="AperturePipelet.html#">Community <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="http://marketplace.eclipse.org">Marketplace</a></li><li><a href="http://events.eclipse.org">Events</a></li><li><a href="http://www.planeteclipse.org/">Planet Eclipse</a></li><li><a href="https://eclipse.org/community/eclipse_newsletter/">Newsletter</a></li><li><a href="https://www.youtube.com/user/EclipseFdn">Videos</a></li></ul></li><li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="AperturePipelet.html#">Participate <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li><li><a href="https://eclipse.org/forums/">Forums</a></li><li><a href="https://eclipse.org/mail/">Mailing Lists</a></li><li><a href="https://wiki.eclipse.org/">Wiki</a></li><li><a href="https://wiki.eclipse.org/IRC">IRC</a></li><li><a href="https://eclipse.org/contribute/">How to Contribute</a></li></ul></li><li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="AperturePipelet.html#">Working Groups <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="http://wiki.eclipse.org/Auto_IWG">Automotive</a></li><li><a href="http://iot.eclipse.org">Internet of Things</a></li><li><a href="http://locationtech.org">LocationTech</a></li><li><a href="http://lts.eclipse.org">Long-Term Support</a></li><li><a href="http://polarsys.org">PolarSys</a></li><li><a href="http://science.eclipse.org">Science</a></li><li><a href="http://openmdm.org">OpenMDM</a></li></ul></li><!-- More -->
<li class="dropdown hidden-xs"><a class="dropdown-toggle" data-toggle="dropdown">More<b class="caret"></b></a>
<ul class="dropdown-menu">
<li>
<!-- Content container to add padding -->
<div class="yamm-content">
<div class="row">
<ul class="col-sm-8 list-unstyled"><li><p><strong>Community</strong></p></li><li><a href="http://marketplace.eclipse.org">Marketplace</a></li><li><a href="http://events.eclipse.org">Events</a></li><li><a href="http://www.planeteclipse.org/">Planet Eclipse</a></li><li><a href="https://eclipse.org/community/eclipse_newsletter/">Newsletter</a></li><li><a href="https://www.youtube.com/user/EclipseFdn">Videos</a></li></ul><ul class="col-sm-8 list-unstyled"><li><p><strong>Participate</strong></p></li><li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li><li><a href="https://eclipse.org/forums/">Forums</a></li><li><a href="https://eclipse.org/mail/">Mailing Lists</a></li><li><a href="https://wiki.eclipse.org/">Wiki</a></li><li><a href="https://wiki.eclipse.org/IRC">IRC</a></li><li><a href="https://eclipse.org/contribute/">How to Contribute</a></li></ul><ul class="col-sm-8 list-unstyled"><li><p><strong>Working Groups</strong></p></li><li><a href="http://wiki.eclipse.org/Auto_IWG">Automotive</a></li><li><a href="http://iot.eclipse.org">Internet of Things</a></li><li><a href="http://locationtech.org">LocationTech</a></li><li><a href="http://lts.eclipse.org">Long-Term Support</a></li><li><a href="http://polarsys.org">PolarSys</a></li><li><a href="http://science.eclipse.org">Science</a></li><li><a href="http://openmdm.org">OpenMDM</a></li></ul> </div>
</div>
</li>
</ul>
</li>
</ul>
</div>
<div class="navbar-header">
<button data-target="#navbar-collapse-1" data-toggle="collapse" class="navbar-toggle" type="button">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="https://www.eclipse.org/" class="visible-xs navbar-brand"><img class="logo-eclipse-default" src="http://wiki.eclipse.org/skins/solstice/public/images/logo/eclipse-800x188.png" alt="Eclipsepedia" width="174"></a>
</div>
</div>
</div>
</div>
</div>
</div>
</header>
<section class="defaut-breadcrumbs hidden-print noprint hidden-print clearfix" id="breadcrumb">
<div>
<ol class="breadcrumb">
<li><a href="https://www.eclipse.org/">Home</a></li>
<li><a href="http://wiki.eclipse.org/Main_Page">Eclipse Wiki</a></li>
<li class="active">SMILA/Documentation/AperturePipelet</li></ol>
</div>
</section>
</div>
<div class="toolbar-menu breadcrumbs-offset noprint hidden-print margin-bottom-0 clearfix">
<div class="col-md-24">
<ol class="breadcrumb" role="navigation">
<li id="pt-login">
<a href="http://wiki.eclipse.org/index.php?title=Special:UserLogin&amp;returnto=SMILA%2FDocumentation%2FAperturePipelet">
<i class="fa fa-sign-in fa-fw orange"></i> Log in </a>
</li>
</ol>
</div>
</div>
<main role="main" class="background-grey">
<div class="container-full padding-top-25">
<!-- content -->
<section id="content" class="mw-body container-full clearfix 0">
<div id="mw-js-message" style="display:none;"></div>
<!-- bodyContent -->
<div id="bodyContent">
<!-- jumpto -->
<div id="jump-to-nav" class="mw-jump">
Jump to: <a href="AperturePipelet.html#mw-head">navigation</a>,
<a href="AperturePipelet.html#p-search">search</a>
</div>
<!-- /jumpto -->
<!-- leftcol -->
<aside class="col-md-4 noprint hidden-print" id="leftcol">
<form class="input-group" role="form" id="form-eclipse-search" action="http://wiki.eclipse.org/index.php">
<input id="searchInput" class="search-query form-control" type="search" accesskey="f" title="Special:Search" placeholder="Search" name="search" value="">
<span class="input-group-btn">
<button value="search" id="mw-searchButton" type="submit" class="btn btn-default" title="Search the pages for this text" name="fulltext">
<i class="fa fa-search"></i>
</button>
</span>
</form>
<select class="form-control margin-top-10 margin-bottom-10 visible-xs visible-sm" onchange="this.options[this.selectedIndex].value && (window.location = this.options[this.selectedIndex].value);"><option class="fw-700 "><span class="fw-700">---Navigation---</span></option><option value="/Main_Page">Main Page</option><option value="/Eclipsepedia:Community_portal">Community portal</option><option value="/Eclipsepedia:Current_events">Current events</option><option value="/Special:RecentChanges">Recent changes</option><option value="/Special:Random">Random page</option><option value="https://www.mediawiki.org/wiki/Special:MyLanguage/Help:Contents">Help</option></select><ul class="ul-left-nav fa-ul hidden-print leftnav hidden-xs hidden-sm"><li class="separator"><span class="separator">Navigation</span></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Main_Page" id="n-mainpage" title="Visit the main page [z]" accesskey="z">Main Page</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Eclipsepedia:Community_portal" id="n-portal" title="About the project, what you can do, where to find things">Community portal</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Eclipsepedia:Current_events" id="n-currentevents" title="Find background information on current events">Current events</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:RecentChanges" id="n-recentchanges" title="A list of recent changes in the wiki [r]" accesskey="r">Recent changes</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:Random" id="n-randompage" title="Load a random page [x]" accesskey="x">Random page</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a 
href="https://www.mediawiki.org/wiki/Special:MyLanguage/Help:Contents" id="n-help" title="The place to find out">Help</a></li></ul> <select class="form-control margin-top-10 margin-bottom-10 visible-xs visible-sm" onchange="this.options[this.selectedIndex].value && (window.location = this.options[this.selectedIndex].value);"><option class="fw-700 "><span class="fw-700">---Toolbox---</span></option><option value="/index.php?title=SMILA/Documentation/AperturePipelet&amp;action=info">Page information</option><option value="/index.php?title=SMILA/Documentation/AperturePipelet&amp;oldid=326327">Permanent link</option><option value="/index.php?title=SMILA/Documentation/AperturePipelet&amp;printable=yes">Printable version</option><option value="/Special:SpecialPages">Special pages</option><option value="/Special:RecentChangesLinked/SMILA/Documentation/AperturePipelet">Related changes</option><option value="/Special:WhatLinksHere/SMILA/Documentation/AperturePipelet">What links here</option></select><ul class="ul-left-nav fa-ul hidden-print leftnav hidden-xs hidden-sm"><li class="separator"><span class="separator">Toolbox</span></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;action=info" id="t-info">Page information</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;oldid=326327" id="t-permalink" title="Permanent link to this revision of the page">Permanent link</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;printable=yes" id="t-print" rel="alternate" title="Printable version of this page [p]" accesskey="p">Printable version</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:SpecialPages" id="t-specialpages" 
title="A list of all special pages [q]" accesskey="q">Special pages</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:RecentChangesLinked/SMILA/Documentation/AperturePipelet" id="t-recentchangeslinked" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:WhatLinksHere/SMILA/Documentation/AperturePipelet" id="t-whatlinkshere" title="A list of all wiki pages that link here [j]" accesskey="j">What links here</a></li></ul> </aside>
<!-- /leftcol -->
<!-- mainContent -->
<div id="mainContent" class="col-md-20">
<ul class="nav nav-tabs noprint hidden-print" role="tablist">
<li id="ca-nstab-main" class="active"><a href="AperturePipelet.html" title="View the content page [c]" accesskey="c" tabindex="-1">Page</a></li>
<li id="ca-talk" class="new"><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Documentation/AperturePipelet&amp;action=edit&amp;redlink=1" title="Discussion about the content page [t]" accesskey="t" tabindex="-1">Discussion</a></li>
<li id="ca-viewsource"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;action=edit" title="This page is protected.&#10;You can view its source [e]" accesskey="e" tabindex="-1">View source</a></li>
<li id="ca-history" class="collapsible"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;action=history" title="Past revisions of this page [h]" accesskey="h" tabindex="-1">History</a></li>
</ul> <div class="tab-content background-white">
<div id="tab-pane-main-page-content" class="tab-pane active">
<h1 id="firstHeading" class="firstHeading page-header">
<span dir="auto">SMILA/Documentation/AperturePipelet</span>
</h1>
<div id="main-page-content">
<!-- subtitle -->
<div id="contentSub" class="alert alert-small alert-warning"><span class="subpages">&lt; <a href="../../SMILA.html" title="SMILA">SMILA</a>&lrm; | <a href="../Documentation.1.html" title="SMILA/Documentation">Documentation</a></span></div>
<!-- /subtitle -->
<div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr"><p><span style="color:#ff0000"><b>This pipelet is not available as we have switched from Aperture to Tika.</b></span>
</p>
<div id="toc" class="toc"><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1 tocsection-1"><a href="AperturePipelet.html#Bundle:_org.eclipse.smila.aperture.pipelets.AperturePipelet"><span class="tocnumber">1</span> <span class="toctext">Bundle: org.eclipse.smila.aperture.pipelets.AperturePipelet</span></a>
<ul>
<li class="toclevel-2 tocsection-2"><a href="AperturePipelet.html#Description"><span class="tocnumber">1.1</span> <span class="toctext">Description</span></a>
<ul>
<li class="toclevel-3 tocsection-3"><a href="AperturePipelet.html#Supported_document_types"><span class="tocnumber">1.1.1</span> <span class="toctext">Supported document types</span></a></li>
</ul>
</li>
<li class="toclevel-2 tocsection-4"><a href="AperturePipelet.html#Configuration"><span class="tocnumber">1.2</span> <span class="toctext">Configuration</span></a>
<ul>
<li class="toclevel-3 tocsection-5"><a href="AperturePipelet.html#Configuring_the_Property_Mapping"><span class="tocnumber">1.2.1</span> <span class="toctext">Configuring the Property Mapping</span></a></li>
<li class="toclevel-3 tocsection-6"><a href="AperturePipelet.html#Example"><span class="tocnumber">1.2.2</span> <span class="toctext">Example</span></a></li>
<li class="toclevel-3 tocsection-7"><a href="AperturePipelet.html#Typical_documents_and_properties"><span class="tocnumber">1.2.3</span> <span class="toctext">Typical documents and properties</span></a></li>
</ul>
</li>
<li class="toclevel-2 tocsection-8"><a href="AperturePipelet.html#Extending_Aperture"><span class="tocnumber">1.3</span> <span class="toctext">Extending Aperture</span></a></li>
</ul>
</li>
</ul>
</div>
<h2><span class="mw-headline" id="Bundle:_org.eclipse.smila.aperture.pipelets.AperturePipelet">Bundle: <tt>org.eclipse.smila.aperture.pipelets.AperturePipelet</tt></span></h2>
<h3><span class="mw-headline" id="Description">Description</span></h3>
<p>This pipelet converts various document formats (such as PDF, Microsoft Office formats, OpenOffice formats, etc.) to plain text using <a href="../Glossary.html#Aperture" title="SMILA/Glossary">Aperture</a> technology: A binary attachment content can thus be converted to plain text and stored in an attribute. In addition to that, metadata properties of the document (like title, author, etc.) can be extracted and written to record attributes. The optional MimeType of the document in <i>MimeTypeAttribute</i> is used for conversion. If no MimeType is provided a MimeType identification is done inside the pipelet using a <tt>MimeTypeIdentifier</tt> service.
</p><p>The AperturePipelet supports the configurable error handling as described in <a href="../Development_Guidelines/How_to_write_a_Pipelet.html#Implementation" title="SMILA/Development Guidelines/How to write a Pipelet" class="mw-redirect">SMILA/Development_Guidelines/How_to_write_a_Pipelet#Implementation</a>. When used in jobmanager workflows, records causing errors are dropped.
</p>
<h4><span class="mw-headline" id="Supported_document_types">Supported document types</span></h4>
<p>By default, SMILA contains only a subset of Aperture that supports the conversion of:
</p>
<ul>
<li> plain text documents (of course&#160;;-)
</li>
<li> XML documents
</li>
<li> RTF documents
</li>
<li> Adobe PDF documents
</li>
<li> Microsoft Office documents, both the old formats (doc, xls, ppt) and the new OOXML formats (docx, xlsx, pptx)
</li>
<li> Microsoft Visio documents
</li>
<li> OpenOffice documents (OpenDocument formats)
</li>
</ul>
<p>Note: We currently do not include the HTML extractor because it depends on an HTML parser implementation licensed under the LGPL, which we are not allowed to redistribute. See below for hints on how to add Aperture extractors for further formats.
</p>
<h3><span class="mw-headline" id="Configuration">Configuration</span></h3>
<table border="1">
<tr>
<th>Property</th>
<th>Type</th>
<th>Read Type</th>
<th>Description
</th></tr>
<tr>
<td><i>inputType</i></td>
<td>String&#160;: <i>ATTACHMENT, ATTRIBUTE</i></td>
<td>runtime</td>
<td>selects if the input is found in an attachment or attribute of the record. Usually it doesn't make sense to use "ATTRIBUTE" here because the documents to convert are binary content.
</td></tr>
<tr>
<td><i>outputType</i></td>
<td>String&#160;: <i>ATTACHMENT, ATTRIBUTE</i></td>
<td>runtime</td>
<td>selects if output should be stored in an attachment or attribute of the record
</td></tr>
<tr>
<td><i>inputName</i></td>
<td>String</td>
<td>runtime</td>
<td>name of input attachment or path to input attribute (process a String literal of attribute)
</td></tr>
<tr>
<td><i>outputName</i></td>
<td>String</td>
<td>runtime</td>
<td> name of output attachment or path to output attribute for plain text (store result as String literal of attribute)
</td></tr>
<tr>
<td><i>ExtractProperties</i></td>
<td>String</td>
<td>runtime</td>
<td>Specifies which metadata properties reported by Aperture for the document should be written to which record attribute. See below for details.
</td></tr>
<tr>
<td><i>MimeTypeAttribute</i></td>
<td>String</td>
<td>runtime</td>
<td>Parameter referencing the attribute that contains the mimetype of the document. The parameter (or the attribute, respectively) may be left unset (null), in which case a mimetype detection is performed. If the attribute has not been set, it will be set during the processing of the record to the detected mime type.
</td></tr>
<tr>
<td><i>FileExtensionAttribute</i></td>
<td>String</td>
<td>runtime</td>
<td>Parameter referencing the attribute that contains the file extension of the file that was the source of the attachment content. If the mimetype attribute is not specified or does not have a value, the file extension can be used to improve the automatic mime type detection. If not specified, the mimetype detection is based on the attachment content only.
</td></tr>
</table>
<p>Note that all properties are required and must be provided.
</p>
<h4><span class="mw-headline" id="Configuring_the_Property_Mapping">Configuring the Property Mapping</span></h4>
<p>In addition to the plain text content, Aperture can extract metadata properties from documents, such as the title, author, publisher, dates of publication, etc. The names of these properties are URIs. Aperture uses URIs defined by
</p>
<ul>
<li> Dublin Core (<a rel="nofollow" class="external text" href="http://dublincore.org/documents/dces/">dc</a>, <a rel="nofollow" class="external text" href="http://dublincore.org/documents/dcmi-terms/">dcterms</a>)
</li>
<li> OSCAF/Nepomuk (<a rel="nofollow" class="external autonumber" href="http://www.semanticdesktop.org/ontologies/">[1]</a>)
</li>
<li> Microsoft OOXML extended properties from Microsoft Office documents (Office 2007 and higher)
</li>
<li> OpenDocument Meta (<code><a rel="nofollow" class="external free" href="urn:oasis:names:tc:opendocument:xmlns:meta:1.0">urn:oasis:names:tc:opendocument:xmlns:meta:1.0</a></code>) for OpenOffice documents
</li>
</ul>
<p>and probably there are others which we just did not discover yet. It depends very much on the documents what is actually extracted. To check with your documents you can download one of the "aperture-eclipse-1.4.0" archives from <a rel="nofollow" class="external autonumber" href="http://sourceforge.net/projects/aperture/files/Aperture/1.4.0/">[2]</a>, unpack it and start <code>bin/fileinspector.(sh|bat)</code>. Open a document with it and you will see an RDF representation of the extracted metadata.
</p><p>To store such metadata properties in SMILA records, you must specify the URLs of the properties you want to store in the <i>ExtractProperties</i> parameter. Usually this parameter contains a sequence of string values. The string values can have one of the following formats:
</p>
<ul>
<li> <code>&lt;Property-URL&gt;</code>: Add the values of this property to an attribute with the same name.
</li>
<li> <code>&lt;Property-URL&gt;-&gt;&lt;Attribute-Name&gt;</code>: Add the values of the property to the attribute with the given name
</li>
<li> <code>&lt;Property-URL&gt;-&gt;&gt;&lt;Attribute-Name&gt;</code>: Store the values of the property in the attribute with the given name, remove existing values first.
</li>
</ul>
<p>To improve readability, it is possible to abbreviate the property URLs by using namespace prefixes. The available prefixes are specified in <a rel="nofollow" class="external text" href="https://dev.eclipse.org/svnroot/rt/org.eclipse.smila/trunk/core/org.eclipse.smila.aperture/namespaces.properties">namespaces.properties</a> in the <code>org.eclipse.smila.aperture</code> bundle. To add namespaces to this file, extend it and put it in the configuration area in directory <code>org.eclipse.smila.aperture</code>. Using the predefined namespaces you can use, for example:
</p>
<ul>
<li> <code>dc:creator</code> instead of <code><a rel="nofollow" class="external free" href="http://purl.org/dc/elements/1.1/creator">http://purl.org/dc/elements/1.1/creator</a></code>
</li>
<li> <code>nfo:pageCount</code> instead of <code><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#pageCount">http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#pageCount</a></code>
</li>
<li> <code>nie:contentCreated</code> instead of <code><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a></code>
</li>
<li> <code>extended-properties:Company</code> instead of <code><a rel="nofollow" class="external free" href="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties/Company">http://schemas.openxmlformats.org/officeDocument/2006/extended-properties/Company</a></code>
</li>
<li> <code>meta:creation-date</code> instead of <code><a rel="nofollow" class="external free" href="urn:oasis:names:tc:opendocument:xmlns:meta:1.0/creation-date">urn:oasis:names:tc:opendocument:xmlns:meta:1.0/creation-date</a></code>
</li>
</ul>
<p>If you use namespace abbreviations to specify the properties to extract, but don't specify target attributes, the target attributes will be the <i>abbreviated</i> URIs.
</p><p>If the property value reported by Aperture is a resource, the pipelet tries to find a display name for it. It checks the following properties in this order:
</p>
<ul>
<li> <tt>nco:fullname</tt>
</li>
<li> <tt>nie:title</tt>
</li>
<li> <tt>nao:prefLabel</tt>
</li>
<li> <tt>rdfs:label</tt>
</li>
</ul>
<p>If none of them has a value for the resource, the URI of the resource is used as the attribute value.
</p><p>It is possible to specify the complete mapping in a single string value. To do this, concatenate the single values from the sequence using a semicolon ";" as the separator. This makes it easier to use the AperturePipelet in the <a href="Worker/PipeletProcessorWorker.html" title="SMILA/Documentation/Worker/PipeletProcessorWorker"> PipeletProcessorWorker</a> which currently allows only simple string parameters for pipelet configuration.
</p><p>In any case, the resulting attribute is
</p>
<ul>
<li> a single <tt>Value</tt>, if only one value has been extracted and the value is not appended to previously existing values
</li>
<li> an <tt>AnySeq</tt> containing all values, if more than one value has been extracted or new values are appended to existing values.
</li>
</ul>
<h4><span class="mw-headline" id="Example">Example</span></h4>
<p>The following example shows how to configure the pipelet to extract the text from the attachment called <i>Content</i> and store the extracted text in the attribute <i>Text</i>. Additionally, the Company, Manager and Creator, if contained in the document, will be stored in properties which are named after their class URIs.
</p><p>E.g. if a Word document with the value "ACME" as company and "John Doe" as creator is processed, the resulting record would contain the plain text in the attribute <tt>Text</tt>, the value <tt>ACME</tt> in the attribute <tt>http://schemas.openxmlformats.org/officeDocument/2006/extended-properties/Company</tt>, as well as the value <tt>John Doe</tt> in an attribute <tt>dc:creator</tt>.
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="xml source-xml"><pre class="de1"><span class="sc3"><span class="re1">&lt;proc:configuration<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;inputName&quot;</span><span class="re2">&gt;</span></span>Content<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;inputType&quot;</span><span class="re2">&gt;</span></span>ATTACHMENT<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;outputName&quot;</span><span class="re2">&gt;</span></span>Text<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;outputType&quot;</span><span class="re2">&gt;</span></span>ATTRIBUTE<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;MimeTypeAttribute&quot;</span><span class="re2">&gt;</span></span>MimeType<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;FileExtensionAttribute&quot;</span><span class="re2">&gt;</span></span>FileExtension<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Seq</span> <span class="re0">key</span>=<span class="st0">&quot;ExtractProperties&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;rec:Val<span class="re2">&gt;</span></span></span>http://schemas.openxmlformats.org/officeDocument/2006/extended-properties/Company<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val<span class="re2">&gt;</span></span></span>http://schemas.openxmlformats.org/officeDocument/2006/extended-properties/Manager<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val<span class="re2">&gt;</span></span></span>http://purl.org/dc/elements/1.1/creator-&gt;Author<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/rec:Seq<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/proc:configuration<span class="re2">&gt;</span></span></span></pre></div></div>
<p>The following definition defines the same property mapping, but uses a single string parameter and namespace abbreviations:
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="xml source-xml"><pre class="de1"><span class="sc3"><span class="re1">&lt;proc:configuration<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;inputName&quot;</span><span class="re2">&gt;</span></span>Content<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;inputType&quot;</span><span class="re2">&gt;</span></span>ATTACHMENT<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;outputName&quot;</span><span class="re2">&gt;</span></span>Text<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;outputType&quot;</span><span class="re2">&gt;</span></span>ATTRIBUTE<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;MimeTypeAttribute&quot;</span><span class="re2">&gt;</span></span>MimeType<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;FileExtensionAttribute&quot;</span><span class="re2">&gt;</span></span>FileExtension<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;ExtractProperties&quot;</span><span class="re2">&gt;</span></span>extended-properties:Company;extended-properties:Manager;dc:creator-&gt;Author<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/proc:configuration<span class="re2">&gt;</span></span></span></pre></div></div>
<h4><span class="mw-headline" id="Typical_documents_and_properties">Typical documents and properties</span></h4>
<table border="1">
<tr>
<th>Document</th>
<th>Property</th>
<th>URL
</th></tr>
<tr>
<td><i>PDF</i></td>
<td><i>title</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title</a>
</td></tr>
<tr>
<td><i>PDF</i></td>
<td><i>created</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a>
</td></tr>
<tr>
<td><i>PDF</i></td>
<td><i>creator</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator">http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator</a>
</td></tr>
<tr>
<td><i>Excel 97</i></td>
<td><i>title</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title</a>
</td></tr>
<tr>
<td><i>Excel 97</i></td>
<td><i>created</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a>
</td></tr>
<tr>
<td><i>Excel 97</i></td>
<td><i>creator</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator">http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator</a>
</td></tr>
<tr>
<td><i>Excel 2007 (xls) and 2010 (xlsx)</i></td>
<td><i>title</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title</a>
<p><a rel="nofollow" class="external free" href="http://purl.org/dc/elements/1.1/title">http://purl.org/dc/elements/1.1/title</a>
</p>
</td></tr>
<tr>
<td><i>Excel 2007 (xls) and 2010 (xlsx)</i></td>
<td><i>created</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a>
</td></tr>
<tr>
<td><i>Excel 2007 (xls) and 2010 (xlsx)</i></td>
<td><i>creator</i></td>
<td><a rel="nofollow" class="external free" href="http://purl.org/dc/elements/1.1/creator">http://purl.org/dc/elements/1.1/creator</a>
<p><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator">http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator</a>
</p>
</td></tr>
<tr>
<td><i>Powerpoint 97 (ppt)</i></td>
<td><i>title</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title</a>
</td></tr>
<tr>
<td><i>Powerpoint 97 (ppt)</i></td>
<td><i>created</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a>
</td></tr>
<tr>
<td><i>Powerpoint 97 (ppt)</i></td>
<td><i>creator</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator">http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator</a>
</td></tr>
<tr>
<td><i>Powerpoint 2010 (ppt)</i></td>
<td><i>title</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title</a>
</td></tr>
<tr>
<td><i>Powerpoint 2010 (ppt)</i></td>
<td><i>created</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a>
</td></tr>
<tr>
<td><i>Powerpoint 2010 (ppt)</i></td>
<td><i>creator</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator">http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator</a>
</td></tr>
<tr>
<td><i>Powerpoint 2010 (pptx)</i></td>
<td><i>title</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title</a>
<p><a rel="nofollow" class="external free" href="http://purl.org/dc/elements/1.1/title">http://purl.org/dc/elements/1.1/title</a>
</p>
</td></tr>
<tr>
<td><i>Powerpoint 2010 (pptx)</i></td>
<td><i>created</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a>
<p><a rel="nofollow" class="external free" href="http://purl.org/dc/terms/created">http://purl.org/dc/terms/created</a>
</p>
</td></tr>
<tr>
<td><i>Powerpoint 2010 (pptx)</i></td>
<td><i>creator</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator">http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator</a>
<p><a rel="nofollow" class="external free" href="http://purl.org/dc/elements/1.1/creator">http://purl.org/dc/elements/1.1/creator</a>
</p>
</td></tr>
<tr>
<td><i>Word 97 (doc)</i></td>
<td><i>title</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title</a>
</td></tr>
<tr>
<td><i>Word 97 (doc)</i></td>
<td><i>created</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a>
</td></tr>
<tr>
<td><i>Word 97 (doc)</i></td>
<td><i>creator</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator">http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator</a>
</td></tr>
<tr>
<td><i>Word 2007 (docx) and Word 2010 (docx)</i></td>
<td><i>title</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title</a>
<p><a rel="nofollow" class="external free" href="http://purl.org/dc/elements/1.1/title">http://purl.org/dc/elements/1.1/title</a>
</p>
</td></tr>
<tr>
<td><i>Word 2007 (docx) and Word 2010 (docx)</i></td>
<td><i>created</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a>
<p><a rel="nofollow" class="external free" href="http://purl.org/dc/terms/created">http://purl.org/dc/terms/created</a>
</p>
</td></tr>
<tr>
<td><i>Word 2007 (docx) and Word 2010 (docx)</i></td>
<td><i>creator</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator">http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator</a>
<p><a rel="nofollow" class="external free" href="http://purl.org/dc/elements/1.1/creator">http://purl.org/dc/elements/1.1/creator</a>
</p>
</td></tr>
<tr>
<td><i>Open Office (ODP, ODS and ODT)</i></td>
<td><i>title</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title</a>
<p><a rel="nofollow" class="external free" href="http://purl.org/dc/elements/1.1/title">http://purl.org/dc/elements/1.1/title</a>
</p>
</td></tr>
<tr>
<td><i>Open Office (ODP, ODS and ODT)</i></td>
<td><i>created</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated">http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated</a>
<p><a rel="nofollow" class="external free" href="urn:oasis:names:tc:opendocument:xmlns:meta:1.0/creation-date">urn:oasis:names:tc:opendocument:xmlns:meta:1.0/creation-date</a>
</p>
</td></tr>
<tr>
<td><i>Open Office (ODP, ODS and ODT)</i></td>
<td><i>creator</i></td>
<td><a rel="nofollow" class="external free" href="http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator">http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator</a>
<p><a rel="nofollow" class="external free" href="http://purl.org/dc/elements/1.1/creator">http://purl.org/dc/elements/1.1/creator</a>
<a rel="nofollow" class="external free" href="urn:oasis:names:tc:opendocument:xmlns:meta:1.0/initial-creator">urn:oasis:names:tc:opendocument:xmlns:meta:1.0/initial-creator</a>
</p>
</td></tr>
</table>
<h3><span class="mw-headline" id="Extending_Aperture">Extending Aperture</span></h3>
<p>SMILA does not contain the complete Aperture distribution, because some converters need third party libraries with problematic licenses that we are not allowed to distribute. However, it should be easy to include those parts of Aperture into your SMILA installation yourself: Just
</p>
<ul>
<li> Download one of the <tt>aperture-eclipse-1.4.0</tt> archives from <a rel="nofollow" class="external autonumber" href="http://sourceforge.net/projects/aperture/files/Aperture/1.4.0/">[3]</a>
</li>
<li> Unpack it.
</li>
<li> Copy the required bundles from <code>lib/aperture-libs</code> and <code>lib/required-libs</code> to <code>SMILA/plugins</code>.
</li>
<li> Add the new extractor bundles to the <code>config.ini</code> to activate them at system start.
</li>
</ul>
<p>For example, to add the HTML extractor, you must add the following bundles from Aperture to SMILA:
</p>
<ul>
<li> <code>lib/aperture-libs/org.semanticdesktop.aperture.extractor-text-html_1.4.0.jar</code>
</li>
<li> <code>lib/aperture-libs/org.semanticdesktop.aperture.helper-html_1.4.0.jar</code>
</li>
<li> <code>lib/required-libs/org.htmlparser_1.6.0.jar</code>
</li>
</ul>
<p>and add <code>org.semanticdesktop.aperture.extractor-text-html@4:start</code> to your <code>config.ini</code> to activate it.
</p><p>Similarly, to add support for encrypted PDF files, add the bundle <code>lib/required-libs/bcprov-jdk16-146.jar</code> to SMILA.
</p>
<!--
NewPP limit report
CPU time usage: 0.096 seconds
Real time usage: 0.101 seconds
Preprocessor visited node count: 59/1000000
Preprocessor generated node count: 98/1000000
Post‐expand include size: 0/2097152 bytes
Template argument size: 0/2097152 bytes
Highest expansion depth: 2/40
Expensive parser function count: 0/100
-->
<!-- Saved in parser cache with key my_wiki:pcache:idhash:15200-0!*!0!!en!*!* and timestamp 20150414084642 and revision id 326327
-->
</div>
<!-- catlinks -->
<div id='catlinks' class='catlinks'><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Categories</a>: <ul><li><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></li><li><a href="http://wiki.eclipse.org/index.php?title=Category:SMILA/Pipelet&amp;action=edit&amp;redlink=1" class="new" title="Category:SMILA/Pipelet (page does not exist)">SMILA/Pipelet</a></li></ul></div></div> <!-- /catlinks -->
</div>
</div>
</div>
</div>
<!-- /maincontent -->
<!-- printfooter -->
<div class="printfooter">
Retrieved from "<a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;oldid=326327">http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;oldid=326327</a>" </div>
<!-- /printfooter -->
<!-- debughtml -->
<!-- /debughtml -->
</div>
<!-- /bodyContent -->
</section>
<!-- /content -->
<!-- footer -->
</div> <section id="footer-contribution-info" style="border-top:1px solid #ccc;" class="footer-offset background-white margin-top-25"><div class="container text-center padding-top-10 padding-bottom-10"><p id="footercredit">This page was last modified 06:57, 11 January 2013 by <a href="http://wiki.eclipse.org/index.php?title=User:Daniel.stucky.attensity.com&amp;action=edit&amp;redlink=1" class="new" title="User:Daniel.stucky.attensity.com (page does not exist)">Daniel Stucky</a>. Based on work by <a href="http://wiki.eclipse.org/index.php?title=User:Juergen.schumacher.attensity.com&amp;action=edit&amp;redlink=1" class="new" title="User:Juergen.schumacher.attensity.com (page does not exist)">Juergen Schumacher</a>, <a href="http://wiki.eclipse.org/index.php?title=User:Nadine.auslaender.attensity.com&amp;action=edit&amp;redlink=1" class="new" title="User:Nadine.auslaender.attensity.com (page does not exist)"> </a> and <a href="http://wiki.eclipse.org/User:Drazen.cindric.attensity.com" title="User:Drazen.cindric.attensity.com">Drazen Cindric</a> and <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/AperturePipelet&amp;action=credits" title="SMILA/Documentation/AperturePipelet">others</a>.</p><p id="footerviews">This page has been accessed 5,326 times.</p></div></section> </main> <!-- /#main-content-container-row -->
<p id="back-to-top" class="noprint hidden-print">
<a class="visible-xs" href="AperturePipelet.html#top">Back to the top</a>
</p>
<footer role="contentinfo" class="noprint hidden-print">
<div class="container">
<div class="row">
<section id="footer-eclipse-foundation" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Eclipse Foundation</h2>
<ul class="nav">
<li><a href="https://eclipse.org/org/">About us</a></li>
<li><a href="https://eclipse.org/org/foundation/contact.php">Contact Us</a></li>
<li><a href="https://eclipse.org/donate">Donate</a></li>
<li><a href="https://eclipse.org/org/documents/">Governance</a></li>
<li><a href="https://eclipse.org/artwork/">Logo and Artwork</a></li>
<li><a href="https://eclipse.org/org/foundation/directors.php">Board of Directors</a></li>
</ul>
</section>
<section id="footer-legal" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Legal</h2>
<ul class="nav">
<li><a href="https://eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="https://eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="https://eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="https://eclipse.org/org/documents/epl-v10.php">Eclipse Public License </a></li>
<li><a href="https://eclipse.org/legal/">Legal Resources </a></li>
</ul>
</section>
<section id="footer-useful-links" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Useful Links</h2>
<ul class="nav">
<li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li>
<li><a href="http://help.eclipse.org/">Documentation</a></li>
<li><a href="https://eclipse.org/contribute/">How to Contribute</a></li>
<li><a href="https://eclipse.org/mail/">Mailing Lists</a></li>
<li><a href="https://eclipse.org/forums/">Forums</a></li>
<li><a href="http://marketplace.eclipse.org/">Marketplace</a></li>
</ul>
</section>
<section id="footer-other" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Other</h2>
<ul class="nav">
<li><a href="https://eclipse.org/ide/">IDE and Tools</a></li>
<li><a href="https://eclipse.org/projects">Community of Projects</a></li>
<li><a href="https://eclipse.org/org/workinggroups/">Working Groups</a></li>
</ul>
<ul class="list-inline social-media">
<li><a href="https://twitter.com/EclipseFdn"><i class="fa fa-twitter-square"></i></a></li>
<li><a href="https://plus.google.com/+Eclipse"><i class="fa fa-google-plus-square"></i></a></li>
<li><a href="https://www.facebook.com/eclipse.org"><i class="fa fa-facebook-square"></i> </a></li>
<li><a href="https://www.youtube.com/user/EclipseFdn"><i class="fa fa-youtube-square"></i></a></li>
</ul>
</section>
<div id="copyright" class="col-xs-offset-1 col-sm-14 col-md-24 col-md-offset-0">
<div>
<span><img src="http://eclipse.org/eclipse.org-common/themes/solstice/public/images/logo/eclipse-logo-bw-800x188.png" alt="Eclipse.org black and white logo" width="166" height="39" id="logo-eclipse-white"/></span>
<p id="copyright-text">Copyright &copy; 2014 The Eclipse Foundation. All Rights Reserved.</p>
</div>
</div>
<a href="AperturePipelet.html#" class="scrollup">Back to the top</a>
</div>
</div>
</footer>
<script src="http://wiki.eclipse.org/skins/solstice/public/javascript/main.min.js"></script>
<!-- Placed at the end of the document so the pages load faster -->
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-910670-2']);
_gaq.push(['_trackPageview']);
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script> <!-- /footer -->
<script>/*<![CDATA[*/window.jQuery && jQuery.ready();/*]]>*/</script><script>if(window.mw){
mw.loader.state({"skins.solstice":"loading","site":"ready","user":"ready","user.groups":"ready"});
}</script>
<script src="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=skins.solstice&amp;only=scripts&amp;skin=solstice&amp;*"></script>
<script>if(window.mw){
mw.loader.load(["mediawiki.action.view.postEdit","mediawiki.user","mediawiki.hidpi","mediawiki.page.ready","mediawiki.searchSuggest"],null,true);
}</script>
<script>if(window.mw){
mw.config.set({"wgBackendResponseTime":356});
}</script> </body>
</html>