blob: 71cf09489519a41d5492ca0fd84a033cb9e547e4 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="SMILA/Documentation/Bundle org.eclipse.smila.processing.pipelets.boilerpipe,SMILA/Development Guidelines/How to write a Pipelet,SMILA/Documentation/Bundle org.eclipse.smila.processing.pipelets" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/SMILA/Documentation/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (English)" />
<link rel="alternate" type="application/rss+xml" title="Eclipsepedia RSS Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=rss" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom Feed" href="http://wiki.eclipse.org/index.php?title=Special:Recentchanges&amp;feed=atom" />
<title>SMILA/Documentation/Bundle org.eclipse.smila.processing.pipelets.boilerpipe - Eclipsepedia</title>
<style type="text/css" media="screen,projection">/*<![CDATA[*/ @import "http://wiki.eclipse.org/skins/eclipsenova/novaWide.css?116"; /*]]>*/</style>
<link rel="stylesheet" type="text/css" media="print" href="http://wiki.eclipse.org/skins/eclipsenova/eclipsenovaPrint.css?116" />
<link rel="stylesheet" type="text/css" media="handheld" href="http://wiki.eclipse.org/skins/eclipsenova/handheld.css?116" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/header.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/tabs.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/visual.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/layout.css" media="screen" />
<link rel="stylesheet" type="text/css" href="http://wiki.eclipse.org/skins/eclipsenova/Nova/css/footer.css" media="screen" />
<!--[if IE]><link rel="stylesheet" type="text/css" href="/skins/eclipsenova/IEpngfix.css" media="screen" /><![endif]-->
<!--[if lt IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE50Fixes.css?116";</style> <![endif]-->
<!--[if IE 5.5000]><style type="text/css">@import "/skins/eclipsenova/IE55Fixes.css?116";</style><![endif]-->
<!--[if IE 6]><style type="text/css">@import "/skins/eclipsenova/IE60Fixes.css?116";</style><![endif]-->
<!--[if IE 7]><style type="text/css">@import "/skins/eclipsenova/IE70Fixes.css?116";</style><![endif]-->
<!--[if lt IE 7]><script type="text/javascript" src="/skins/common/IEFixes.js?116"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<script type= "text/javascript">/*<![CDATA[*/
// Page-level globals describing this page view. They are declared before the
// external scripts further down in <head> are loaded.
// NOTE(review): presumably read by wikibits.js, ajax.js and the generated
// site JS -- confirm against those files before renaming or removing any.

// Skin name plus server and path fragments used to build URLs.
// NOTE(review): "$1" in wgArticlePath looks like a page-title placeholder -- confirm.
var skin = "eclipsenova";
var stylepath = "/skins";
var wgArticlePath = "/$1";
var wgScriptPath = "";
var wgScript = "/index.php";
var wgServer = "http://wiki.eclipse.org";
// Identity of the displayed page: namespace, name (underscore form), title
// (space form) and the current action.
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe";
var wgTitle = "SMILA/Documentation/Bundle org.eclipse.smila.processing.pipelets.boilerpipe";
var wgAction = "view";
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
// Same id that appears in the parser-cache key comment near the end of the page body.
var wgArticleId = "37346";
var wgIsArticle = true;
// No user information is set for this view (both values are null).
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
// Same revision number as the "oldid" of the "Permanent link" entry in the toolbox.
var wgCurRevisionId = "315152";
var wgVersion = "1.12.0";
var wgEnableAPI = true;
var wgEnableWriteAPI = false;
/*]]>*/</script>
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/wikibits.js?116"><!-- wikibits js --></script>
<!-- Performance mods similar to those for bug 166401 -->
<script type="text/javascript" src="http://wiki.eclipse.org/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=eclipsenova"><!-- site js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="http://wiki.eclipse.org/skins/common/ajax.js?116"></script>
<style type="text/css">/*<![CDATA[*/
.source-xml {line-height: normal; font-size: medium;}
.source-xml li {line-height: normal;}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for xml
* CSS class: source-xml, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie (http://qbnz.com/highlighter)
*/
.source-xml .de1, .source-xml .de2 {font-family: 'Courier New', Courier, monospace; font-weight: normal;}
.source-xml {}
.source-xml .head {}
.source-xml .foot {}
.source-xml .imp {font-weight: bold; color: red;}
.source-xml .ln-xtra {color: #cc0; background-color: #ffc;}
.source-xml li {font-family: 'Courier New', Courier, monospace; color: black; font-weight: normal; font-style: normal;}
.source-xml li.li2 {font-weight: bold;}
.source-xml .coMULTI {color: #808080; font-style: italic;}
.source-xml .es0 {color: #000099; font-weight: bold;}
.source-xml .br0 {color: #66cc66;}
.source-xml .st0 {color: #ff0000;}
.source-xml .nu0 {color: #cc66cc;}
.source-xml .sc0 {color: #00bbdd;}
.source-xml .sc1 {color: #ddbb00;}
.source-xml .sc2 {color: #339933;}
.source-xml .sc3 {color: #009900;}
.source-xml .re0 {color: #000066;}
.source-xml .re1 {font-weight: bold; color: black;}
.source-xml .re2 {font-weight: bold; color: black;}
/*]]>*/
</style>
<style type="text/css">/*<![CDATA[*/
@import "http://wiki.eclipse.org/index.php?title=MediaWiki:Geshi.css&usemsgcache=yes&action=raw&ctype=text/css&smaxage=18000";
/*]]>*/
</style> </head>
<body class="mediawiki ns-0 ltr page-SMILA_Documentation_Bundle_org_eclipse_smila_processing_pipelets_boilerpipe">
<div id="globalWrapper">
<div id="column-one">
<!-- Eclipse Additions for the Top Nav start here M. Ward-->
<div id="header">
<div id="header-graphic">
<img src="http://wiki.eclipse.org/skins/eclipsenova/eclipse.png" alt="Eclipse Wiki" />
</div>
<!-- Pulled 101409 Mward -->
<div class="portlet" id="p-personal">
<div class="pBody">
<ul>
<li id="pt-login"><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe">Log in</a></li>
</ul>
</div>
</div>
<div id="header-icons">
<div id="sites">
<ul id="sitesUL">
<li><a href="http://www.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/eclipseIcon.png" width="28" height="28" alt="Eclipse Foundation" title="Eclipse Foundation" /><div>Eclipse Foundation</div></a></li>
<li><a href="http://marketplace.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/marketplace.png" width="28" height="28" alt="Eclipse Marketplace" title="Eclipse Marketplace" /><div>Eclipse Marketplace</div></a></li>
<li><a href="https://bugs.eclipse.org/bugs"><img src="http://dev.eclipse.org/custom_icons/system-search-bw.png" width="28" height="28" alt="Bugzilla" title="Bugzilla" /><div>Bugzilla</div></a></li>
<li><a href="http://live.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/audio-input-microphone-bw.png" width="28" height="28" alt="Live" title="Live" /><div>Eclipse Live</div></a></li>
<li><a href="http://planeteclipse.org"><img src="http://dev.eclipse.org/large_icons/devices/audio-card.png" width="28" height="28" alt="PlanetEclipse" title="Planet" /><div>Planet Eclipse</div></a></li>
<li><a href="http://portal.eclipse.org"><img src="http://dev.eclipse.org/custom_icons/preferences-system-network-proxy-bw.png" width="28" height="28" alt="Portal" title="Portal" /><div>My Foundation Portal</div></a></li>
</ul>
</div>
</div>
</div>
<!-- NEW HEADER STUFF HERE -->
<div id="header-menu">
<div id="header-nav">
<ul> <li><a class="first_one" href="http://wiki.eclipse.org/" target="_self">Home</a></li> <li><a href="http://www.eclipse.org/downloads/" target="_self">Downloads</a></li>
<li><a href="http://www.eclipse.org/users/" target="_self">Users</a></li>
<li><a href="http://www.eclipse.org/membership/" target="_self">Members</a></li>
<li><a href="http://wiki.eclipse.org/index.php/Development_Resources" target="_self">Committers</a></li>
<li><a href="http://www.eclipse.org/resources/" target="_self">Resources</a></li>
<li><a href="http://www.eclipse.org/projects/" target="_self">Projects</a></li>
<li><a href="http://www.eclipse.org/org/" target="_self">About Us</a></li>
</ul>
</div>
<div id="header-utils">
<!-- moved the search window here -->
<form action="http://wiki.eclipse.org/Special:Search" >
<input class="input" id="searchInput" name="search" type="text" accesskey="f" value="" />
<input type="submit" name="go" id="searchGoButton" class="button" title="Go to a page with this exact name if one exists" value="Go" />&nbsp;
<input type="submit" name="fulltext" class="button" id="mw-searchButton" title="Search Eclipsepedia for this text" value="Search" />
</form>
</div>
</div>
<!-- Eclipse Additions for the Header stop here -->
<!-- Additions and mods for leftside nav Start here -->
<!--Started nav rip here-->
<!-- these are the nav controls main page, changes etc -->
<div id="novaContent" class="faux">
<div id="leftcol">
<ul id="leftnav">
<!-- these are the page controls, edit history etc -->
<li class="separator"><a class="separator">Navigation &#160;&#160;</a></li>
<li id="n-mainpage"><a href="http://wiki.eclipse.org/Main_Page">Main Page</a></li>
<li id="n-portal"><a href="http://wiki.eclipse.org/Eclipsepedia:Community_Portal">Community portal</a></li>
<li id="n-currentevents"><a href="http://wiki.eclipse.org/Eclipsepedia:Current_events">Current events</a></li>
<li id="n-recentchanges"><a href="http://wiki.eclipse.org/Special:Recentchanges">Recent changes</a></li>
<li id="n-randompage"><a href="http://wiki.eclipse.org/Special:Random">Random page</a></li>
<li id="n-help"><a href="http://wiki.eclipse.org/Help:Contents">Help</a></li>
<li class="separator"><a class="separator">Toolbox &#160;&#160;</a></li>
<li id="t-whatlinkshere"><a href="http://wiki.eclipse.org/Special:Whatlinkshere/SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe">What links here</a></li>
<li id="t-recentchangeslinked"><a href="http://wiki.eclipse.org/Special:Recentchangeslinked/SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe">Related changes</a></li>
<!-- This is the toolbox section -->
<li id="t-upload"><a href="http://wiki.eclipse.org/Special:Upload">Upload file</a></li>
<li id="t-specialpages"><a href="http://wiki.eclipse.org/Special:Specialpages">Special pages</a></li>
<li id="t-print"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe&amp;printable=yes">Printable version</a></li> <li id="t-permalink"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe&amp;oldid=315152">Permanent link</a></li> </ul>
</div>
<!-- Additions and mods for leftside nav End here -->
<div id="column-content">
<div id="content">
<a name="top" id="top"></a>
<div id="tabs">
<ul class="primary">
<li class="active"><a href="Bundle_org.eclipse.smila.processing.pipelets.boilerpipe.html"><span class="tab">Page</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe&amp;action=edit"><span class="tab">Discussion</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe&amp;action=edit"><span class="tab">View source</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe&amp;action=history"><span class="tab">History</span></a></li>
<li><a href="http://wiki.eclipse.org/index.php?title=Special:Userlogin&amp;returnto=SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe"><span class="tab">Edit</span></a></li>
</ul>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<h1 class="firstHeading">SMILA/Documentation/Bundle org.eclipse.smila.processing.pipelets.boilerpipe</h1>
<div id="bodyContent">
<h3 id="siteSub">From Eclipsepedia</h3>
<div id="contentSub"><span class="subpages">&lt; <a href="../../SMILA.html" title="SMILA">SMILA</a> | <a href="../Documentation.1.html" title="SMILA/Documentation">Documentation</a></span></div>
<div id="jump-to-nav">Jump to: <a href="Bundle_org.eclipse.smila.processing.pipelets.boilerpipe.html#column-one">navigation</a>, <a href="Bundle_org.eclipse.smila.processing.pipelets.boilerpipe.html#searchInput">search</a></div> <!-- start content -->
<p>This page describes the SMILA pipelets provided by bundle <tt>org.eclipse.smila.processing.pipelets.boilerpipe</tt>.
</p>
<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1"><a href="Bundle_org.eclipse.smila.processing.pipelets.boilerpipe.html#General"><span class="tocnumber">1</span> <span class="toctext">General</span></a></li>
<li class="toclevel-1"><a href="Bundle_org.eclipse.smila.processing.pipelets.boilerpipe.html#org.eclipse.smila.processing.pipelets.boilerpipe.BoilerpipePipelet"><span class="tocnumber">2</span> <span class="toctext">org.eclipse.smila.processing.pipelets.boilerpipe.BoilerpipePipelet</span></a>
<ul>
<li class="toclevel-2"><a href="Bundle_org.eclipse.smila.processing.pipelets.boilerpipe.html#Configuration"><span class="tocnumber">2.1</span> <span class="toctext">Configuration</span></a></li>
<li class="toclevel-2"><a href="Bundle_org.eclipse.smila.processing.pipelets.boilerpipe.html#Example"><span class="tocnumber">2.2</span> <span class="toctext">Example</span></a></li>
</ul>
</li>
</ul>
</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
<a name="General"></a><h2> <span class="mw-headline"> General </span></h2>
<p>All pipelets in this bundle support the configurable error handling as described in <a href="../Development_Guidelines/How_to_write_a_Pipelet.html#Implementation" class="mw-redirect" title="SMILA/Development Guidelines/How to write a Pipelet">SMILA/Development_Guidelines/How_to_write_a_Pipelet#Implementation</a>. When used in jobmanager workflows, records causing errors are dropped.
</p><p><b> Read Type </b>
</p>
<ul><li> <i>runtime</i>: Parameters are read when processing records. Parameter value can be set per Record.
</li><li> <i>init</i>: Parameters are read once from the Pipelet configuration when initializing the Pipelet. The parameter value cannot be overwritten in the Record.
</li></ul>
<a name="org.eclipse.smila.processing.pipelets.boilerpipe.BoilerpipePipelet"></a><h2> <span class="mw-headline"> org.eclipse.smila.processing.pipelets.boilerpipe.BoilerpipePipelet </span></h2>
<p>Extracts text from an HTML input using the <a href="http://code.google.com/p/boilerpipe/" class="external text" title="http://code.google.com/p/boilerpipe/" rel="nofollow">Boilerpipe library</a>. In contrast to the <a href="Bundle_org.eclipse.smila.processing.pipelets.html#org.eclipse.smila.processing.pipelets.HtmlToTextPipelet" title="SMILA/Documentation/Bundle org.eclipse.smila.processing.pipelets"> HtmlToTextPipelet</a> it offers different algorithms for textual content extraction but does not extract HTML metadata.
</p>
<a name="Configuration"></a><h3> <span class="mw-headline"> Configuration </span></h3>
<table border="1">
<tr>
<th>Property
</th><th>Type
</th><th>Read Type
</th><th>Description
</th></tr>
<tr>
<td><i>inputType</i>
</td><td>String&nbsp;: <i>ATTACHMENT, ATTRIBUTE</i>
</td><td>runtime
</td><td>Defines whether the HTML input is found in an attachment or in an attribute of the record
</td></tr>
<tr>
<td><i>outputType</i>
</td><td>String&nbsp;: <i>ATTACHMENT, ATTRIBUTE</i>
</td><td>runtime
</td><td>Defines whether the plain text should be stored in an attachment or in an attribute of the record
</td></tr>
<tr>
<td><i>inputName</i>
</td><td>String
</td><td>runtime
</td><td>Name of attachment or attribute that contains the HTML input
</td></tr>
<tr>
<td><i>outputName</i>
</td><td>String
</td><td>runtime
</td><td>Name of attachment or attribute for plain text output
</td></tr>
<tr>
<td><i>encodingAttribute</i>
</td><td>String
</td><td>runtime
</td><td>Optional name of the attribute with the encoding of the input attachment.
</td></tr>
<tr>
<td><i>defaultEncoding</i>
</td><td>String
</td><td>runtime
</td><td>Optional fallback encoding, used if everything else fails.
</td></tr>
<tr>
<td><i>filter</i>
</td><td>Sequence of String
</td><td>init
</td><td>A list of Boilerpipe filters to use. This may contain class names, static method references, or static variable references. Default is <tt>de.l3s.boilerpipe.extractors.ArticleExtractor.INSTANCE</tt>. Please note that BoilerpipeExtractors implement the interface BoilerpipeFilter and are pipelines of BoilerpipeFilters. Therefore you should not use multiple BoilerpipeExtractors! Also please note that some Extractors and Filters do not have a default constructor and therefore cannot be used by this Pipelet. Others may not have a public constructor but provide a public static instance member instead.
</td></tr></table>
<p><br />
</p>
<a name="Example"></a><h3> <span class="mw-headline"> Example </span></h3>
<p>Extract text from the HTML input in attachment "html" into the attribute "text" using the encoding given in attribute "http.encoding" and using the extractor ArticleSentencesExtractor:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;proc:invokePipelet</span> <span class="re0">name</span>=<span class="st0">&quot;extractText&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;proc:pipelet</span> <span class="re0">class</span>=<span class="st0">&quot;org.eclipse.smila.processing.pipelets.boilerpipe.BoilerpipePipelet&quot;</span> <span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;proc:variables</span> <span class="re0">input</span>=<span class="st0">&quot;request&quot;</span> <span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;proc:configuration<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;inputType&quot;</span><span class="re2">&gt;</span></span>ATTACHMENT<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;inputName&quot;</span><span class="re2">&gt;</span></span>html<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;outputType&quot;</span><span class="re2">&gt;</span></span>ATTRIBUTE<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;outputName&quot;</span><span class="re2">&gt;</span></span>text<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;encodingAttribute&quot;</span><span class="re2">&gt;</span></span>http.encoding<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;filter&quot;</span><span class="re2">&gt;</span></span>de.l3s.boilerpipe.extractors.ArticleSentencesExtractor.INSTANCE<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/proc:configuration<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/proc:invokePipelet<span class="re2">&gt;</span></span></span></pre></div>
<p><br />
The same example but using the simple filter MarkEverythingContentFilter:
</p>
<div dir="ltr" style="text-align: left;"><pre class="source-xml"><span class="sc3"><span class="re1">&lt;proc:invokePipelet</span> <span class="re0">name</span>=<span class="st0">&quot;extractText&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;proc:pipelet</span> <span class="re0">class</span>=<span class="st0">&quot;org.eclipse.smila.processing.pipelets.boilerpipe.BoilerpipePipelet&quot;</span> <span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;proc:variables</span> <span class="re0">input</span>=<span class="st0">&quot;request&quot;</span> <span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;proc:configuration<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;inputType&quot;</span><span class="re2">&gt;</span></span>ATTACHMENT<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;inputName&quot;</span><span class="re2">&gt;</span></span>html<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;outputType&quot;</span><span class="re2">&gt;</span></span>ATTRIBUTE<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;outputName&quot;</span><span class="re2">&gt;</span></span>text<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;encodingAttribute&quot;</span><span class="re2">&gt;</span></span>http.encoding<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;rec:Val</span> <span class="re0">key</span>=<span class="st0">&quot;filter&quot;</span><span class="re2">&gt;</span></span>de.l3s.boilerpipe.filters.simple.MarkEverythingContentFilter.INSTANCE<span class="sc3"><span class="re1">&lt;/rec:Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/proc:configuration<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/proc:invokePipelet<span class="re2">&gt;</span></span></span></pre></div>
<!--
NewPP limit report
Preprocessor node count: 13/1000000
Post-expand include size: 0/2097152 bytes
Template argument size: 0/2097152 bytes
#ifexist count: 0/100
-->
<!-- Saved in parser cache with key wikidb:pcache:idhash:37346-0!1!0!!en!2!edit=0 and timestamp 20130416060945 -->
<div class="printfooter">
Retrieved from "<a href="Bundle_org.eclipse.smila.processing.pipelets.boilerpipe.html">http://wiki.eclipse.org/SMILA/Documentation/Bundle_org.eclipse.smila.processing.pipelets.boilerpipe</a>"</div>
<div id="catlinks"><p class='catlinks'><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Categories</a>: <span dir='ltr'><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></span> | <span dir='ltr'><a href="http://wiki.eclipse.org/index.php?title=Category:SMILA/Pipelet&amp;action=edit" class="new" title="Category:SMILA/Pipelet">SMILA/Pipelet</a></span></p></div> <!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<!-- Yoink of toolbox for phoenix moved up -->
</div>
</div>
<div id="clearFooter"></div>
<div id="footer" >
<ul id="footernav">
<li class="first"><a href="http://www.eclipse.org/">Home</a></li>
<li><a href="http://www.eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="http://www.eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="http://www.eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="http://www.eclipse.org/org/foundation/contact.php">Contact</a></li>
<li><a href="http://wiki.eclipse.org/Eclipsepedia:About" title="Eclipsepedia:About">About Eclipsepedia</a></li>
</ul>
<span id="copyright">Copyright &copy; 2013 The Eclipse Foundation. All Rights Reserved</span>
<p id="footercredit">This page was last modified 12:44, 11 September 2012 by <a href="http://wiki.eclipse.org/index.php?title=User:Daniel.stucky.attensity.com&amp;action=edit" class="new" title="User:Daniel.stucky.attensity.com">Daniel Stucky</a>. Based on work by <a href="http://wiki.eclipse.org/User:Eclipse.liefke.biz" title="User:Eclipse.liefke.biz">Tobias Liefke</a>.</p>
<p id="footerviews">This page has been accessed 643 times.</p>
</div>
<script type="text/javascript">
// Google Analytics loader: pick the ga.js host that matches the page protocol
// and write the <script> tag into the document. The tracker code is kept in a
// separate <script> element below so that it runs after this one has finished.
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
// Record a page view for this page. ga.js may be blocked or fail to load, in
// which case _gat is never defined; check for it first so the page does not
// raise an uncaught ReferenceError. Tracking is best-effort only.
if (typeof _gat !== "undefined") {
  var pageTracker = _gat._getTracker("UA-910670-4");
  pageTracker._trackPageview();
}
</script>
<!-- <div class="visualClear"></div> -->
<script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
</div>
<!-- Served in 0.053 secs. --></body></html>