blob: fa3689923e58101668b1bc6abeb2c84fc9375198 [file] [log] [blame]
{
"class" : "org.eclipse.smila.processing.pipelets.boilerpipe.BoilerpipePipelet",
"description": "Removes all boilerplate and HTML tags from a web page.",
"parameters": [
{
"name": "inputType",
"type": "string",
"values": ["ATTACHMENT", "ATTRIBUTE"],
"description": "Defines whether the HTML input is found in an attachment or in an attribute of the record"
},
{
"name": "outputType",
"type": "string",
"values": ["ATTACHMENT", "ATTRIBUTE"],
"description": "Defines whether the plain text should be stored in an attachment or in an attribute of the record"
},
{
"name": "inputName",
"type": "string",
"description": "Name of attachment or attribute that contains the HTML input"
},
{
"name": "outputName",
"type": "string",
"description": "Name of attachment or attribute for plain text output"
},
{
"name": "encodingAttribute",
"type": "string",
"optional": true,
"description": "Optional name of the attribute with the encoding of the input attachment."
},
{
"name": "defaultEncoding",
"type": "string",
"optional": true,
"description": "Fallback encoding to use if everything else fails."
},
{
"name": "filter",
"type": "string",
"optional": true,
"multi": true,
"description": "A list of the Boilerpipe filters to use. Entries may be class names or references to static methods or static variables (defaults to de.l3s.boilerpipe.extractors.ArticleExtractor.INSTANCE)."
}
]
}