// blob: e8dbc35a3e001b2c834b4ca95b80e478808f72dd [file] [log] [blame]
/*
 * Shared HtmlToTextPipelet instance, created once when the script is loaded
 * and reused for every record that processRecord() routes through it.
 * It is configured to read its input from the "Content" attribute and to
 * write its output back to the same "Content" attribute.
 */
var htmlToTextPipelet = pipelets.create(
  "org.eclipse.smila.processing.pipelets.HtmlToTextPipelet",
  {
    inputType: "ATTRIBUTE",
    outputType: "ATTRIBUTE",
    inputName: "Content",
    outputName: "Content"
  }
);
/*
 * Shared SolrUpdatePipelet instance, created once when the script is loaded.
 * processRecord() hands every record to it as the final step.
 * The configuration targets the index named "collection1" with an "ADD"
 * update operation.
 */
var solrIndexPipelet = pipelets.create(
  "org.eclipse.smila.solr.update.SolrUpdatePipelet",
  {
    indexname: "collection1",
    update: {
      operation: "ADD",
      // 600000 ms = 10 minutes. The value is passed as a string on purpose,
      // exactly as the pipelet configuration was written originally.
      commitWithinMs: "600000",
      // NOTE(review): every mapping value is the empty string - presumably
      // this means "use the attribute name as the Solr field name"; confirm
      // against the SolrUpdatePipelet documentation.
      mapping: {
        _source: "",
        Path: "",
        Url: "",
        Filename: "",
        MimeType: "",
        Size: "",
        LastModifiedDate: "",
        Content: "",
        Extension: "",
        Title: "",
        Author: ""
      }
    }
  }
);
/*
 * Runs a SubAttributeExtractorPipelet over a single record.
 *
 * record: the record handed to the pipelet's process() call.
 * iPath:  value passed as the pipelet's "inputPath" setting.
 * oPath:  value passed as the pipelet's "outputPath" setting.
 * mode:   value passed as the pipelet's "mode" setting (callers in this
 *         script use "FIRST", "ALL_AS_ONE" and "ALL_AS_LIST").
 *
 * Returns whatever the pipelet's process() call returns.
 *
 * NOTE(review): a new pipelet instance is created on every call. If pipelet
 * instances are safe to reuse, caching one per (iPath, oPath, mode) would
 * avoid the repeated setup - confirm before changing.
 */
function subAttributeExtract(record, iPath, oPath, mode) {
  var extractorConfig = {
    inputPath: iPath,
    outputPath: oPath,
    mode: mode
  };
  return pipelets
    .create("org.eclipse.smila.processing.pipelets.SubAttributeExtractorPipelet", extractorConfig)
    .process(record);
}
/*
 * Called by the worker to initialize the script for a task.
 *
 * parameters: the task parameters supplied by the worker; not used here.
 *
 * Intentionally empty: the pipelets this script relies on are created by the
 * top-level statements of the script, so there is no per-task setup to do.
 */
function prepare(parameters) {
}
/*
 * Called by the worker to process a single record from a bulk.
 *
 * Steps, in order:
 *   1. Run the sub-attribute extractions listed below via
 *      subAttributeExtract(), each with its input path, output path and mode.
 *   2. If the record has a "MimeType" of "text/xml" or "text/html", pass it
 *      through htmlToTextPipelet.
 *   3. Pass the record to solrIndexPipelet and return that call's result.
 *
 * NOTE(review): both "Contents/Value" and "Description/Value" are extracted
 * into "Content". Whether the second extraction replaces, keeps or merges
 * the first result depends on SubAttributeExtractorPipelet - confirm that
 * this is the intended behavior.
 */
function processRecord(record) {
  // Each entry is [inputPath, outputPath, mode]; order matters and is kept
  // exactly as the extractions were originally listed.
  var extractions = [
    ["Contents/Type", "MimeType", "FIRST"],
    ["Contents/Value", "Content", "ALL_AS_ONE"],
    ["Description/Value", "Content", "ALL_AS_ONE"],
    ["Links/Href", "Url", "FIRST"],
    ["Authors/Name", "Author", "ALL_AS_LIST"]
  ];
  for (var i = 0; i < extractions.length; i++) {
    var step = extractions[i];
    subAttributeExtract(record, step[0], step[1], step[2]);
  }

  var needsHtmlToText = false;
  if ("MimeType" in record) {
    var mimeType = record.MimeType;
    // Loose equality is kept on purpose.
    // NOTE(review): record values may be host-provided string objects for
    // which === would not match a script string - confirm before tightening.
    needsHtmlToText = mimeType == "text/xml" || mimeType == "text/html";
  }
  if (needsHtmlToText) {
    htmlToTextPipelet.process(record);
  }

  return solrIndexPipelet.process(record);
}