{
"jobs":[
{
"name":"indexUpdate",
"workflow":"indexUpdate",
"parameters":{
"tempStore":"temp"
}
},
{
"name":"indexUpdateFeed",
"workflow":"indexUpdateFeed",
"parameters":{
"tempStore":"temp"
}
},
{
"name":"indexUpdateXml",
"workflow":"indexUpdateXml",
"parameters":{
"tempStore":"temp",
"xmlSnippetJobName":"indexUpdateXml"
}
},
{
"name":"crawlFilesystem",
"workflow":"fileCrawling",
"parameters":{
"tempStore":"temp",
"dataSource":"file",
"rootFolder":"< EDIT: directory to crawl >",
"jobToPushTo":"indexUpdate",
"mapping":{
"fileContent":"Content",
"filePath":"Path",
"fileName":"Filename",
"fileSize":"Size",
"fileExtension":"Extension",
"fileLastModified":"LastModifiedDate"
}
}
},
{
"name":"crawlSmilaWiki",
"workflow":"webCrawling",
"parameters":{
"tempStore":"temp",
"dataSource":"web",
"jobToPushTo":"indexUpdate",
"startUrl":"http://wiki.eclipse.org/SMILA",
"linksPerBulk": 100,
"filters":{
"urlPatterns": {
"include": ["http://wiki\\.eclipse\\.org/SMILA.*",
"http://wiki\\.eclipse\\.org/Image:.*",
"http://wiki\\.eclipse\\.org/images/.*"],
"exclude": [".*\\?.*" ]
}
},
"mapping": {
"httpCharset": "Charset",
"httpContenttype": "ContentType",
"httpLastModified": "LastModifiedDate",
"httpMimetype": "MimeType",
"httpSize": "Size",
"httpUrl": "Url",
"httpContent": "Content"
}
}
}
]
}