{
"jobs":[
{
"name":"indexUpdate",
"workflow":"indexUpdate",
"parameters":{
"tempStore":"temp"
}
},
{
"name":"indexUpdateFeed",
"workflow":"indexUpdateFeed",
"parameters":{
"tempStore":"temp"
}
},
{
"name":"indexUpdateXml",
"workflow":"indexUpdateXml",
"parameters":{
"tempStore":"temp"
}
},
{
"name":"crawlFilesystem",
"workflow":"fileCrawling",
"parameters":{
"tempStore":"temp",
"dataSource":"file",
"rootFolder":"< EDIT: directory to crawl >",
"jobToPushTo":"indexUpdate",
"mapping":{
"fileContent":"Content",
"filePath":"Path",
"fileName":"Filename",
"fileSize":"Size",
"fileExtension":"Extension",
"fileLastModified":"LastModifiedDate"
}
}
},
{
"name":"crawlSmilaWiki",
"workflow":"webCrawling",
"parameters":{
"tempStore":"temp",
"dataSource":"web",
"jobToPushTo":"indexUpdate",
"startUrl":"http://wiki.eclipse.org/SMILA",
"linksPerBulk": 100,
"filters":{
"urlPatterns": {
"include": ["http://wiki\\.eclipse\\.org/SMILA.*",
"http://wiki\\.eclipse\\.org/Image:.*",
"http://wiki\\.eclipse\\.org/images/.*"],
"exclude": [".*\\?.*",
"http://wiki\\.eclipse\\.org/images/archive/.*",
".*\\.java"]
}
},
"mapping": {
"httpCharset": "Charset",
"httpContenttype": "ContentType",
"httpLastModified": "LastModifiedDate",
"httpMimetype": "MimeType",
"httpSize": "Size",
"httpUrl": "Url",
"httpContent": "Content"
}
}
},
{
"name":"crawlFeed",
"workflow":"feedCrawling",
"parameters":{
"tempStore":"temp",
"dataSource":"feed",
"jobToPushTo":"indexUpdateFeed",
"feedUrls":["https://www.eclipse.org/forums/feed.php?mode=m&l=1&basic=1&frm=36&n=100",
"https://dev.eclipse.org/mhonarc/lists/smila-user/maillist.rss",
"https://dev.eclipse.org/mhonarc/lists/smila-dev/maillist.rss"],
"deltaProperties": ["itemPublishDate", "itemUpdateDate"],
"deltaImportStrategy":"additive",
"mapping": {
"itemUri":"Url",
"itemTitle":"Title",
"itemUpdateDate":"LastModifiedDate",
"itemContents": "Contents",
"itemDescription": "Description",
"itemAuthors": "Authors",
"itemLinks": "Links"
}
}
}
]
}