blob: cdbd0865be26df873492403322dcfd2ccd37f603 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" dir="ltr" class="client-nojs">
<head>
<meta charset="UTF-8" />
<title>SMILA/Documentation/Importing/CrawlingMultipleStartURLs - Eclipsepedia</title>
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="generator" content="MediaWiki 1.23.2" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/eclipse.org-common/themes/solstice/public/images/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (en)" />
<link rel="EditURI" type="application/rsd+xml" href="http://wiki.eclipse.org/api.php?action=rsd" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom feed" href="http://wiki.eclipse.org/index.php?title=Special:RecentChanges&amp;feed=atom" />
<link rel="stylesheet" href="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=mediawiki.legacy.commonPrint%2Cshared%7Cmediawiki.ui.button&amp;only=styles&amp;skin=solstice&amp;*" />
<link rel="stylesheet" href="http://wiki.eclipse.org/skins/solstice/public/stylesheets/styles.min.css?303" media="screen, print" /><meta name="ResourceLoaderDynamicStyles" content="" />
<style>a:lang(ar),a:lang(kk-arab),a:lang(mzn),a:lang(ps),a:lang(ur){text-decoration:none}
/* cache key: my_wiki:resourceloader:filter:minify-css:7:14ece53a42aa314864e5fd8c57f0d98f */</style>
<script src="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=startup&amp;only=scripts&amp;skin=solstice&amp;*"></script>
<script>if(window.mw){
mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"SMILA/Documentation/Importing/CrawlingMultipleStartURLs","wgTitle":"SMILA/Documentation/Importing/CrawlingMultipleStartURLs","wgCurRevisionId":346926,"wgRevisionId":346926,"wgArticleId":38496,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["SMILA"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRelevantPageName":"SMILA/Documentation/Importing/CrawlingMultipleStartURLs","wgIsProbablyEditable":false,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgWikiEditorEnabledModules":{"toolbar":false,"dialogs":false,"hidesig":true,"preview":false,"previewDialog":false,"publish":false},"wgCategoryTreePageCategoryOptions":"{\"mode\":0,\"hideprefix\":20,\"showcount\":true,\"namespaces\":false}"});
}</script><script>if(window.mw){
mw.loader.implement("user.options",function($,jQuery){mw.user.options.set({"ccmeonemails":0,"cols":80,"date":"default","diffonly":0,"disablemail":0,"editfont":"default","editondblclick":0,"editsectiononrightclick":0,"enotifminoredits":0,"enotifrevealaddr":0,"enotifusertalkpages":1,"enotifwatchlistpages":1,"extendwatchlist":0,"fancysig":0,"forceeditsummary":0,"gender":"unknown","hideminor":0,"hidepatrolled":0,"imagesize":2,"math":1,"minordefault":0,"newpageshidepatrolled":0,"nickname":"","norollbackdiff":0,"numberheadings":0,"previewonfirst":0,"previewontop":1,"rcdays":7,"rclimit":50,"rows":25,"showhiddencats":0,"shownumberswatching":1,"showtoolbar":1,"skin":"solstice","stubthreshold":0,"thumbsize":2,"underline":2,"uselivepreview":0,"usenewrc":0,"watchcreations":1,"watchdefault":1,"watchdeletion":0,"watchlistdays":3,"watchlisthideanons":0,"watchlisthidebots":0,"watchlisthideliu":0,"watchlisthideminor":0,"watchlisthideown":0,"watchlisthidepatrolled":0,"watchmoves":0,"wllimit":250,
"useeditwarning":1,"prefershttps":1,"language":"en","variant-gan":"gan","variant-iu":"iu","variant-kk":"kk","variant-ku":"ku","variant-shi":"shi","variant-sr":"sr","variant-tg":"tg","variant-uz":"uz","variant-zh":"zh","searchNs0":true,"searchNs1":false,"searchNs2":false,"searchNs3":false,"searchNs4":false,"searchNs5":false,"searchNs6":false,"searchNs7":false,"searchNs8":false,"searchNs9":false,"searchNs10":false,"searchNs11":false,"searchNs12":false,"searchNs13":false,"searchNs14":false,"searchNs15":false,"variant":"en"});},{},{});mw.loader.implement("user.tokens",function($,jQuery){mw.user.tokens.set({"editToken":"+\\","patrolToken":false,"watchToken":false});},{},{});
/* cache key: my_wiki:resourceloader:filter:minify-js:7:70d74423d3fc1e1c18fa9a1ff645a84a */
}</script>
<script>if(window.mw){
mw.loader.load(["mediawiki.page.startup","mediawiki.legacy.wikibits","mediawiki.legacy.ajax"]);
}</script>
<style type="text/css">/*<![CDATA[*/
.source-javascript {line-height: normal;}
.source-javascript li, .source-javascript pre {
line-height: normal; border: 0px none white;
}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for javascript
* CSS class: source-javascript, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann
* (http://qbnz.com/highlighter/ and http://geshi.org/)
* --------------------------------------
*/
.javascript.source-javascript .de1, .javascript.source-javascript .de2 {font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;}
.javascript.source-javascript {font-family:monospace;}
.javascript.source-javascript .imp {font-weight: bold; color: red;}
.javascript.source-javascript li, .javascript.source-javascript .li1 {font-weight: normal; vertical-align:top;}
.javascript.source-javascript .ln {width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;}
.javascript.source-javascript .li2 {font-weight: bold; vertical-align:top;}
.javascript.source-javascript .kw1 {color: #000066; font-weight: bold;}
.javascript.source-javascript .kw2 {color: #003366; font-weight: bold;}
.javascript.source-javascript .kw3 {color: #000066;}
.javascript.source-javascript .kw5 {color: #FF0000;}
.javascript.source-javascript .co1 {color: #006600; font-style: italic;}
.javascript.source-javascript .co2 {color: #009966; font-style: italic;}
.javascript.source-javascript .coMULTI {color: #006600; font-style: italic;}
.javascript.source-javascript .es0 {color: #000099; font-weight: bold;}
.javascript.source-javascript .br0 {color: #009900;}
.javascript.source-javascript .sy0 {color: #339933;}
.javascript.source-javascript .st0 {color: #3366CC;}
.javascript.source-javascript .nu0 {color: #CC0000;}
.javascript.source-javascript .me1 {color: #660066;}
.javascript.source-javascript .ln-xtra, .javascript.source-javascript li.ln-xtra, .javascript.source-javascript div.ln-xtra {background-color: #ffc;}
.javascript.source-javascript span.xtra { display:block; }
/*]]>*/
</style><meta name="viewport" content="width=device-width, initial-scale=1.0"></head>
<body class="mediawiki ltr sitedir-ltr ns-0 ns-subject page-SMILA_Documentation_Importing_CrawlingMultipleStartURLs skin-solstice action-view" id="solstice">
<a class="sr-only" href="CrawlingMultipleStartURLs.html#content">Skip to main content</a>
<div class="thin-header">
<header role="banner" class="hidden-print noprint">
<div class="container-fluid">
<div id="row-logo-search">
<div id="header-left">
<div class="row">
<div class="hidden-xs col-sm-6 logo-container">
<a href="https://www.eclipse.org/" ><img class="logo-eclipse-default" src="http://wiki.eclipse.org/skins/solstice/public/images/logo/eclipse-800x188.png" alt="Eclipsepedia"></a>
</div>
<div class="navbar col-sm-18 yamm" id="main-menu">
<div class="navbar-collapse collapse" id="navbar-collapse-1">
<ul class="nav navbar-nav">
<li><a target="_self" href="https://eclipse.org/downloads/">Download</a></li>
<li><a target="_self" href="https://eclipse.org/users/">Getting Started </a></li>
<li><a target="_self" href="https://eclipse.org/membership/">Members</a></li>
<li><a target="_self" href="https://eclipse.org/projects/">Projects</a></li>
<li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="CrawlingMultipleStartURLs.html#">Community <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="http://marketplace.eclipse.org">Marketplace</a></li><li><a href="http://events.eclipse.org">Events</a></li><li><a href="http://www.planeteclipse.org/">Planet Eclipse</a></li><li><a href="https://eclipse.org/community/eclipse_newsletter/">Newsletter</a></li><li><a href="https://www.youtube.com/user/EclipseFdn">Videos</a></li></ul></li><li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="CrawlingMultipleStartURLs.html#">Participate <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li><li><a href="https://eclipse.org/forums/">Forums</a></li><li><a href="https://eclipse.org/mail/">Mailing Lists</a></li><li><a href="https://wiki.eclipse.org/">Wiki</a></li><li><a href="https://wiki.eclipse.org/IRC">IRC</a></li><li><a href="https://eclipse.org/contribute/">How to Contribute</a></li></ul></li><li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="CrawlingMultipleStartURLs.html#">Working Groups <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="http://wiki.eclipse.org/Auto_IWG">Automotive</a></li><li><a href="http://iot.eclipse.org">Internet of Things</a></li><li><a href="http://locationtech.org">LocationTech</a></li><li><a href="http://lts.eclipse.org">Long-Term Support</a></li><li><a href="http://polarsys.org">PolarSys</a></li><li><a href="http://science.eclipse.org">Science</a></li><li><a href="http://openmdm.org">OpenMDM</a></li></ul></li><!-- More -->
<li class="dropdown hidden-xs"><a class="dropdown-toggle" data-toggle="dropdown">More<b class="caret"></b></a>
<ul class="dropdown-menu">
<li>
<!-- Content container to add padding -->
<div class="yamm-content">
<div class="row">
<ul class="col-sm-8 list-unstyled"><li><p><strong>Community</strong></p></li><li><a href="http://marketplace.eclipse.org">Marketplace</a></li><li><a href="http://events.eclipse.org">Events</a></li><li><a href="http://www.planeteclipse.org/">Planet Eclipse</a></li><li><a href="https://eclipse.org/community/eclipse_newsletter/">Newsletter</a></li><li><a href="https://www.youtube.com/user/EclipseFdn">Videos</a></li></ul><ul class="col-sm-8 list-unstyled"><li><p><strong>Participate</strong></p></li><li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li><li><a href="https://eclipse.org/forums/">Forums</a></li><li><a href="https://eclipse.org/mail/">Mailing Lists</a></li><li><a href="https://wiki.eclipse.org/">Wiki</a></li><li><a href="https://wiki.eclipse.org/IRC">IRC</a></li><li><a href="https://eclipse.org/contribute/">How to Contribute</a></li></ul><ul class="col-sm-8 list-unstyled"><li><p><strong>Working Groups</strong></p></li><li><a href="http://wiki.eclipse.org/Auto_IWG">Automotive</a></li><li><a href="http://iot.eclipse.org">Internet of Things</a></li><li><a href="http://locationtech.org">LocationTech</a></li><li><a href="http://lts.eclipse.org">Long-Term Support</a></li><li><a href="http://polarsys.org">PolarSys</a></li><li><a href="http://science.eclipse.org">Science</a></li><li><a href="http://openmdm.org">OpenMDM</a></li></ul> </div>
</div>
</li>
</ul>
</li>
</ul>
</div>
<div class="navbar-header">
<button data-target="#navbar-collapse-1" data-toggle="collapse" class="navbar-toggle" type="button">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="https://www.eclipse.org/" class="visible-xs navbar-brand"><img class="logo-eclipse-default" src="http://wiki.eclipse.org/skins/solstice/public/images/logo/eclipse-800x188.png" alt="Eclipsepedia" width="174"></a>
</div>
</div>
</div>
</div>
</div>
</div>
</header>
<section class="defaut-breadcrumbs hidden-print noprint hidden-print clearfix" id="breadcrumb">
<div>
<ol class="breadcrumb">
<li><a href="https://www.eclipse.org/">Home</a></li>
<li><a href="http://wiki.eclipse.org/Main_Page">Eclipse Wiki</a></li>
<li class="active">SMILA/Documentation/Importing/CrawlingMultipleStartURLs</li></ol>
</div>
</section>
</div>
<div class="toolbar-menu breadcrumbs-offset noprint hidden-print margin-bottom-0 clearfix">
<div class="col-md-24">
<ol class="breadcrumb" role="navigation">
<li id="pt-login">
<a href="http://wiki.eclipse.org/index.php?title=Special:UserLogin&amp;returnto=SMILA%2FDocumentation%2FImporting%2FCrawlingMultipleStartURLs">
<i class="fa fa-sign-in fa-fw orange"></i> Log in </a>
</li>
</ol>
</div>
</div>
<main role="main" class="background-grey">
<div class="container-full padding-top-25">
<!-- content -->
<section id="content" class="mw-body container-full clearfix 0">
<div id="mw-js-message" style="display:none;"></div>
<!-- bodyContent -->
<div id="bodyContent">
<!-- jumpto -->
<div id="jump-to-nav" class="mw-jump">
Jump to: <a href="CrawlingMultipleStartURLs.html#mw-head">navigation</a>,
<a href="CrawlingMultipleStartURLs.html#p-search">search</a>
</div>
<!-- /jumpto -->
<!-- leftcol -->
<aside class="col-md-4 noprint hidden-print" id="leftcol">
<form class="input-group" role="form" id="form-eclipse-search" action="http://wiki.eclipse.org/index.php">
<input id="searchInput" class="search-query form-control" type="search" accesskey="f" title="Special:Search" placeholder="Search" name="search" value="">
<span class="input-group-btn">
<button value="search" id="mw-searchButton" type="submit" class="btn btn-default" title="Search the pages for this text" name="fulltext">
<i class="fa fa-search"></i>
</button>
</span>
</form>
<select class="form-control margin-top-10 margin-bottom-10 visible-xs visible-sm" onchange="this.options[this.selectedIndex].value && (window.location = this.options[this.selectedIndex].value);"><option class="fw-700 "><span class="fw-700">---Navigation---</span></option><option value="/Main_Page">Main Page</option><option value="/Eclipsepedia:Community_portal">Community portal</option><option value="/Eclipsepedia:Current_events">Current events</option><option value="/Special:RecentChanges">Recent changes</option><option value="/Special:Random">Random page</option><option value="https://www.mediawiki.org/wiki/Special:MyLanguage/Help:Contents">Help</option></select><ul class="ul-left-nav fa-ul hidden-print leftnav hidden-xs hidden-sm"><li class="separator"><span class="separator">Navigation</span></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Main_Page" id="n-mainpage" title="Visit the main page [z]" accesskey="z">Main Page</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Eclipsepedia:Community_portal" id="n-portal" title="About the project, what you can do, where to find things">Community portal</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Eclipsepedia:Current_events" id="n-currentevents" title="Find background information on current events">Current events</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:RecentChanges" id="n-recentchanges" title="A list of recent changes in the wiki [r]" accesskey="r">Recent changes</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:Random" id="n-randompage" title="Load a random page [x]" accesskey="x">Random page</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a 
href="https://www.mediawiki.org/wiki/Special:MyLanguage/Help:Contents" id="n-help" title="The place to find out">Help</a></li></ul> <select class="form-control margin-top-10 margin-bottom-10 visible-xs visible-sm" onchange="this.options[this.selectedIndex].value && (window.location = this.options[this.selectedIndex].value);"><option class="fw-700 "><span class="fw-700">---Toolbox---</span></option><option value="/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;action=info">Page information</option><option value="/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;oldid=346926">Permanent link</option><option value="/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;printable=yes">Printable version</option><option value="/Special:SpecialPages">Special pages</option><option value="/Special:RecentChangesLinked/SMILA/Documentation/Importing/CrawlingMultipleStartURLs">Related changes</option><option value="/Special:WhatLinksHere/SMILA/Documentation/Importing/CrawlingMultipleStartURLs">What links here</option></select><ul class="ul-left-nav fa-ul hidden-print leftnav hidden-xs hidden-sm"><li class="separator"><span class="separator">Toolbox</span></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;action=info" id="t-info">Page information</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;oldid=346926" id="t-permalink" title="Permanent link to this revision of the page">Permanent link</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;printable=yes" id="t-print" rel="alternate" title="Printable version of this page [p]" 
accesskey="p">Printable version</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:SpecialPages" id="t-specialpages" title="A list of all special pages [q]" accesskey="q">Special pages</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:RecentChangesLinked/SMILA/Documentation/Importing/CrawlingMultipleStartURLs" id="t-recentchangeslinked" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:WhatLinksHere/SMILA/Documentation/Importing/CrawlingMultipleStartURLs" id="t-whatlinkshere" title="A list of all wiki pages that link here [j]" accesskey="j">What links here</a></li></ul> </aside>
<!-- /leftcol -->
<!-- mainContent -->
<div id="mainContent" class="col-md-20">
<ul class="nav nav-tabs noprint hidden-print" role="tablist">
<li id="ca-nstab-main" class="active"><a href="CrawlingMultipleStartURLs.html" title="View the content page [c]" accesskey="c" tabindex="-1">Page</a></li>
<li id="ca-talk" class="new"><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;action=edit&amp;redlink=1" title="Discussion about the content page [t]" accesskey="t" tabindex="-1">Discussion</a></li>
<li id="ca-viewsource"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;action=edit" title="This page is protected.&#10;You can view its source [e]" accesskey="e" tabindex="-1">View source</a></li>
<li id="ca-history" class="collapsible"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;action=history" title="Past revisions of this page [h]" accesskey="h" tabindex="-1">History</a></li>
</ul> <div class="tab-content background-white">
<div id="tab-pane-main-page-content" class="tab-pane active">
<h1 id="firstHeading" class="firstHeading page-header">
<span dir="auto">SMILA/Documentation/Importing/CrawlingMultipleStartURLs</span>
</h1>
<div id="main-page-content">
<!-- subtitle -->
<div id="contentSub" class="alert alert-small alert-warning"><span class="subpages">&lt; <a href="../../../SMILA.html" title="SMILA">SMILA</a>&lrm; | <a href="../../Documentation.1.html" title="SMILA/Documentation">Documentation</a></span></div>
<!-- /subtitle -->
<div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr"><div id="toc" class="toc"><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1 tocsection-1"><a href="CrawlingMultipleStartURLs.html#Crawling_multiple_start_URLs_in_one_job_run"><span class="tocnumber">1</span> <span class="toctext">Crawling multiple start URLs in one job run</span></a>
<ul>
<li class="toclevel-2 tocsection-2"><a href="CrawlingMultipleStartURLs.html#Workflow_Definition"><span class="tocnumber">1.1</span> <span class="toctext">Workflow Definition</span></a></li>
<li class="toclevel-2 tocsection-3"><a href="CrawlingMultipleStartURLs.html#Job_Definition"><span class="tocnumber">1.2</span> <span class="toctext">Job Definition</span></a></li>
<li class="toclevel-2 tocsection-4"><a href="CrawlingMultipleStartURLs.html#Running_the_Job"><span class="tocnumber">1.3</span> <span class="toctext">Running the Job</span></a></li>
</ul>
</li>
</ul>
</div>
<h2><span class="mw-headline" id="Crawling_multiple_start_URLs_in_one_job_run">Crawling multiple start URLs in one job run</span></h2>
<p>This page describes an alternative way of using the WebCrawler worker. This way allows you to define multiple start URLs to be crawled in a single job run (but multiple workflow runs) instead of a single start URL only. The main idea is to send each start URL as a simple record to the <a href="../Bulkbuilder.html#Record_push_REST_API" title="SMILA/Documentation/Bulkbuilder">Bulkbuilder Push API</a> at (<tt>/smila/job/&lt;crawlJobName&gt;/record</tt>) instead of specifying a single start URL as a parameter value in the job definition.
</p><p>It would be easy to define more variants that crawl start URLs produced by some other worker, or to cover similar use cases.
</p>
<div class="messagebox" style="background-color: #def3fe; border: 1px solid #c5d7e0; color: black; padding: 5px; margin: 1ex 0; min-height: 35px; padding-left: 45px;">
<div style="float: left; margin-left: -40px;"><a href="http://wiki.eclipse.org/File:Idea.png" class="image"><img alt="Idea.png" src="http://wiki.eclipse.org/images/a/a4/Idea.png" width="35" height="35" /></a></div>
<div>
<p>Though we tested the following workflow and settings with the WebCrawler worker only, similar workflows using other crawler workers should work as well, provided that the used crawler worker is able to crawl follow-up links in its input slot that were produced by the very same worker in a previous task. An example of such a worker is the FileCrawler worker. Some workers might expect internal attributes to be set in these follow-up link records, which might cause problems. Please notify us if you observe such issues so that we can extend the respective worker accordingly.
</p>
<b><br /></b></div>
</div>
<p><br />
</p>
<h3><span class="mw-headline" id="Workflow_Definition">Workflow Definition</span></h3>
<p>You add such a workflow definition:
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="javascript source-javascript"><pre class="de1">POST <span class="sy0">/</span>smila<span class="sy0">/</span>jobmanager<span class="sy0">/</span>workflows<span class="sy0">/</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;name&quot;</span><span class="sy0">:</span><span class="st0">&quot;multiWebCrawling&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;modes&quot;</span><span class="sy0">:</span><span class="br0">&#91;</span>
<span class="st0">&quot;standard&quot;</span>
<span class="br0">&#93;</span><span class="sy0">,</span>
<span class="st0">&quot;parameters&quot;</span><span class="sy0">:</span> <span class="br0">&#123;</span>
<span class="st0">&quot;startUrl&quot;</span><span class="sy0">:</span><span class="st0">&quot;&lt;send start urls via bulkbuilder&gt;&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;bulkLimitSize&quot;</span><span class="sy0">:</span><span class="nu0">1</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="st0">&quot;startAction&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;worker&quot;</span><span class="sy0">:</span><span class="st0">&quot;bulkbuilder&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;output&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;insertedRecords&quot;</span><span class="sy0">:</span><span class="st0">&quot;linksToCrawlBucket&quot;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="st0">&quot;actions&quot;</span><span class="sy0">:</span><span class="br0">&#91;</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;worker&quot;</span><span class="sy0">:</span><span class="st0">&quot;webCrawler&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;input&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;linksToCrawl&quot;</span><span class="sy0">:</span><span class="st0">&quot;linksToCrawlBucket&quot;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="st0">&quot;output&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;linksToCrawl&quot;</span><span class="sy0">:</span><span class="st0">&quot;linksToCrawlBucket&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;crawledRecords&quot;</span><span class="sy0">:</span><span class="st0">&quot;crawledLinksBucket&quot;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;worker&quot;</span><span class="sy0">:</span><span class="st0">&quot;deltaChecker&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;input&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;recordsToCheck&quot;</span><span class="sy0">:</span><span class="st0">&quot;crawledLinksBucket&quot;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="st0">&quot;output&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;updatedRecords&quot;</span><span class="sy0">:</span><span class="st0">&quot;updatedLinksBucket&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;updatedCompounds&quot;</span><span class="sy0">:</span><span class="st0">&quot;compoundLinksBucket&quot;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;worker&quot;</span><span class="sy0">:</span><span class="st0">&quot;webExtractor&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;input&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;compounds&quot;</span><span class="sy0">:</span><span class="st0">&quot;compoundLinksBucket&quot;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="st0">&quot;output&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;files&quot;</span><span class="sy0">:</span><span class="st0">&quot;fetchedLinksBucket&quot;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;worker&quot;</span><span class="sy0">:</span><span class="st0">&quot;webFetcher&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;input&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;linksToFetch&quot;</span><span class="sy0">:</span><span class="st0">&quot;updatedLinksBucket&quot;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="st0">&quot;output&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;fetchedLinks&quot;</span><span class="sy0">:</span><span class="st0">&quot;fetchedLinksBucket&quot;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;worker&quot;</span><span class="sy0">:</span><span class="st0">&quot;updatePusher&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;input&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;recordsToPush&quot;</span><span class="sy0">:</span><span class="st0">&quot;fetchedLinksBucket&quot;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span>
<span class="br0">&#93;</span>
<span class="br0">&#125;</span></pre></div></div>
<p>The differences to the standard "webCrawling" workflow are:
</p>
<ul>
<li> The start action is the bulkbuilder, not the webCrawler itself, so you can send records to this job using the document push API when it is running. We will use this to send records containing the start URLs.
</li>
<li> The bulkbuilder parameter <tt>bulkLimitSize</tt> is set to 1 (byte), so each inserted record will be written to its own bulk and crawled in its own workflow run. This way fatal errors caused by one start URL will not abort the crawl of another start URL.
</li>
<li> The webCrawler parameter <tt>startUrl</tt> is set to a dummy value, because it is required, but we do not need it, so we do not have to include it in the job definition.
</li>
<li> This job wants to run in "standard" mode instead of "runOnce" mode. This means that you have to finish it yourself after providing the start URLs.
</li>
</ul>
<h3><span class="mw-headline" id="Job_Definition">Job Definition</span></h3>
<p>This could be the job definition:
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="javascript source-javascript"><pre class="de1">POST <span class="sy0">/</span>smila<span class="sy0">/</span>jobmanager<span class="sy0">/</span>jobs<span class="sy0">/</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;name&quot;</span><span class="sy0">:</span><span class="st0">&quot;crawlMultipleStartUrls&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;workflow&quot;</span><span class="sy0">:</span><span class="st0">&quot;multiWebCrawling&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;parameters&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;tempStore&quot;</span><span class="sy0">:</span><span class="st0">&quot;temp&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;dataSource&quot;</span><span class="sy0">:</span><span class="st0">&quot;multiweb&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;jobToPushTo&quot;</span><span class="sy0">:</span><span class="st0">&quot;indexUpdate&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;linksPerBulk&quot;</span><span class="sy0">:</span> <span class="nu0">100</span><span class="sy0">,</span>
<span class="st0">&quot;filters&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;maxCrawlDepth&quot;</span><span class="sy0">:</span> <span class="nu0">3</span><span class="sy0">,</span>
<span class="st0">&quot;urlPatterns&quot;</span><span class="sy0">:</span> <span class="br0">&#123;</span>
<span class="st0">&quot;include&quot;</span><span class="sy0">:</span> <span class="br0">&#91;</span>
<span class="st0">&quot;http://.*eclipse<span class="es0">\\</span>.org/.*SMILA.*&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;http://.*eclipse<span class="es0">\\</span>.org/.*smila.*&quot;</span><span class="br0">&#93;</span><span class="sy0">,</span>
<span class="st0">&quot;exclude&quot;</span><span class="sy0">:</span> <span class="br0">&#91;</span><span class="st0">&quot;.*<span class="es0">\\</span>?.*&quot;</span> <span class="br0">&#93;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span><span class="sy0">,</span>
<span class="st0">&quot;mapping&quot;</span><span class="sy0">:</span><span class="br0">&#123;</span>
<span class="st0">&quot;httpCharset&quot;</span><span class="sy0">:</span> <span class="st0">&quot;Charset&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;httpContenttype&quot;</span><span class="sy0">:</span> <span class="st0">&quot;ContentType&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;httpLastModified&quot;</span><span class="sy0">:</span> <span class="st0">&quot;LastModifiedDate&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;httpMimetype&quot;</span><span class="sy0">:</span> <span class="st0">&quot;MimeType&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;httpSize&quot;</span><span class="sy0">:</span> <span class="st0">&quot;Size&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;httpUrl&quot;</span><span class="sy0">:</span> <span class="st0">&quot;Url&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;httpContent&quot;</span><span class="sy0">:</span> <span class="st0">&quot;Content&quot;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span>
<span class="br0">&#125;</span></pre></div></div>
<p>The definition is very similar to a standard crawl job definition; it just does not include the start URL (which was fixed to a dummy value in the workflow definition already). Note that the <tt>urlPatterns</tt> will be applied to all URLs for each start URL, so <tt>include</tt> patterns must be valid for all start URLs you are planning to crawl, or possibly nothing will be crawled at all. In this example we want to crawl only different parts on eclipse.org hosts, so the include patterns will work.
</p>
<div class="messagebox" style="background-color: #def3fe; border: 1px solid #c5d7e0; color: black; padding: 5px; margin: 1ex 0; min-height: 35px; padding-left: 45px;">
<div style="float: left; margin-left: -40px;"><a href="http://wiki.eclipse.org/File:Idea.png" class="image"><img alt="Idea.png" src="http://wiki.eclipse.org/images/a/a4/Idea.png" width="35" height="35" /></a></div>
<div>
<p>You can also use the <tt>stayOn</tt> parameter for such use cases. It will cause the crawler to ignore all links on a web page that do not point to the same host or domain as the URL of the web page itself. See the <a href="Crawler/Web.html#Web_Crawler_Worker" title="SMILA/Documentation/Importing/Crawler/Web">WebCrawlerWorker parameters</a> for details.
</p>
<b><br /></b></div>
</div>
<p><br />
</p>
<h3><span class="mw-headline" id="Running_the_Job">Running the Job</span></h3>
<p>Start the target job and the crawl job:
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="javascript source-javascript"><pre class="de1">POST <span class="sy0">/</span>smila<span class="sy0">/</span>jobmanager<span class="sy0">/</span>jobs<span class="sy0">/</span>indexUpdate<span class="sy0">/</span>
POST <span class="sy0">/</span>smila<span class="sy0">/</span>jobmanager<span class="sy0">/</span>jobs<span class="sy0">/</span>crawlMultipleStartUrls<span class="sy0">/</span></pre></div></div>
<p>The jobs are in <tt>RUNNING</tt> mode now, but nothing else happens.
</p><p>Push start URLs:
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="javascript source-javascript"><pre class="de1">POST <span class="sy0">/</span>smila<span class="sy0">/</span>job<span class="sy0">/</span>crawlMultipleStartUrls<span class="sy0">/</span>record<span class="sy0">/</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;_recordid&quot;</span><span class="sy0">:</span> <span class="st0">&quot;startUrl&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;httpUrl&quot;</span><span class="sy0">:</span><span class="st0">&quot;http://www.eclipse.org/smila&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;crawlDepth&quot;</span><span class="sy0">:</span> <span class="nu0">4</span>
<span class="br0">&#125;</span></pre></div></div>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="javascript source-javascript"><pre class="de1">POST <span class="sy0">/</span>smila<span class="sy0">/</span>job<span class="sy0">/</span>crawlMultipleStartUrls<span class="sy0">/</span>record<span class="sy0">/</span>
<span class="br0">&#123;</span>
<span class="st0">&quot;_recordid&quot;</span><span class="sy0">:</span> <span class="st0">&quot;startUrl&quot;</span><span class="sy0">,</span>
<span class="st0">&quot;httpUrl&quot;</span><span class="sy0">:</span><span class="st0">&quot;http://wiki.eclipse.org/SMILA&quot;</span>
<span class="br0">&#125;</span></pre></div></div>
<p>Things to note:
</p>
<ul>
<li> The value of the <tt>_recordid</tt> attribute is irrelevant, but the bulkbuilder requires it to be set.
</li>
<li> The start URL must be provided as attribute <tt>httpUrl</tt>, regardless of the attribute mapping specified in the job.
</li>
<li> The optional <tt>crawlDepth</tt> parameter can be used to specify an individual crawl depth for the given (start) url. When this parameter isn't set, the Web Crawler Worker <i>maxCrawlDepth</i> parameter will be used as default. If <i>maxCrawlDepth</i> is also not set, the crawl depth is unlimited.
<ul>
<li> In the first pushed record above, <tt>"crawlDepth: 4"</tt> is set, so this is used as limit when following links.
</li>
<li> In the second record, no crawlDepth is set, so <tt>"maxCrawlDepth: 3"</tt> (set in the crawl job above) will be used.
</li>
<li> Hint: You can use <tt>"crawlDepth: -1"</tt> to make the crawl depth unlimited, regardless of any <i>maxCrawlDepth</i> setting.
</li>
</ul>
</li>
</ul>
<p>Finish the job:
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="javascript source-javascript"><pre class="de1">POST <span class="sy0">/</span>smila<span class="sy0">/</span>jobmanager<span class="sy0">/</span>jobs<span class="sy0">/</span>crawlMultipleStartUrls<span class="sy0">/</span><span class="nu0">20120823</span><span class="sy0">-</span><span class="nu0">164700474635</span><span class="sy0">/</span>finish<span class="sy0">/</span></pre></div></div>
<p>(Of course, you have to replace the job run ID in the URL with the ID of your own job run.)
</p><p>Finishing the job will also trigger the delta-delete once the crawling is done. Note that you should disable delta-delete if you do not crawl all start URLs in each job run, or else the documents from the start URLs not crawled in the latest job run will be removed from the index.
</p>
<!--
NewPP limit report
CPU time usage: 0.140 seconds
Real time usage: 0.161 seconds
Preprocessor visited node count: 114/1000000
Preprocessor generated node count: 369/1000000
Post‐expand include size: 3753/2097152 bytes
Template argument size: 2689/2097152 bytes
Highest expansion depth: 5/40
Expensive parser function count: 0/100
-->
<!-- Saved in parser cache with key my_wiki:pcache:idhash:38496-0!*!0!!en!2!* and timestamp 20150413202136 and revision id 346926
-->
</div>
<!-- catlinks -->
<div id='catlinks' class='catlinks'><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="http://wiki.eclipse.org/Special:Categories" title="Special:Categories">Category</a>: <ul><li><a href="http://wiki.eclipse.org/Category:SMILA" title="Category:SMILA">SMILA</a></li></ul></div></div> <!-- /catlinks -->
</div>
</div>
</div>
</div>
<!-- /maincontent -->
<!-- printfooter -->
<div class="printfooter">
Retrieved from "<a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;oldid=346926">http://wiki.eclipse.org/index.php?title=SMILA/Documentation/Importing/CrawlingMultipleStartURLs&amp;oldid=346926</a>" </div>
<!-- /printfooter -->
<!-- debughtml -->
<!-- /debughtml -->
</div>
<!-- /bodyContent -->
</section>
<!-- /content -->
<!-- footer -->
</div> <section id="footer-contribution-info" style="border-top:1px solid #ccc;" class="footer-offset background-white margin-top-25"><div class="container text-center padding-top-10 padding-bottom-10"><p id="footercredit">This page was last modified 11:01, 10 September 2013 by <a href="http://wiki.eclipse.org/User:Andreas.weber.empolis.com" title="User:Andreas.weber.empolis.com">Andreas Weber</a>. Based on work by <a href="http://wiki.eclipse.org/index.php?title=User:Nadine.auslaender.empolis.com&amp;action=edit&amp;redlink=1" class="new" title="User:Nadine.auslaender.empolis.com (page does not exist)">Nadine Ausländer</a> and <a href="http://wiki.eclipse.org/User:Juergen.schumacher.empolis.com" title="User:Juergen.schumacher.empolis.com">Juergen Schumacher</a>.</p><p id="footerviews">This page has been accessed 1,726 times.</p></div></section> </main> <!-- /#main-content-container-row -->
<p id="back-to-top" class="noprint hidden-print">
<a class="visible-xs" href="CrawlingMultipleStartURLs.html#top">Back to the top</a>
</p>
<!-- Site footer: four link columns (Eclipse Foundation, Legal, Useful Links, Other), social-media icon links, and the copyright notice. The social links contain only an icon element, so each carries an aria-label for assistive technology and the icon itself is hidden from it. -->
<footer role="contentinfo" class="noprint hidden-print">
<div class="container">
<div class="row">
<section id="footer-eclipse-foundation" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Eclipse Foundation</h2>
<ul class="nav">
<li><a href="https://eclipse.org/org/">About us</a></li>
<li><a href="https://eclipse.org/org/foundation/contact.php">Contact Us</a></li>
<li><a href="https://eclipse.org/donate">Donate</a></li>
<li><a href="https://eclipse.org/org/documents/">Governance</a></li>
<li><a href="https://eclipse.org/artwork/">Logo and Artwork</a></li>
<li><a href="https://eclipse.org/org/foundation/directors.php">Board of Directors</a></li>
</ul>
</section>
<section id="footer-legal" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Legal</h2>
<ul class="nav">
<li><a href="https://eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="https://eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="https://eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="https://eclipse.org/org/documents/epl-v10.php">Eclipse Public License </a></li>
<li><a href="https://eclipse.org/legal/">Legal Resources </a></li>
</ul>
</section>
<section id="footer-useful-links" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Useful Links</h2>
<ul class="nav">
<li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li>
<li><a href="http://help.eclipse.org/">Documentation</a></li>
<li><a href="https://eclipse.org/contribute/">How to Contribute</a></li>
<li><a href="https://eclipse.org/mail/">Mailing Lists</a></li>
<li><a href="https://eclipse.org/forums/">Forums</a></li>
<li><a href="http://marketplace.eclipse.org/">Marketplace</a></li>
</ul>
</section>
<section id="footer-other" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Other</h2>
<ul class="nav">
<li><a href="https://eclipse.org/ide/">IDE and Tools</a></li>
<li><a href="https://eclipse.org/projects">Community of Projects</a></li>
<li><a href="https://eclipse.org/org/workinggroups/">Working Groups</a></li>
</ul>
<!-- Icon-only links: the accessible name comes from aria-label; the icon element is decorative. -->
<ul class="list-inline social-media">
<li><a href="https://twitter.com/EclipseFdn" aria-label="Eclipse Foundation on Twitter"><i class="fa fa-twitter-square" aria-hidden="true"></i></a></li>
<li><a href="https://plus.google.com/+Eclipse" aria-label="Eclipse Foundation on Google+"><i class="fa fa-google-plus-square" aria-hidden="true"></i></a></li>
<li><a href="https://www.facebook.com/eclipse.org" aria-label="Eclipse Foundation on Facebook"><i class="fa fa-facebook-square" aria-hidden="true"></i> </a></li>
<li><a href="https://www.youtube.com/user/EclipseFdn" aria-label="Eclipse Foundation on YouTube"><i class="fa fa-youtube-square" aria-hidden="true"></i></a></li>
</ul>
</section>
<div id="copyright" class="col-xs-offset-1 col-sm-14 col-md-24 col-md-offset-0">
<div>
<!-- Logo is fetched over https, like the other eclipse.org URLs in this footer. -->
<span><img src="https://eclipse.org/eclipse.org-common/themes/solstice/public/images/logo/eclipse-logo-bw-800x188.png" alt="Eclipse.org black and white logo" width="166" height="39" id="logo-eclipse-white"/></span>
<p id="copyright-text">Copyright &copy; 2014 The Eclipse Foundation. All Rights Reserved.</p>
</div>
</div>
<a href="CrawlingMultipleStartURLs.html#" class="scrollup">Back to the top</a>
</div>
</div>
</footer>
<script src="http://wiki.eclipse.org/skins/solstice/public/javascript/main.min.js"></script>
<!-- Placed at the end of the document so the pages load faster -->
<script type="text/javascript">
// Legacy Google Analytics (ga.js) bootstrap.
// Tracking commands are queued on the global _gaq array; an existing
// _gaq value is reused rather than overwritten.
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-910670-2']);
_gaq.push(['_trackPageview']);
(function () {
  // Build a <script> element that loads ga.js without blocking the page.
  var tracker = document.createElement('script');
  tracker.type = 'text/javascript';
  tracker.async = true;

  // Pick the ga.js host prefix that matches the page's own protocol.
  var hostPrefix;
  if ('https:' == document.location.protocol) {
    hostPrefix = 'https://ssl';
  } else {
    hostPrefix = 'http://www';
  }
  tracker.src = hostPrefix + '.google-analytics.com/ga.js';

  // Insert the loader immediately before the first <script> in the document.
  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(tracker, firstScript);
})();
</script> <!-- /footer -->
<!-- Generated MediaWiki bootstrap. First script: calls jQuery.ready() when window.jQuery is defined. Second script: when the mw global exists, passes a module-name to state map to mw.loader.state. -->
<script>/*<![CDATA[*/window.jQuery && jQuery.ready();/*]]>*/</script><script>if(window.mw){
mw.loader.state({"skins.solstice":"loading","site":"ready","user":"ready","user.groups":"ready"});
}</script>
<!-- Requests the scripts of the skins.solstice module from load.php (the module marked "loading" above). -->
<script src="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=skins.solstice&amp;only=scripts&amp;skin=solstice&amp;*"></script>
<!-- When the mw global exists, asks mw.loader.load for the listed modules. NOTE(review): the meaning of the trailing null and true arguments is not visible here; confirm against the ResourceLoader API. -->
<script>if(window.mw){
mw.loader.load(["mediawiki.action.view.postEdit","mediawiki.user","mediawiki.hidpi","mediawiki.page.ready","mediawiki.searchSuggest"],null,true);
}</script>
<!-- When the mw global exists, stores wgBackendResponseTime (338; unit presumably milliseconds, to be confirmed) in mw.config. -->
<script>if(window.mw){
mw.config.set({"wgBackendResponseTime":338});
}</script> </body>
</html>