<!-- blob: ed9c7304de4956da28505bea3cec34e3ba2e9c68 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" dir="ltr" class="client-nojs">
<head>
<meta charset="UTF-8" />
<title>SMILA/Documentation/JDBC Crawler - Eclipsepedia</title>
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="generator" content="MediaWiki 1.23.2" />
<link rel="shortcut icon" href="http://wiki.eclipse.org/eclipse.org-common/themes/solstice/public/images/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="http://wiki.eclipse.org/opensearch_desc.php" title="Eclipsepedia (en)" />
<link rel="EditURI" type="application/rsd+xml" href="http://wiki.eclipse.org/api.php?action=rsd" />
<link rel="alternate" type="application/atom+xml" title="Eclipsepedia Atom feed" href="http://wiki.eclipse.org/index.php?title=Special:RecentChanges&amp;feed=atom" />
<link rel="stylesheet" href="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=mediawiki.legacy.commonPrint%2Cshared%7Cmediawiki.ui.button&amp;only=styles&amp;skin=solstice&amp;*" />
<link rel="stylesheet" href="http://wiki.eclipse.org/skins/solstice/public/stylesheets/styles.min.css?303" media="screen, print" /><meta name="ResourceLoaderDynamicStyles" content="" />
<style>a:lang(ar),a:lang(kk-arab),a:lang(mzn),a:lang(ps),a:lang(ur){text-decoration:none}
/* cache key: my_wiki:resourceloader:filter:minify-css:7:14ece53a42aa314864e5fd8c57f0d98f */</style>
<script src="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=startup&amp;only=scripts&amp;skin=solstice&amp;*"></script>
<script>if(window.mw){
mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"SMILA/Documentation/JDBC_Crawler","wgTitle":"SMILA/Documentation/JDBC Crawler","wgCurRevisionId":372805,"wgRevisionId":372805,"wgArticleId":16918,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":[],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRelevantPageName":"SMILA/Documentation/JDBC_Crawler","wgIsProbablyEditable":false,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgWikiEditorEnabledModules":{"toolbar":false,"dialogs":false,"hidesig":true,"preview":false,"previewDialog":false,"publish":false},"wgCategoryTreePageCategoryOptions":"{\"mode\":0,\"hideprefix\":20,\"showcount\":true,\"namespaces\":false}"});
}</script><script>if(window.mw){
mw.loader.implement("user.options",function($,jQuery){mw.user.options.set({"ccmeonemails":0,"cols":80,"date":"default","diffonly":0,"disablemail":0,"editfont":"default","editondblclick":0,"editsectiononrightclick":0,"enotifminoredits":0,"enotifrevealaddr":0,"enotifusertalkpages":1,"enotifwatchlistpages":1,"extendwatchlist":0,"fancysig":0,"forceeditsummary":0,"gender":"unknown","hideminor":0,"hidepatrolled":0,"imagesize":2,"math":1,"minordefault":0,"newpageshidepatrolled":0,"nickname":"","norollbackdiff":0,"numberheadings":0,"previewonfirst":0,"previewontop":1,"rcdays":7,"rclimit":50,"rows":25,"showhiddencats":0,"shownumberswatching":1,"showtoolbar":1,"skin":"solstice","stubthreshold":0,"thumbsize":2,"underline":2,"uselivepreview":0,"usenewrc":0,"watchcreations":1,"watchdefault":1,"watchdeletion":0,"watchlistdays":3,"watchlisthideanons":0,"watchlisthidebots":0,"watchlisthideliu":0,"watchlisthideminor":0,"watchlisthideown":0,"watchlisthidepatrolled":0,"watchmoves":0,"wllimit":250,
"useeditwarning":1,"prefershttps":1,"language":"en","variant-gan":"gan","variant-iu":"iu","variant-kk":"kk","variant-ku":"ku","variant-shi":"shi","variant-sr":"sr","variant-tg":"tg","variant-uz":"uz","variant-zh":"zh","searchNs0":true,"searchNs1":false,"searchNs2":false,"searchNs3":false,"searchNs4":false,"searchNs5":false,"searchNs6":false,"searchNs7":false,"searchNs8":false,"searchNs9":false,"searchNs10":false,"searchNs11":false,"searchNs12":false,"searchNs13":false,"searchNs14":false,"searchNs15":false,"variant":"en"});},{},{});mw.loader.implement("user.tokens",function($,jQuery){mw.user.tokens.set({"editToken":"+\\","patrolToken":false,"watchToken":false});},{},{});
/* cache key: my_wiki:resourceloader:filter:minify-js:7:70d74423d3fc1e1c18fa9a1ff645a84a */
}</script>
<script>if(window.mw){
mw.loader.load(["mediawiki.page.startup","mediawiki.legacy.wikibits","mediawiki.legacy.ajax"]);
}</script>
<style type="text/css">/*<![CDATA[*/
.source-xml {line-height: normal;}
.source-xml li, .source-xml pre {
line-height: normal; border: 0px none white;
}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for xml
* CSS class: source-xml, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann
* (http://qbnz.com/highlighter/ and http://geshi.org/)
* --------------------------------------
*/
.xml.source-xml .de1, .xml.source-xml .de2 {font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;}
.xml.source-xml {font-family:monospace;}
.xml.source-xml .imp {font-weight: bold; color: red;}
.xml.source-xml li, .xml.source-xml .li1 {font-weight: normal; vertical-align:top;}
.xml.source-xml .ln {width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;}
.xml.source-xml .li2 {font-weight: bold; vertical-align:top;}
.xml.source-xml .es0 {color: #000099; font-weight: bold;}
.xml.source-xml .br0 {color: #66cc66;}
.xml.source-xml .sy0 {color: #66cc66;}
.xml.source-xml .st0 {color: #ff0000;}
.xml.source-xml .nu0 {color: #cc66cc;}
.xml.source-xml .sc-1 {color: #808080; font-style: italic;}
.xml.source-xml .sc0 {color: #00bbdd;}
.xml.source-xml .sc1 {color: #ddbb00;}
.xml.source-xml .sc2 {color: #339933;}
.xml.source-xml .sc3 {color: #009900;}
.xml.source-xml .re0 {color: #000066;}
.xml.source-xml .re1 {color: #000000; font-weight: bold;}
.xml.source-xml .re2 {color: #000000; font-weight: bold;}
.xml.source-xml .ln-xtra, .xml.source-xml li.ln-xtra, .xml.source-xml div.ln-xtra {background-color: #ffc;}
.xml.source-xml span.xtra { display:block; }
/*]]>*/
</style><style type="text/css">/*<![CDATA[*/
.source-sql {line-height: normal;}
.source-sql li, .source-sql pre {
line-height: normal; border: 0px none white;
}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for sql
* CSS class: source-sql, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann
* (http://qbnz.com/highlighter/ and http://geshi.org/)
* --------------------------------------
*/
.sql.source-sql .de1, .sql.source-sql .de2 {font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;}
.sql.source-sql {font-family:monospace;}
.sql.source-sql .imp {font-weight: bold; color: red;}
.sql.source-sql li, .sql.source-sql .li1 {font-weight: normal; vertical-align:top;}
.sql.source-sql .ln {width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;}
.sql.source-sql .li2 {font-weight: bold; vertical-align:top;}
.sql.source-sql .kw1 {color: #993333; font-weight: bold;}
.sql.source-sql .co1 {color: #808080; font-style: italic;}
.sql.source-sql .coMULTI {color: #808080; font-style: italic;}
.sql.source-sql .es0 {color: #000099; font-weight: bold;}
.sql.source-sql .br0 {color: #66cc66;}
.sql.source-sql .sy0 {color: #66cc66;}
.sql.source-sql .st0 {color: #ff0000;}
.sql.source-sql .nu0 {color: #cc66cc;}
.sql.source-sql .ln-xtra, .sql.source-sql li.ln-xtra, .sql.source-sql div.ln-xtra {background-color: #ffc;}
.sql.source-sql span.xtra { display:block; }
/*]]>*/
</style><style type="text/css">/*<![CDATA[*/
.source-java {line-height: normal;}
.source-java li, .source-java pre {
line-height: normal; border: 0px none white;
}
/**
* GeSHi Dynamically Generated Stylesheet
* --------------------------------------
* Dynamically generated stylesheet for java
* CSS class: source-java, CSS id:
* GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann
* (http://qbnz.com/highlighter/ and http://geshi.org/)
* --------------------------------------
*/
.java.source-java .de1, .java.source-java .de2 {font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;}
.java.source-java {font-family:monospace;}
.java.source-java .imp {font-weight: bold; color: red;}
.java.source-java li, .java.source-java .li1 {font-weight: normal; vertical-align:top;}
.java.source-java .ln {width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;}
.java.source-java .li2 {font-weight: bold; vertical-align:top;}
.java.source-java .kw1 {color: #7F0055; font-weight: bold;}
.java.source-java .kw2 {color: #7F0055; font-weight: bold;}
.java.source-java .kw3 {color: #000000; font-weight: normal}
.java.source-java .kw4 {color: #7F0055; font-weight: bold;}
.java.source-java .co1 {color: #3F7F5F; font-style: italic;}
.java.source-java .co2 {color: #3F7F5F;}
.java.source-java .co3 {color: #3F7F5F; font-style: italic; font-weight: bold;}
.java.source-java .coMULTI {color: #3F5FBF; font-style: italic;}
.java.source-java .es0 {color: #000000;}
.java.source-java .br0 {color: #000000;}
.java.source-java .sy0 {color: #000000;}
.java.source-java .st0 {color: #2A00ff;}
.java.source-java .nu0 {color: #000000;}
.java.source-java .me1 {color: #000000;}
.java.source-java .me2 {color: #000000;}
.java.source-java .ln-xtra, .java.source-java li.ln-xtra, .java.source-java div.ln-xtra {background-color: #ffc;}
.java.source-java span.xtra { display:block; }
/*]]>*/
</style><meta name="viewport" content="width=device-width, initial-scale=1.0"></head>
<body class="mediawiki ltr sitedir-ltr ns-0 ns-subject page-SMILA_Documentation_JDBC_Crawler skin-solstice action-view" id="solstice">
<a class="sr-only" href="JDBC_Crawler.html#content">Skip to main content</a>
<div class="thin-header">
<header role="banner" class="hidden-print noprint">
<div class="container-fluid">
<div id="row-logo-search">
<div id="header-left">
<div class="row">
<div class="hidden-xs col-sm-6 logo-container">
<a href="https://www.eclipse.org/" ><img class="logo-eclipse-default" src="http://wiki.eclipse.org/skins/solstice/public/images/logo/eclipse-800x188.png" alt="Eclipsepedia"></a>
</div>
<div class="navbar col-sm-18 yamm" id="main-menu">
<div class="navbar-collapse collapse" id="navbar-collapse-1">
<ul class="nav navbar-nav">
<li><a target="_self" href="https://eclipse.org/downloads/">Download</a></li>
<li><a target="_self" href="https://eclipse.org/users/">Getting Started </a></li>
<li><a target="_self" href="https://eclipse.org/membership/">Members</a></li>
<li><a target="_self" href="https://eclipse.org/projects/">Projects</a></li>
<li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="JDBC_Crawler.html#">Community <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="http://marketplace.eclipse.org">Marketplace</a></li><li><a href="http://events.eclipse.org">Events</a></li><li><a href="http://www.planeteclipse.org/">Planet Eclipse</a></li><li><a href="https://eclipse.org/community/eclipse_newsletter/">Newsletter</a></li><li><a href="https://www.youtube.com/user/EclipseFdn">Videos</a></li></ul></li><li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="JDBC_Crawler.html#">Participate <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li><li><a href="https://eclipse.org/forums/">Forums</a></li><li><a href="https://eclipse.org/mail/">Mailing Lists</a></li><li><a href="https://wiki.eclipse.org/">Wiki</a></li><li><a href="https://wiki.eclipse.org/IRC">IRC</a></li><li><a href="https://eclipse.org/contribute/">How to Contribute</a></li></ul></li><li class="dropdown visible-xs"><a class="dropdown-toggle" data-toggle="dropdown" href="JDBC_Crawler.html#">Working Groups <b class="caret"></b></a><ul class="dropdown-menu"><li><a href="http://wiki.eclipse.org/Auto_IWG">Automotive</a></li><li><a href="http://iot.eclipse.org">Internet of Things</a></li><li><a href="http://locationtech.org">LocationTech</a></li><li><a href="http://lts.eclipse.org">Long-Term Support</a></li><li><a href="http://polarsys.org">PolarSys</a></li><li><a href="http://science.eclipse.org">Science</a></li><li><a href="http://openmdm.org">OpenMDM</a></li></ul></li><!-- More -->
<li class="dropdown hidden-xs"><a class="dropdown-toggle" data-toggle="dropdown">More<b class="caret"></b></a>
<ul class="dropdown-menu">
<li>
<!-- Content container to add padding -->
<div class="yamm-content">
<div class="row">
<ul class="col-sm-8 list-unstyled"><li><p><strong>Community</strong></p></li><li><a href="http://marketplace.eclipse.org">Marketplace</a></li><li><a href="http://events.eclipse.org">Events</a></li><li><a href="http://www.planeteclipse.org/">Planet Eclipse</a></li><li><a href="https://eclipse.org/community/eclipse_newsletter/">Newsletter</a></li><li><a href="https://www.youtube.com/user/EclipseFdn">Videos</a></li></ul><ul class="col-sm-8 list-unstyled"><li><p><strong>Participate</strong></p></li><li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li><li><a href="https://eclipse.org/forums/">Forums</a></li><li><a href="https://eclipse.org/mail/">Mailing Lists</a></li><li><a href="https://wiki.eclipse.org/">Wiki</a></li><li><a href="https://wiki.eclipse.org/IRC">IRC</a></li><li><a href="https://eclipse.org/contribute/">How to Contribute</a></li></ul><ul class="col-sm-8 list-unstyled"><li><p><strong>Working Groups</strong></p></li><li><a href="http://wiki.eclipse.org/Auto_IWG">Automotive</a></li><li><a href="http://iot.eclipse.org">Internet of Things</a></li><li><a href="http://locationtech.org">LocationTech</a></li><li><a href="http://lts.eclipse.org">Long-Term Support</a></li><li><a href="http://polarsys.org">PolarSys</a></li><li><a href="http://science.eclipse.org">Science</a></li><li><a href="http://openmdm.org">OpenMDM</a></li></ul> </div>
</div>
</li>
</ul>
</li>
</ul>
</div>
<div class="navbar-header">
<button data-target="#navbar-collapse-1" data-toggle="collapse" class="navbar-toggle" type="button">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="https://www.eclipse.org/" class="visible-xs navbar-brand"><img class="logo-eclipse-default" src="http://wiki.eclipse.org/skins/solstice/public/images/logo/eclipse-800x188.png" alt="Eclipsepedia" width="174"></a>
</div>
</div>
</div>
</div>
</div>
</div>
</header>
<section class="defaut-breadcrumbs hidden-print noprint hidden-print clearfix" id="breadcrumb">
<div>
<ol class="breadcrumb">
<li><a href="https://www.eclipse.org/">Home</a></li>
<li><a href="http://wiki.eclipse.org/Main_Page">Eclipse Wiki</a></li>
<li class="active">SMILA/Documentation/JDBC Crawler</li></ol>
</div>
</section>
</div>
<div class="toolbar-menu breadcrumbs-offset noprint hidden-print margin-bottom-0 clearfix">
<div class="col-md-24">
<ol class="breadcrumb" role="navigation">
<li id="pt-login">
<a href="http://wiki.eclipse.org/index.php?title=Special:UserLogin&amp;returnto=SMILA%2FDocumentation%2FJDBC+Crawler">
<i class="fa fa-sign-in fa-fw orange"></i> Log in </a>
</li>
</ol>
</div>
</div>
<main role="main" class="background-grey">
<div class="container-full padding-top-25">
<!-- content -->
<section id="content" class="mw-body container-full clearfix 0">
<div id="mw-js-message" style="display:none;"></div>
<!-- bodyContent -->
<div id="bodyContent">
<!-- jumpto -->
<div id="jump-to-nav" class="mw-jump">
Jump to: <a href="JDBC_Crawler.html#mw-head">navigation</a>,
<a href="JDBC_Crawler.html#p-search">search</a>
</div>
<!-- /jumpto -->
<!-- leftcol -->
<aside class="col-md-4 noprint hidden-print" id="leftcol">
<form class="input-group" role="form" id="form-eclipse-search" action="http://wiki.eclipse.org/index.php">
<input id="searchInput" class="search-query form-control" type="search" accesskey="f" title="Special:Search" placeholder="Search" name="search" value="">
<span class="input-group-btn">
<button value="search" id="mw-searchButton" type="submit" class="btn btn-default" title="Search the pages for this text" name="fulltext">
<i class="fa fa-search"></i>
</button>
</span>
</form>
<select class="form-control margin-top-10 margin-bottom-10 visible-xs visible-sm" onchange="this.options[this.selectedIndex].value && (window.location = this.options[this.selectedIndex].value);"><option class="fw-700 "><span class="fw-700">---Navigation---</span></option><option value="/Main_Page">Main Page</option><option value="/Eclipsepedia:Community_portal">Community portal</option><option value="/Eclipsepedia:Current_events">Current events</option><option value="/Special:RecentChanges">Recent changes</option><option value="/Special:Random">Random page</option><option value="https://www.mediawiki.org/wiki/Special:MyLanguage/Help:Contents">Help</option></select><ul class="ul-left-nav fa-ul hidden-print leftnav hidden-xs hidden-sm"><li class="separator"><span class="separator">Navigation</span></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Main_Page" id="n-mainpage" title="Visit the main page [z]" accesskey="z">Main Page</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Eclipsepedia:Community_portal" id="n-portal" title="About the project, what you can do, where to find things">Community portal</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Eclipsepedia:Current_events" id="n-currentevents" title="Find background information on current events">Current events</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:RecentChanges" id="n-recentchanges" title="A list of recent changes in the wiki [r]" accesskey="r">Recent changes</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:Random" id="n-randompage" title="Load a random page [x]" accesskey="x">Random page</a></li> <li class=""><i class="fa fa-angle-double-right orange fa-fw"></i> <a 
href="https://www.mediawiki.org/wiki/Special:MyLanguage/Help:Contents" id="n-help" title="The place to find out">Help</a></li></ul> <select class="form-control margin-top-10 margin-bottom-10 visible-xs visible-sm" onchange="this.options[this.selectedIndex].value && (window.location = this.options[this.selectedIndex].value);"><option class="fw-700 "><span class="fw-700">---Toolbox---</span></option><option value="/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;action=info">Page information</option><option value="/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;oldid=372805">Permanent link</option><option value="/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;printable=yes">Printable version</option><option value="/Special:SpecialPages">Special pages</option><option value="/Special:RecentChangesLinked/SMILA/Documentation/JDBC_Crawler">Related changes</option><option value="/Special:WhatLinksHere/SMILA/Documentation/JDBC_Crawler">What links here</option></select><ul class="ul-left-nav fa-ul hidden-print leftnav hidden-xs hidden-sm"><li class="separator"><span class="separator">Toolbox</span></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;action=info" id="t-info">Page information</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;oldid=372805" id="t-permalink" title="Permanent link to this revision of the page">Permanent link</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;printable=yes" id="t-print" rel="alternate" title="Printable version of this page [p]" accesskey="p">Printable version</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:SpecialPages" id="t-specialpages" title="A list of all 
special pages [q]" accesskey="q">Special pages</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:RecentChangesLinked/SMILA/Documentation/JDBC_Crawler" id="t-recentchangeslinked" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li><li><i class="fa fa-angle-double-right orange fa-fw"></i> <a href="http://wiki.eclipse.org/Special:WhatLinksHere/SMILA/Documentation/JDBC_Crawler" id="t-whatlinkshere" title="A list of all wiki pages that link here [j]" accesskey="j">What links here</a></li></ul> </aside>
<!-- /leftcol -->
<!-- mainContent -->
<div id="mainContent" class="col-md-20">
<ul class="nav nav-tabs noprint hidden-print" role="tablist">
<li id="ca-nstab-main" class="active"><a href="JDBC_Crawler.html" title="View the content page [c]" accesskey="c" tabindex="-1">Page</a></li>
<li id="ca-talk" class="new"><a href="http://wiki.eclipse.org/index.php?title=Talk:SMILA/Documentation/JDBC_Crawler&amp;action=edit&amp;redlink=1" title="Discussion about the content page [t]" accesskey="t" tabindex="-1">Discussion</a></li>
<li id="ca-viewsource"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;action=edit" title="This page is protected.&#10;You can view its source [e]" accesskey="e" tabindex="-1">View source</a></li>
<li id="ca-history" class="collapsible"><a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;action=history" title="Past revisions of this page [h]" accesskey="h" tabindex="-1">History</a></li>
</ul> <div class="tab-content background-white">
<div id="tab-pane-main-page-content" class="tab-pane active">
<h1 id="firstHeading" class="firstHeading page-header">
<span dir="auto">SMILA/Documentation/JDBC Crawler</span>
</h1>
<div id="main-page-content">
<!-- subtitle -->
<div id="contentSub" class="alert alert-small alert-warning"><span class="subpages">&lt; <a href="../../SMILA.html" title="SMILA">SMILA</a>&lrm; | <a href="../Documentation.1.html" title="SMILA/Documentation">Documentation</a></span></div>
<!-- /subtitle -->
<div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr"><div class="messagebox" style="background-color: #def3fe; border: 1px solid #c5d7e0; color: black; padding: 5px; margin: 1ex 0; min-height: 35px; padding-left: 45px;">
<div style="float: left; margin-left: -40px;"><a href="http://wiki.eclipse.org/File:Note.png" class="image"><img alt="Note.png" src="http://wiki.eclipse.org/images/c/cc/Note.png" width="35" height="35" /></a></div>
<div><b>This is deprecated for SMILA 1.0, the connectivity framework has been replaced by the new <a href="../Documentation.1.html#Importing" title="SMILA/Documentation"> Importing framework</a>.</b><br /></div>
</div>
<p><br />
</p>
<div id="toc" class="toc"><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1 tocsection-1"><a href="JDBC_Crawler.html#Overview"><span class="tocnumber">1</span> <span class="toctext">Overview</span></a></li>
<li class="toclevel-1 tocsection-2"><a href="JDBC_Crawler.html#Crawling_configuration"><span class="tocnumber">2</span> <span class="toctext">Crawling configuration</span></a></li>
<li class="toclevel-1 tocsection-3"><a href="JDBC_Crawler.html#Crawling_configuration_explanation"><span class="tocnumber">3</span> <span class="toctext">Crawling configuration explanation</span></a></li>
<li class="toclevel-1 tocsection-4"><a href="JDBC_Crawler.html#Crawling_configuration_example"><span class="tocnumber">4</span> <span class="toctext">Crawling configuration example</span></a></li>
<li class="toclevel-1 tocsection-5"><a href="JDBC_Crawler.html#Output_example"><span class="tocnumber">5</span> <span class="toctext">Output example</span></a></li>
<li class="toclevel-1 tocsection-6"><a href="JDBC_Crawler.html#Using_the_Grouping_element"><span class="tocnumber">6</span> <span class="toctext">Using the Grouping element</span></a></li>
</ul>
</div>
<h2><span class="mw-headline" id="Overview">Overview</span></h2>
<p>The JDBC crawler collects data from a JDBC-accessible database based on a SELECT statement given in <tt>DataSourceConnectionConfig</tt>.
Any of the columns of the database record returned by the SELECT statement may be mapped to an attribute of the SMILA record in the <tt>Attributes</tt> section of the <tt>DataSourceConnectionConfig</tt>.
</p><p>To overcome certain shortcomings of some JDBC drivers concerning the handling of really large data sets, the crawler features an optional grouping mechanism enabling it to retrieve data in well-defined frames from the database and thus avoid OutOfMemoryExceptions.
</p><p>To use the JDBC crawler with your custom JDBC driver, the JDBC crawler bundle's ClassLoader has to have access to the driver class, i.e. you have to have a bundle that exports the driver class.
</p>
<h2><span class="mw-headline" id="Crawling_configuration">Crawling configuration</span></h2>
<p>The configuration file has to be located at <tt>configuration/org.eclipse.smila.connectivity.framework</tt>.
</p><p>Defining Schema: <tt>org.eclipse.smila.connectivity.framework.crawler.jdbc/schemas/JdbcDataSourceConnectionConfigSchema.xsd</tt>
</p>
<h2><span class="mw-headline" id="Crawling_configuration_explanation">Crawling configuration explanation</span></h2>
<p>See <a href="Crawler.html#Configuration" title="SMILA/Documentation/Crawler">SMILA/Documentation/Crawler#Configuration</a> for the generic parts of the configuration file.
</p><p>The root element of the crawling configuration is <tt>DataSourceConnectionConfig</tt> and contains the following sub elements:
</p>
<ul>
<li> <tt>DataSourceID</tt> – the identification of a data source.
</li>
<li> <tt>SchemaID</tt> – specify the schema for a crawler job.
</li>
<li> <tt>DataConnectionID</tt> – describes which agent crawler should be used.
<ul>
<li> <tt>Crawler</tt> – implementation class of a crawler.
</li>
</ul>
</li>
<li> <tt>DeltaIndexing</tt> – specifies deltaindexing mode.
</li>
<li> <tt>Attributes</tt> – list all attributes you would like to use from a database row
<ul>
<li> <tt>Attribute</tt>
<ul>
<li> <tt>Type</tt> (required) – the data type to use in the SMILA record.
</li>
<li> <tt>Name</tt> (required) – the name of the attribute to create in the SMILA record.
</li>
<li> <tt>HashAttribute</tt> (required) – specify if a hash should be created (true or false).
</li>
<li> <tt>KeyAttribute</tt> (required) – creates a key for this object, for example for record id (true or false).
</li>
<li> <tt>Attachment</tt> (required) – specify if the attribute's data should be stored as an attachment e.g. for blob or clob fields (true or false)
</li>
<li> <tt>ColumnName</tt> (required) - the name of the column in the database row to use for this attribute
</li>
<li> <tt>SqlType</tt> (required) - specify the SQL Type of the column in the database row (one of: "string", "long", "date", "double", "blob", "clob", "boolean", "byte[]", "timestamp")
</li>
</ul>
</li>
</ul>
</li>
</ul>
<ul>
<li> <tt>Process</tt>
<ul>
<li> <tt>Selections</tt> - Which data is to be selected (and how)
<ul>
<li> <tt>Grouping</tt> (optional) - adds support for "chunk"-wise retrieving of data from the database in order to preserve memory resources. See "Using the Grouping Element" below.
<ul>
<li> <tt>Stepping</tt> - how many rows should be retrieved at a time (integer)
</li>
<li> <tt>SQL</tt> - specify an SQL statement that returns an ordered list of database keys which can be used to create the retrieval partitions
</li>
</ul>
</li>
<li> <tt>SQL</tt> - the SQL statement that selects the actual data to be retrieved.
</li>
</ul>
</li>
<li> <tt>Database</tt> - JDBC Connection information
<ul>
<li> <tt>Connection</tt> - The JDBC connection URL to use for connecting to the database
</li>
<li> <tt>User</tt> - The username to use when connecting to the database (can be left blank if anonymous access is possible)
</li>
<li> <tt>Password</tt> - The password to use when connecting to the database (can be left blank if anonymous access is possible)
</li>
<li> <tt>FetchSize</tt> - The FetchSize to set when creating the JDBC-Connection. This is mapped to the JDBC-property and must not be confused with the Stepping functionality of the crawler.
</li>
<li> <tt>JdbcDriver</tt> - Specify the fully qualified class name of the Jdbc-Driver to use (must be accessible to the bundle's class loader)
</li>
</ul>
</li>
</ul>
</li>
</ul>
<h2><span class="mw-headline" id="Crawling_configuration_example">Crawling configuration example</span></h2>
<p>A typical configuration for the JDBC crawler looks like this:
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="xml source-xml"><pre class="de1"><span class="sc3"><span class="re1">&lt;?xml</span> <span class="re0">version</span>=<span class="st0">&quot;1.0&quot;</span> <span class="re0">encoding</span>=<span class="st0">&quot;UTF-8&quot;</span><span class="re2">?&gt;</span></span>
<span class="sc3"><span class="re1">&lt;DataSourceConnectionConfig</span></span>
<span class="sc3"> <span class="re0">xmlns:xsi</span>=<span class="st0">&quot;http://www.w3.org/2001/XMLSchema-instance&quot;</span></span>
<span class="sc3"> <span class="re0">xsi:noNamespaceSchemaLocation</span>=<span class="st0">&quot;../org.eclipse.smila.connectivity.framework.crawler.jdbc/schemas/JdbcDataSourceConnectionConfigSchema.xsd&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;DataSourceID<span class="re2">&gt;</span></span></span>jdbc<span class="sc3"><span class="re1">&lt;/DataSourceID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SchemaID<span class="re2">&gt;</span></span></span>org.eclipse.smila.connectivity.framework.crawler.jdbc<span class="sc3"><span class="re1">&lt;/SchemaID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;DataConnectionID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Crawler<span class="re2">&gt;</span></span></span>JdbcCrawler<span class="sc3"><span class="re1">&lt;/Crawler<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/DataConnectionID<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;DeltaIndexing<span class="re2">&gt;</span></span></span>full<span class="sc3"><span class="re1">&lt;/DeltaIndexing<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attributes<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attribute</span> <span class="re0">Name</span>=<span class="st0">&quot;Person_ID&quot;</span> <span class="re0">HashAttribute</span>=<span class="st0">&quot;true&quot;</span></span>
<span class="sc3"> <span class="re0">KeyAttribute</span>=<span class="st0">&quot;true&quot;</span> <span class="re0">Type</span>=<span class="st0">&quot;Long&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;ColumnName<span class="re2">&gt;</span></span></span>id<span class="sc3"><span class="re1">&lt;/ColumnName<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SqlType<span class="re2">&gt;</span></span></span>long<span class="sc3"><span class="re1">&lt;/SqlType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Attribute<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attribute</span> <span class="re0">Name</span>=<span class="st0">&quot;BMI&quot;</span> <span class="re0">HashAttribute</span>=<span class="st0">&quot;true&quot;</span> <span class="re0">KeyAttribute</span>=<span class="st0">&quot;false&quot;</span></span>
<span class="sc3"> <span class="re0">Type</span>=<span class="st0">&quot;double&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;ColumnName<span class="re2">&gt;</span></span></span>body_mass_index<span class="sc3"><span class="re1">&lt;/ColumnName<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SqlType<span class="re2">&gt;</span></span></span>double<span class="sc3"><span class="re1">&lt;/SqlType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Attribute<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attribute</span> <span class="re0">Name</span>=<span class="st0">&quot;VacationDays&quot;</span> <span class="re0">HashAttribute</span>=<span class="st0">&quot;false&quot;</span></span>
<span class="sc3"> <span class="re0">KeyAttribute</span>=<span class="st0">&quot;false&quot;</span> <span class="re0">Type</span>=<span class="st0">&quot;Long&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;ColumnName<span class="re2">&gt;</span></span></span>vacationdays<span class="sc3"><span class="re1">&lt;/ColumnName<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SqlType<span class="re2">&gt;</span></span></span>long<span class="sc3"><span class="re1">&lt;/SqlType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Attribute<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attribute</span> <span class="re0">Name</span>=<span class="st0">&quot;BirthDay&quot;</span> <span class="re0">HashAttribute</span>=<span class="st0">&quot;true&quot;</span></span>
<span class="sc3"> <span class="re0">KeyAttribute</span>=<span class="st0">&quot;true&quot;</span> <span class="re0">Type</span>=<span class="st0">&quot;date&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;ColumnName<span class="re2">&gt;</span></span></span>birthday<span class="sc3"><span class="re1">&lt;/ColumnName<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SqlType<span class="re2">&gt;</span></span></span>date<span class="sc3"><span class="re1">&lt;/SqlType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Attribute<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attribute</span> <span class="re0">Name</span>=<span class="st0">&quot;DownSizeCandidate&quot;</span> <span class="re0">HashAttribute</span>=<span class="st0">&quot;false&quot;</span></span>
<span class="sc3"> <span class="re0">KeyAttribute</span>=<span class="st0">&quot;false&quot;</span> <span class="re0">Type</span>=<span class="st0">&quot;Boolean&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;ColumnName<span class="re2">&gt;</span></span></span>scheduled_for_downsizing<span class="sc3"><span class="re1">&lt;/ColumnName<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SqlType<span class="re2">&gt;</span></span></span>boolean<span class="sc3"><span class="re1">&lt;/SqlType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Attribute<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attribute</span> <span class="re0">Name</span>=<span class="st0">&quot;DownSizedOn&quot;</span> <span class="re0">HashAttribute</span>=<span class="st0">&quot;false&quot;</span></span>
<span class="sc3"> <span class="re0">KeyAttribute</span>=<span class="st0">&quot;false&quot;</span> <span class="re0">Type</span>=<span class="st0">&quot;Timestamp&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;ColumnName<span class="re2">&gt;</span></span></span>downsized<span class="sc3"><span class="re1">&lt;/ColumnName<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SqlType<span class="re2">&gt;</span></span></span>timestamp<span class="sc3"><span class="re1">&lt;/SqlType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Attribute<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attribute</span> <span class="re0">Name</span>=<span class="st0">&quot;Photo&quot;</span> <span class="re0">HashAttribute</span>=<span class="st0">&quot;false&quot;</span> <span class="re0">Attachment</span>=<span class="st0">&quot;true&quot;</span></span>
<span class="sc3"> <span class="re0">KeyAttribute</span>=<span class="st0">&quot;false&quot;</span> <span class="re0">Type</span>=<span class="st0">&quot;Blob&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;ColumnName<span class="re2">&gt;</span></span></span>photo<span class="sc3"><span class="re1">&lt;/ColumnName<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SqlType<span class="re2">&gt;</span></span></span>blob<span class="sc3"><span class="re1">&lt;/SqlType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Attribute<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attribute</span> <span class="re0">Name</span>=<span class="st0">&quot;Resume&quot;</span> <span class="re0">HashAttribute</span>=<span class="st0">&quot;false&quot;</span></span>
<span class="sc3"> <span class="re0">KeyAttribute</span>=<span class="st0">&quot;false&quot;</span> <span class="re0">Attachment</span>=<span class="st0">&quot;true&quot;</span> <span class="re0">Type</span>=<span class="st0">&quot;Clob&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;ColumnName<span class="re2">&gt;</span></span></span>cv<span class="sc3"><span class="re1">&lt;/ColumnName<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SqlType<span class="re2">&gt;</span></span></span>clob<span class="sc3"><span class="re1">&lt;/SqlType<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Attribute<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Attributes<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Process<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Selections<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Grouping<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Stepping<span class="re2">&gt;</span></span></span>13<span class="sc3"><span class="re1">&lt;/Stepping<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SQL<span class="re2">&gt;</span></span></span>SELECT id FROM person ORDER BY id ASC<span class="sc3"><span class="re1">&lt;/SQL<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Grouping<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;SQL<span class="re2">&gt;</span></span></span>
SELECT id, vorname, name, body_mass_index, vacationdays,
birthday, scheduled_for_downsizing, downsized, photo, cv
FROM person where id BETWEEN&#160;%01min AND&#160;%01max
<span class="sc3"><span class="re1">&lt;/SQL<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Selections<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Database</span> <span class="re0">Connection</span>=<span class="st0">&quot;jdbc:derby:crawlerTestDerbyDB&quot;</span> <span class="re0">User</span>=<span class="st0">&quot;&quot;</span></span>
<span class="sc3"> <span class="re0">Password</span>=<span class="st0">&quot;&quot;</span> <span class="re0">FetchSize</span>=<span class="st0">&quot;100000&quot;</span></span>
<span class="sc3"> <span class="re0">JdbcDriver</span>=<span class="st0">&quot;org.apache.derby.jdbc.EmbeddedDriver&quot;</span> <span class="re2">/&gt;</span></span>
<span class="sc3"><span class="re1">&lt;/Process<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/DataSourceConnectionConfig<span class="re2">&gt;</span></span></span></pre></div></div>
<h2><span class="mw-headline" id="Output_example">Output example</span></h2>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="xml source-xml"><pre class="de1"><span class="sc3"><span class="re1">&lt;Record</span> <span class="re0">xmlns</span>=<span class="st0">&quot;http://www.eclipse.org/smila/record&quot;</span> <span class="re0">version</span>=<span class="st0">&quot;2.0&quot;</span><span class="re2">&gt;</span></span>
<span class="sc3"><span class="re1">&lt;Val</span> <span class="re0">key</span>=<span class="st0">&quot;_recordid&quot;</span><span class="re2">&gt;</span></span>jdbc:<span class="sc1">&amp;lt;</span>Birthday=2009-04-02T00:00:00+0100;Person_ID=1<span class="sc1">&amp;gt;</span><span class="sc3"><span class="re1">&lt;/Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Val</span> <span class="re0">key</span>=<span class="st0">&quot;_source&quot;</span><span class="re2">&gt;</span></span>jdbc<span class="sc3"><span class="re1">&lt;/Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Val</span> <span class="re0">key</span>=<span class="st0">&quot;Person_ID&quot;</span> <span class="re0">type</span>=<span class="st0">&quot;long&quot;</span><span class="re2">&gt;</span></span>1<span class="sc3"><span class="re1">&lt;/Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Val</span> <span class="re0">key</span>=<span class="st0">&quot;BMI&quot;</span> <span class="re0">type</span>=<span class="st0">&quot;long&quot;</span><span class="re2">&gt;</span></span>0.5497346110141528<span class="sc3"><span class="re1">&lt;/Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Val</span> <span class="re0">key</span>=<span class="st0">&quot;VacationDays&quot;</span> <span class="re0">type</span>=<span class="st0">&quot;long&quot;</span><span class="re2">&gt;</span></span>23<span class="sc3"><span class="re1">&lt;/Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Val</span> <span class="re0">key</span>=<span class="st0">&quot;BirthDay&quot;</span> <span class="re0">type</span>=<span class="st0">&quot;datetime&quot;</span><span class="re2">&gt;</span></span>2009-04-02T00:00:00+0100<span class="sc3"><span class="re1">&lt;/Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Val</span> <span class="re0">key</span>=<span class="st0">&quot;DownSizeCandidate&quot;</span> <span class="re0">type</span>=<span class="st0">&quot;long&quot;</span><span class="re2">&gt;</span></span>0<span class="sc3"><span class="re1">&lt;/Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Val</span> <span class="re0">key</span>=<span class="st0">&quot;DownSizedOn&quot;</span> <span class="re0">type</span>=<span class="st0">&quot;datetime&quot;</span><span class="re2">&gt;</span></span>2009-04-02 00:00:00+0100<span class="sc3"><span class="re1">&lt;/Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Val</span> <span class="re0">key</span>=<span class="st0">&quot;_HASH_TOKEN&quot;</span><span class="re2">&gt;</span></span>69d132fab2fd88cf9ccc17e57f68394ac3fed97ec8bab1c89bf764a6fa662<span class="sc3"><span class="re1">&lt;/Val<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attachment<span class="re2">&gt;</span></span></span>Photo<span class="sc3"><span class="re1">&lt;/Attachment<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;Attachment<span class="re2">&gt;</span></span></span>Resume<span class="sc3"><span class="re1">&lt;/Attachment<span class="re2">&gt;</span></span></span>
<span class="sc3"><span class="re1">&lt;/Record<span class="re2">&gt;</span></span></span></pre></div></div>
<h2><span class="mw-headline" id="Using_the_Grouping_element">Using the Grouping element</span></h2>
<p>If used, the <b>Grouping</b> element must contain its own <b>SQL</b> element with a SELECT statement and an additional <b>Stepping</b> element containing a non-negative integer value.
</p>
Use of the <b>Grouping</b> element is best explained by means of an example. Imagine running the retrieval statement <div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="sql source-sql"><pre class="de1"><span class="kw1">SELECT</span> <span class="sy0">*</span> <span class="kw1">FROM</span> CUSTOMER</pre></div></div> on your CUSTOMER table and further imagine that you have a very healthy customer base, so your CUSTOMER table contains 750,000 rows. You would invariably run into OutOfMemoryErrors during the crawling run, as the JDBC ResultSet does not release the resources for retrieved data rows. After calling <div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="java source-java"><pre class="de1">resultSet.<span class="me1">next</span><span class="br0">&#40;</span><span class="br0">&#41;</span></pre></div></div> a few hundred thousand times an OutOfMemoryError is more or less inevitable.
<p>Grouping to the rescue! The grouping element enables you to break down your query results into blocks of custom size (specified by the <b>Stepping</b> value) which can be retrieved sequentially while releasing the resultset's resources after each block. You have to be sure to provide a SELECT statement in the <b>SQL</b> element of <b>Grouping</b> which returns a <i><b>sorted</b></i> list of key values by which the results can be grouped. This could be the primary key of the table for instance or any other suitable discriminator. The stepping value is used by the crawler in turn to form groups of the specified size from the keys.
</p>
If the statement <div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="sql source-sql"><pre class="de1"> <span class="kw1">SELECT</span> primaryKey <span class="kw1">FROM</span> CUSTOMER <span class="kw1">ORDER</span> <span class="kw1">BY</span> primaryKey <span class="kw1">ASC</span></pre></div></div> returned for example the following values:
<dl>
<dd><dl>
<dd><b>{1, 2, 4, 5, 6, 8, 12, 13, 21, 34, 56, 67}</b>
</dd>
</dl>
</dd>
</dl>
<p>a stepping value of <b>5</b> would result in the creation of the following groups:
</p>
<dl>
<dd><dl>
<dd><b>{1, 2, 4, 5, 6}</b>
</dd>
<dd><b>{8, 12, 13, 21, 34}</b> and
</dd>
<dd><b>{56, 67}</b>
</dd>
</dl>
</dd>
</dl>
<p>whereas with a stepping value of <b>9</b> the following groups would have been formed:
</p>
<dl>
<dd><dl>
<dd><b>{1, 2, 4, 5, 6, 8, 12, 13, 21}</b> and
</dd>
<dd><b>{34, 56, 67}</b>
</dd>
</dl>
</dd>
</dl>
<p>You also have to apply a slight modification to your original retrieval SQL statement (the one directly descending the <b>Selections</b> element) in order for the grouping feature to do its magic:
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="sql source-sql"><pre class="de1"><span class="kw1">SELECT</span> <span class="sy0">*</span> <span class="kw1">FROM</span> CUSTOMER <span class="kw1">WHERE</span> primaryKey <span class="kw1">BETWEEN</span>&#160;%01min <span class="kw1">AND</span>&#160;%01max</pre></div></div>
<p>The crawler will replace the <b>%01min</b> and <b>%01max</b> tokens with the respective minimum and maximum values of each of the formed groups, thus creating the following three SQL statements for data retrieval (assuming a stepping value of <b>5</b> was used):
</p>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="sql source-sql"><pre class="de1"><span class="kw1">SELECT</span> <span class="sy0">*</span> <span class="kw1">FROM</span> CUSTOMER <span class="kw1">WHERE</span> primaryKey <span class="kw1">BETWEEN</span> <span class="nu0">1</span> <span class="kw1">AND</span> <span class="nu0">6</span></pre></div></div>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="sql source-sql"><pre class="de1"><span class="kw1">SELECT</span> <span class="sy0">*</span> <span class="kw1">FROM</span> CUSTOMER <span class="kw1">WHERE</span> primaryKey <span class="kw1">BETWEEN</span> <span class="nu0">8</span> <span class="kw1">AND</span> <span class="nu0">34</span></pre></div></div>
<div dir="ltr" class="mw-geshi mw-code mw-content-ltr"><div class="sql source-sql"><pre class="de1"><span class="kw1">SELECT</span> <span class="sy0">*</span> <span class="kw1">FROM</span> CUSTOMER <span class="kw1">WHERE</span> primaryKey <span class="kw1">BETWEEN</span> <span class="nu0">56</span> <span class="kw1">AND</span> <span class="nu0">67</span></pre></div></div>
<p>which will subsequently be submitted sequentially to the database.
</p>
<!--
NewPP limit report
CPU time usage: 0.240 seconds
Real time usage: 0.282 seconds
Preprocessor visited node count: 97/1000000
Preprocessor generated node count: 416/1000000
Post‐expand include size: 1002/2097152 bytes
Template argument size: 470/2097152 bytes
Highest expansion depth: 5/40
Expensive parser function count: 0/100
-->
<!-- Saved in parser cache with key my_wiki:pcache:idhash:16918-0!*!0!!en!2!* and timestamp 20150414084634 and revision id 372805
-->
</div>
<!-- catlinks -->
<div id='catlinks' class='catlinks catlinks-allhidden'></div> <!-- /catlinks -->
</div>
</div>
</div>
</div>
<!-- /maincontent -->
<!-- printfooter -->
<div class="printfooter">
Retrieved from "<a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;oldid=372805">http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;oldid=372805</a>" </div>
<!-- /printfooter -->
<!-- debughtml -->
<!-- /debughtml -->
</div>
<!-- /bodyContent -->
</section>
<!-- /content -->
<!-- footer -->
</div> <section id="footer-contribution-info" style="border-top:1px solid #ccc;" class="footer-offset background-white margin-top-25"><div class="container text-center padding-top-10 padding-bottom-10"><p id="footercredit">This page was last modified 11:31, 28 October 2014 by <a href="http://wiki.eclipse.org/User:Andreas.weber.empolis.com" title="User:Andreas.weber.empolis.com">Andreas Weber</a>. Based on work by <a href="http://wiki.eclipse.org/index.php?title=User:Juergen.schumacher.attensity.com&amp;action=edit&amp;redlink=1" class="new" title="User:Juergen.schumacher.attensity.com (page does not exist)">Juergen Schumacher</a>, <a href="http://wiki.eclipse.org/User:Drazen.cindric.attensity.com" title="User:Drazen.cindric.attensity.com">Drazen Cindric</a> and <a href="http://wiki.eclipse.org/User:Andreas.Weber.empolis.com" title="User:Andreas.Weber.empolis.com">Andreas Weber</a> and <a href="http://wiki.eclipse.org/index.php?title=SMILA/Documentation/JDBC_Crawler&amp;action=credits" title="SMILA/Documentation/JDBC Crawler">others</a>.</p><p id="footerviews">This page has been accessed 8,382 times.</p></div></section> </main> <!-- /#main-content-container-row -->
<p id="back-to-top" class="noprint hidden-print">
<a class="visible-xs" href="JDBC_Crawler.html#top">Back to the top</a>
</p>
<footer role="contentinfo" class="noprint hidden-print">
<div class="container">
<div class="row">
<section id="footer-eclipse-foundation" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Eclipse Foundation</h2>
<ul class="nav">
<li><a href="https://eclipse.org/org/">About us</a></li>
<li><a href="https://eclipse.org/org/foundation/contact.php">Contact Us</a></li>
<li><a href="https://eclipse.org/donate">Donate</a></li>
<li><a href="https://eclipse.org/org/documents/">Governance</a></li>
<li><a href="https://eclipse.org/artwork/">Logo and Artwork</a></li>
<li><a href="https://eclipse.org/org/foundation/directors.php">Board of Directors</a></li>
</ul>
</section>
<section id="footer-legal" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Legal</h2>
<ul class="nav">
<li><a href="https://eclipse.org/legal/privacy.php">Privacy Policy</a></li>
<li><a href="https://eclipse.org/legal/termsofuse.php">Terms of Use</a></li>
<li><a href="https://eclipse.org/legal/copyright.php">Copyright Agent</a></li>
<li><a href="https://eclipse.org/org/documents/epl-v10.php">Eclipse Public License </a></li>
<li><a href="https://eclipse.org/legal/">Legal Resources </a></li>
</ul>
</section>
<section id="footer-useful-links" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Useful Links</h2>
<ul class="nav">
<li><a href="https://bugs.eclipse.org/bugs/">Report a Bug</a></li>
<li><a href="http://help.eclipse.org/">Documentation</a></li>
<li><a href="https://eclipse.org/contribute/">How to Contribute</a></li>
<li><a href="https://eclipse.org/mail/">Mailing Lists</a></li>
<li><a href="https://eclipse.org/forums/">Forums</a></li>
<li><a href="http://marketplace.eclipse.org/">Marketplace</a></li>
</ul>
</section>
<section id="footer-other" class="col-xs-offset-1 col-xs-11 col-sm-7 col-md-6 col-md-offset-0">
<h2 class="section-title">Other</h2>
<ul class="nav">
<li><a href="https://eclipse.org/ide/">IDE and Tools</a></li>
<li><a href="https://eclipse.org/projects">Community of Projects</a></li>
<li><a href="https://eclipse.org/org/workinggroups/">Working Groups</a></li>
</ul>
<ul class="list-inline social-media">
<li><a href="https://twitter.com/EclipseFdn"><i class="fa fa-twitter-square"></i></a></li>
<li><a href="https://plus.google.com/+Eclipse"><i class="fa fa-google-plus-square"></i></a></li>
<li><a href="https://www.facebook.com/eclipse.org"><i class="fa fa-facebook-square"></i> </a></li>
<li><a href="https://www.youtube.com/user/EclipseFdn"><i class="fa fa-youtube-square"></i></a></li>
</ul>
</section>
<div id="copyright" class="col-xs-offset-1 col-sm-14 col-md-24 col-md-offset-0">
<div>
<span><img src="http://eclipse.org/eclipse.org-common/themes/solstice/public/images/logo/eclipse-logo-bw-800x188.png" alt="Eclipse.org black and white logo" width="166" height="39" id="logo-eclipse-white"/></span>
<p id="copyright-text">Copyright &copy; 2014 The Eclipse Foundation. All Rights Reserved.</p>
</div>
</div>
<a href="JDBC_Crawler.html#" class="scrollup">Back to the top</a>
</div>
</div>
</footer>
<script src="http://wiki.eclipse.org/skins/solstice/public/javascript/main.min.js"></script>
<!-- Placed at the end of the document so the pages load faster -->
<script type="text/javascript">
// Legacy Google Analytics (ga.js) bootstrap: queue the account id and a
// page-view command, then inject the tracker script asynchronously.
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-910670-2']);
_gaq.push(['_trackPageview']);
(function() {
  // Choose the host prefix that matches the protocol the page was served over.
  var hostPrefix = 'https:' == document.location.protocol ? 'https://ssl' : 'http://www';

  var tracker = document.createElement('script');
  tracker.type = 'text/javascript';
  tracker.async = true;
  tracker.src = hostPrefix + '.google-analytics.com/ga.js';

  // Insert the tracker ahead of the first <script> element in the document.
  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(tracker, firstScript);
})();
</script> <!-- /footer -->
<!-- Inline bootstrap: calls jQuery.ready() when jQuery is present; then, only if window.mw exists, passes a module-name to state map to mw.loader.state (skins.solstice as "loading", the others as "ready"). -->
<script>/*<![CDATA[*/window.jQuery && jQuery.ready();/*]]>*/</script><script>if(window.mw){
mw.loader.state({"skins.solstice":"loading","site":"ready","user":"ready","user.groups":"ready"});
}</script>
<!-- Requests the scripts of the skins.solstice module from load.php. -->
<script src="http://wiki.eclipse.org/load.php?debug=false&amp;lang=en&amp;modules=skins.solstice&amp;only=scripts&amp;skin=solstice&amp;*"></script>
<!-- Only if window.mw exists: asks mw.loader to load the listed MediaWiki modules. -->
<script>if(window.mw){
mw.loader.load(["mediawiki.action.view.postEdit","mediawiki.user","mediawiki.hidpi","mediawiki.page.ready","mediawiki.searchSuggest"],null,true);
}</script>
<!-- Only if window.mw exists: stores wgBackendResponseTime in mw.config (unit not shown here; presumably milliseconds, to be confirmed). -->
<script>if(window.mw){
mw.config.set({"wgBackendResponseTime":352});
}</script> </body>
</html>