<!-- blob: 1c8d8293b665b9292518a5c47f51ae72983bbe78 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang='en' dir='auto'><head>
<meta charset='utf-8'>
<!-- Page metadata: viewport, search/social descriptions, feed, icon and stylesheets. -->
<meta name='viewport' content='width=device-width, initial-scale=1'>
<!-- The descriptions reuse the introductory sentence of the page body so search and social previews are not blank. -->
<meta name='description' content='Eclipse DataEggs provides datasets related to the development of Eclipse projects, mainly for software practitioners and researchers.'>
<meta name='theme-color' content='#ffcd00'>
<meta property='og:title' content='Eclipse DataEggs'>
<meta property='og:description' content='Eclipse DataEggs provides datasets related to the development of Eclipse projects, mainly for software practitioners and researchers.'>
<meta property='og:url' content='https://download.eclipse.org/dataeggs/'>
<meta property='og:site_name' content='Eclipse DataEggs'>
<meta property='og:type' content='website'>
<meta property='og:image' content='https://download.eclipse.org/dataeggs/images/dataeggs-header.png'>
<!-- Twitter card tags are keyed by the name attribute; Open Graph tags use property. -->
<meta name='twitter:card' content='summary_large_image'>
<meta name='twitter:image' content='https://download.eclipse.org/dataeggs/images/dataeggs-header.png'>
<meta name="generator" content="Hugo 0.80.0" />
<title>Eclipse DataEggs</title>
<link rel='canonical' href='https://download.eclipse.org/dataeggs/'>
<link href="https://download.eclipse.org/dataeggs/index.xml" rel="alternate" type="application/rss+xml" title="Eclipse DataEggs" />
<link rel='icon' href='/dataeggs/favicon.ico'>
<link rel='stylesheet' href='/dataeggs/assets/css/main.ab98e12b.css'>
<link rel='stylesheet' href='/dataeggs/css/custom.css'>
<!-- Accent colour exposed as a CSS custom property; same value as theme-color above. -->
<style>
:root{--color-accent:#ffcd00;}
</style>
<!-- Google Analytics: queue a "create" and a "pageview" command, then load analytics.js asynchronously. -->
<script>
  // Hard-coded switch: the tracking commands below are queued whenever it is false.
  var doNotTrack = false;
  if (!doNotTrack) {
    // Command stub: stores the arguments of every ga() call in ga.q.
    window.ga = window.ga || function () {
      (ga.q = ga.q || []).push(arguments);
    };
    // Timestamp taken when the stub is installed.
    ga.l = +new Date;
    ga('create', 'UA-3675452-15', 'auto');
    ga('send', 'pageview');
  }
</script>
<script async src="https://www.google-analytics.com/analytics.js"></script>
</head>
<body class='home type-page has-cover has-sidebar'>
<div class='site'><div id='sidebar' class='sidebar'>
<!-- Skip link: targets the sidebar navigation, the only menu in this document (no element has id "main-menu"). -->
<a class='screen-reader-text' href='#sidebar-menu'>Skip to Main Menu</a>
<div class='container'><section class='widget widget-about sep-after'>
<header>
  <div class='logo'>
    <!-- The logo is the only content of the home link, so its alt text names the link destination. -->
    <a href='/dataeggs/'>
      <img src='/dataeggs/images/dataeggs-menu.png' alt='Eclipse DataEggs home'>
    </a>
  </div>
  <!-- Site tagline shown under the logo. -->
  <div class='desc'>
    Open. Safe. Easy.
  </div>
</header>
</section>
<!-- Sidebar search widget: the form has no method attribute, so it submits the "q" field to /dataeggs/search with GET. -->
<section class="widget widget-search sep-after">
  <header>
    <h4 class="title widget-title">Search</h4>
  </header>
  <form action="/dataeggs/search" id="search-form" class="search-form">
    <!-- The label wraps the input; its text sits in a screen-reader-text span. -->
    <label>
      <span class="screen-reader-text">Search</span>
      <input id="search-term" class="search-term" type="search" name="q" placeholder="Search&hellip;">
    </label>
  </form>
</section>
<section class='widget widget-sidebar_menu sep-after'><nav id='sidebar-menu' class='menu sidebar-menu' aria-label='Sidebar Menu'>
<!-- Sidebar navigation entries. The "projects" item links to the projects index (the same target as the "Development data" link in the page body) and carries a toggle button for its sub-menu of per-project dataset reports. -->
<div class='container'>
<ul><li class='item current'>
<a aria-current='page' href='/dataeggs/'>Home</a></li><li class='item'>
<a href='/dataeggs/privacy/'>Privacy</a></li><li class='item'>
<a href='/dataeggs/aeri_stacktraces/'>AERI</a></li><li class='item'>
<a href='/dataeggs/eclipse_mls/'>MLS</a></li><li class='item has-children'>
<a href='/dataeggs/projects/'>projects</a><button type='button' class='sub-menu-toggler'>
<span class='screen-reader-text'>expand sub menu</span>
<span class='sign'></span>
</button>
<ul class='sub-menu'><li class='item'>
<a href='/dataeggs/projects/ecd.che/datasets_report/'>ecd.che</a></li><li class='item'>
<a href='/dataeggs/projects/ee4j.glassfish/datasets_report/'>ee4j.glassfish</a></li><li class='item'>
<a href='/dataeggs/projects/modeling.emf-parsley/datasets_report/'>modeling.emf-parsley</a></li><li class='item'>
<a href='/dataeggs/projects/modeling.emfcompare/datasets_report/'>modeling.emfcompare</a></li><li class='item'>
<a href='/dataeggs/projects/modeling.epsilon/datasets_report/'>modeling.epsilon</a></li><li class='item'>
<a href='/dataeggs/projects/modeling.gendoc/datasets_report/'>modeling.gendoc</a></li><li class='item'>
<a href='/dataeggs/projects/modeling.m2t.acceleo/datasets_report/'>modeling.m2t.acceleo</a></li><li class='item'>
<a href='/dataeggs/projects/modeling.mdt.ocl/datasets_report/'>modeling.mdt.ocl</a></li><li class='item'>
<a href='/dataeggs/projects/modeling.sirius/datasets_report/'>modeling.sirius</a></li><li class='item'>
<a href='/dataeggs/projects/technology.apogy/datasets_report/'>technology.apogy</a></li><li class='item'>
<a href='/dataeggs/projects/technology.app4mc/datasets_report/'>technology.app4mc</a></li><li class='item'>
<a href='/dataeggs/projects/technology.collections/datasets_report/'>technology.collections</a></li><li class='item'>
<a href='/dataeggs/projects/technology.ease/datasets_report/'>technology.ease</a></li><li class='item'>
<a href='/dataeggs/projects/technology.egit/datasets_report/'>technology.egit</a></li><li class='item'>
<a href='/dataeggs/projects/technology.epf/datasets_report/'>technology.epf</a></li><li class='item'>
<a href='/dataeggs/projects/technology.jgit/datasets_report/'>technology.jgit</a></li><li class='item'>
<a href='/dataeggs/projects/technology.paho/datasets_report/'>technology.paho</a></li><li class='item'>
<a href='/dataeggs/projects/technology.scout/datasets_report/'>technology.scout</a></li><li class='item'>
<a href='/dataeggs/projects/tools.cdt/datasets_report/'>tools.cdt</a></li><li class='item'>
<a href='/dataeggs/projects/tools.tracecompass/datasets_report/'>tools.tracecompass</a></li></ul></li></ul>
</div>
</nav>
</section><section class='widget widget-social_menu sep-after'><nav aria-label='Social Menu'>
<!-- Social links. The GitLab URL uses the same /eclipse/dataeggs/dataeggs path as the repository links in the page body. -->
<ul><li>
<a href='https://gitlab.eclipse.org/eclipse/dataeggs/dataeggs' target='_blank' rel='noopener me'>
<span class='screen-reader-text'>Open Gitlab account in new tab</span><svg class='icon' xmlns='http://www.w3.org/2000/svg' viewbox='0 0 24 24' stroke-linecap='round' stroke-linejoin='round' stroke-width='2' aria-hidden='true'>
<title>GitLab icon</title> <path d="M22.65 14.39L12 22.13 1.35 14.39a.84.84 0 0 1-.3-.94l1.22-3.78 2.44-7.51A.42.42 0 0 1 4.82 2a.43.43 0 0 1 .58 0 .42.42 0 0 1 .11.18l2.44 7.49h8.1l2.44-7.51A.42.42 0 0 1 18.6 2a.43.43 0 0 1 .58 0 .42.42 0 0 1 .11.18l2.44 7.51L23 13.45a.84.84 0 0 1-.35.94z"/>
</svg>
</a>
</li><li>
<a href='mailto:boris@chrysalice.org' target='_blank' rel='noopener me'>
<span class='screen-reader-text'>Contact via Email</span><svg class='icon' xmlns='http://www.w3.org/2000/svg' viewbox='0 0 24 24' stroke-linecap='round' stroke-linejoin='round' stroke-width='2' aria-hidden='true'>
<path d="M4 4h16c1.1 0 2 .9 2 2v12c0 1.1-.9 2-2 2H4c-1.1 0-2-.9-2-2V6c0-1.1.9-2 2-2z"></path><polyline points="22,6 12,13 2,6"></polyline>
</svg>
</a>
</li></ul>
</nav>
</section></div>
<div class='sidebar-overlay'></div>
</div><div class='main'><a class='screen-reader-text' href='#content'>Skip to Content</a>
<button id='sidebar-toggler' class='sidebar-toggler' aria-controls='sidebar'>
<!-- Text label of the toggler button, followed by its two icon variants; both icons are hidden from assistive technology. -->
<span class="screen-reader-text">Toggle Sidebar</span>
<!-- "open" icon: three horizontal bars. -->
<span class="open"><svg class="icon" xmlns="http://www.w3.org/2000/svg" viewbox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" aria-hidden="true">
  <line x1="3" y1="12" x2="21" y2="12" />
  <line x1="3" y1="6" x2="21" y2="6" />
  <line x1="3" y1="18" x2="21" y2="18" />
</svg>
</span>
<!-- "close" icon: two crossing diagonals. -->
<span class="close"><svg class="icon" xmlns="http://www.w3.org/2000/svg" viewbox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" aria-hidden="true">
  <line x1="18" y1="6" x2="6" y2="18" />
  <line x1="6" y1="6" x2="18" y2="18" />
</svg>
</span>
</button><div class='header-widgets'>
<!-- Breadcrumb trail. The inline rule appends a "/" after each crumb; the nav is labelled like the other navigation landmarks on the page. -->
<div class='container'><style>.widget-breadcrumbs li:after{content:'\2f '}</style>
<section class='widget widget-breadcrumbs sep-after'>
<nav id='breadcrumbs' aria-label='Breadcrumbs'>
<ol><li><span>Home</span></li></ol>
</nav>
</section></div>
</div>
<!-- Site header: it contains only an empty description paragraph. -->
<header id="header" class="header site-header">
  <div class="container sep-after">
    <div class="header-info"><p class="desc site-desc"></p>
    </div>
  </div>
</header>
<main id='content'>
<div class='entry'>
<div class='entry-cover'>
  <figure class='container cover-normal'>
    <!-- NOTE(review): the alt text assumes the banner carries the project branding; switch to alt='' if it is purely decorative. -->
    <img src='images/dataeggs-header.png' alt='Eclipse DataEggs'>
  </figure>
</div>
<div class='container entry-content'>
<p><a href="https://www.eclipse.org/dataeggs">Eclipse DataEggs</a> provides datasets related to the development of Eclipse projects, mainly for software practitioners and researchers.</p>
<p>The datasets include various pieces of data retrieved from the Eclipse forge: <strong>Mailing lists</strong>, <strong>Project development data</strong>, and <strong>AERI stacktraces</strong>, all in handy CSV and JSON formats. Each dataset comes with R Markdown documents describing its content and providing hints about how to use it. Examples provided mainly use the <a href="https://r-project.org">R statistical analysis software</a>.</p>
<!-- This heading needs its own id: "content" is already the id of the enclosing main element, which the "Skip to Content" link targets. -->
<h2 id="dataset-content">Content</h2>
<p>The datasets provided include:</p>
<ul>
<li><a href="eclipse_mls">Mailing lists</a> (full mboxes and CSV extracts) hosted at the Eclipse forge with their <a href="eclipse_mls/mbox_csv_analysis">documentation and examples</a>.</li>
<li><a href="aeri_stacktraces">AERI exception stacktraces</a> (not updated anymore, historical data only) includes 2 datasets: problems (see <a href="aeri_stacktraces/problems_analysis.pdf">documentation</a>) and incidents (see <a href="aeri_stacktraces/problems_incidents.pdf">documentation</a>).</li>
<li><a href="projects/">Development data</a> from Eclipse projects. Depending on data sources available for each project, the following information is provided:
<ul>
<li>SCM (git).</li>
<li>ITS (Bugzilla, GitHub issues, GitLab issues).</li>
<li>CI (Jenkins).</li>
<li>PMI checks.</li>
<li>Stack Overflow statistics.</li>
<li>Scancode analysis (executed on our server).</li>
</ul>
</li>
</ul>
<p>Privacy has been a major concern from the beginning. Once extracted, data is anonymised using <a href="https://github.com/borisbaldassari/data-anonymiser">data-anonymiser</a> and published in the downloads section of the project. See <a href="privacy/">our documentation for more details</a>.</p>
<p>All data related to projects is retrieved from the <strong>Eclipse Alambic instance</strong> at <a href="https://eclipse.alambic.io">https://eclipse.alambic.io</a>. <strong>Alambic</strong> is <strong>an open-source framework for development data extraction and processing</strong>, for more information see <a href="https://alambic.io">https://alambic.io</a>.</p>
<h2 id="contributing">Contributing</h2>
<p>All work on the Eclipse DataEggs project is handled transparently at <a href="https://gitlab.eclipse.org/eclipse/dataeggs/">https://gitlab.eclipse.org/eclipse/dataeggs/</a>.</p>
<p>We&rsquo;re open: if you&rsquo;d like to contribute, please join us! You can:</p>
<ul>
<li>Get the code and propose merge requests on the <a href="https://gitlab.eclipse.org/eclipse/dataeggs/dataeggs">DataEggs repository</a>.</li>
<li>Should you have any problem, request or question, please file an issue on the <a href="https://gitlab.eclipse.org/eclipse/dataeggs/dataeggs/-/issues">Eclipse GitLab project</a> page.</li>
</ul>
<h2 id="licencing">Licencing</h2>
<p>All datasets are published under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">Creative Commons BY-Attribution-Share Alike 4.0 (International)</a>.</p>
</div>
</div>
</main>
<!-- Site footer: Eclipse Foundation logo, copyright notice and legal links. Every link opens in a new tab, so each uses https and rel="noopener". -->
<footer id='footer' class='footer'>
<div class='container sep-before'><div class="row">
<div class="column">
<a href="https://www.eclipse.org/" target="_blank" rel="noopener"><img src="/dataeggs/images/logo-eclipse-foundation.png" alt="Eclipse Foundation logo"></a>
</div>
<div class="column">
<p></p>
<p id="copyright">Copyright © 2021 Eclipse Foundation, Inc.<br>All Rights Reserved.</p>
</div>
</div>
<div class="row">
<p><a href="https://www.eclipse.org/legal/privacy.php" target="_blank" rel="noopener">Privacy Policy</a> /
<a href="https://eclipse.org/" target="_blank" rel="noopener">Eclipse</a> /
<a href="https://www.eclipse.org/legal/termsofuse.php" target="_blank" rel="noopener">Terms of Use</a> /
<a href="https://www.eclipse.org/legal/copyright.php" target="_blank" rel="noopener">Copyright Agent</a> /
<a href="https://www.eclipse.org/legal/" target="_blank" rel="noopener">Legal</a> /
<a href="https://www.eclipse.org/org/foundation/contact.php" target="_blank" rel="noopener"> Contact Us</a></p>
</div>
</div>
</footer>
</div>
</div><script>window.__assets_js_src="/dataeggs/assets/js/"</script>
<!-- main.c3bcf2df.js, then custom.js: both are classic scripts without async or defer, so they execute in this order. -->
<script src="/dataeggs/assets/js/main.c3bcf2df.js"></script>
<script src="/dataeggs/js/custom.js"></script>
</body>
</html>