blob: c3b81735ef16a2f0161cec03579626a4f9bcb118 [file] [log] [blame]
<?php
/*******************************************************************************
* Copyright (c) 2010, 2011 Eclipse Foundation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Wayne Beaton (Eclipse Foundation)- initial API and implementation
*******************************************************************************/
/**
 * Parses git log data read from a stream and reports what it finds to a
 * processor object.
 *
 * The logic here is this: We encounter the information
 * in a line-by-line manner. As we encounter the basic information
 * (like the committer identity, commit Id, etc), we remember it.
 * As we encounter file names, we use the information we've collected
 * so far to output a record for that file name.
 *
 * We assume that all the appropriate header information is always
 * included as part of each block.
 *
 * Data comes to us in this form:
 * ---------------------------------------------------------
 * Project: technology.dash.woolsey
 * commit 4e918dade5701c6cf03ad3608489ce00738fc8b5
 * Author: Wayne Beaton <wayne@eclipse.org>
 * AuthorDate: Thu Dec 16 23:53:58 2010 -0500
 * Commit: Wayne Beaton <wayne@eclipse.org>
 * CommitDate: Thu Dec 16 23:53:58 2010 -0500
 *
 * Bug 332692 Added support to capture date as part of submit request. Also added some rudimentary testing of input data and error reporting.
 *
 * 2 1 org.eclipse.woolsey.iplog.submit/META-INF/MANIFEST.MF
 * 4 1 org.eclipse.woolsey.iplog.submit/futz.jpage
 * 13 8 org.eclipse.woolsey.iplog.submit/src/org/eclipse/woolsey/iplog/submit/IpzillaClient.java
 * 40 5 org.eclipse.woolsey.iplog.submit/src/org/eclipse/woolsey/iplog/submit/wizards/SubmitInfoWizardPage.java
 * 60 8 org.eclipse.woolsey.iplog.submit/src/org/eclipse/woolsey/iplog/submit/wizards/SubmitOperation.java
 * ---------------------------------------------------------
 *
 * So, as we encounter the Committer, Author, Date, or file lines, we invoke appropriate
 * methods on the processor object (instance of GitLogProcessor or a subtype).
 *
 * For every commit the processor receives startCommit(), a series of data()
 * calls (project, top, repository, author/committer details, dates), file()
 * and addFile() for each file line, then a single data() call with the
 * accumulated 'comment', and finally endCommit().
 *
 * File lines are ignored while the current commit has no usable date, or
 * when that date falls before the year 2000 (see Bug 333620).
 *
 * @param resource $handle Open, readable stream containing the log data.
 * @param GitLogProcessor $processor Callback instance responsible for handling processing events.
 */
function parseGitLog($handle, $processor) {
    $repository = 'unknown';
    $project = 'unknown';
    $top = 'unknown';
    $commit = null;
    $comment = '';
    // Timestamp of the most recent AuthorDate/CommitDate line of the current
    // commit; null until one has been seen.
    $date = null;

    while (!feof($handle)) {
        $line = fgets($handle);
        // fgets() yields false at end-of-file and on read errors; stop here
        // rather than looping on a stream that never reports EOF.
        if ($line === false) break;

        if (preg_match("/^Repository: (.+)$/i", $line, $matches)) {
            // For example:
            // Repository: /gitroot/e4/org.eclipse.e4.utils.git
            $repository = $matches[1];
        } else if (preg_match("/^Project: (([\w\-]+)(\.[\w\-]+){0,2})$/i", $line, $matches)) {
            // For example:
            // Project: technology.dash.woolsey
            $project = $matches[1];
            $top = $matches[2];
        } else if (preg_match('/^commit ([a-f0-9]+)$/i', $line, $matches)) {
            // For example:
            // commit 4e918dade5701c6cf03ad3608489ce00738fc8b5
            //
            // A new commit header closes the previous commit, if any.
            if ($commit !== null) {
                $processor->data($commit, 'comment', $comment);
                $processor->endCommit($commit);
            }
            $commit = $matches[1];
            $comment = '';
            // Do not let the previous commit's date leak into this one.
            $date = null;
            $processor->startCommit($commit);
            $processor->data($commit, 'project', $project);
            $processor->data($commit, 'top', $top);
            $processor->data($commit, 'repository', $repository);
        } else if (preg_match('/^Commit:/', $line)) {
            // Note that the committer could be an email address or
            // a committer id (we expect that it is most likely an email
            // address).
            //
            // For example:
            // Commit: Wayne Beaton <wayne@eclipse.org>
            // Commit: wbeaton
            // Commit: spingel <>
            // Commit: Steffen Pingel <steffen.pingel@tasktop.com>
            // Commit: steffen.pingel@tasktop.com
            if (preg_match('/Commit:(.+)<([^>]+)>/i', $line, $matches)) {
                // First, look for a name followed by an identity between <>
                $processor->data($commit, 'committerName', trim($matches[1]));
                $processor->data($commit, 'committer', trim($matches[2]));
            } else if (preg_match('/^Commit:\s*([^\s<>]+)/', $line, $matches)) {
                // Then, grab the first token after "Commit:" and hope for the best.
                // The token may contain '.', '@', etc. so that a bare email
                // address is captured whole rather than cut at the first dot.
                $processor->data($commit, 'committer', $matches[1]);
            }
        } else if (preg_match('/^Author:/', $line)) {
            // Note that the author could be an email address or
            // a committer id (we expect that it is most likely an email
            // address).
            //
            // For example:
            // Author: Wayne Beaton <wayne@eclipse.org>
            // Author: wbeaton
            // Author: spingel <>
            // Author: Steffen Pingel <steffen.pingel@tasktop.com>
            // Author: steffen.pingel@tasktop.com
            if (preg_match('/Author:(.+)<([^>]+)>/i', $line, $matches)) {
                // First, look for a name followed by an identity between <>
                $processor->data($commit, 'authorName', trim($matches[1]));
                $processor->data($commit, 'author', trim($matches[2]));
            } else if (preg_match('/^Author:\s*([^\s<>]+)/', $line, $matches)) {
                // Then, grab the first token after "Author:" and hope for the best.
                $processor->data($commit, 'author', $matches[1]);
            }
        } else if (preg_match('/^CommitDate: (.*)$/', $line, $matches)) {
            // For example:
            // CommitDate: Thu Dec 16 23:53:58 2010 -0500
            $date = strtotime(trim($matches[1]));
            $processor->data($commit, 'date', $date);
            $processor->data($commit, 'commitDate', $date);
        } else if (preg_match('/^AuthorDate: (.*)$/', $line, $matches)) {
            // For example:
            // AuthorDate: Thu Dec 16 23:53:58 2010 -0500
            $date = strtotime(trim($matches[1]));
            $processor->data($commit, 'authorDate', $date);
        } else if (preg_match('/^([0-9]+)\s+([0-9]+)\s+(\w.*)$/', $line, $matches)) {
            // For example:
            // 2 1 org.eclipse.woolsey.iplog.submit/META-INF/MANIFEST.MF
            //
            // Ignore weird data (see Bug 333620): no date seen for this
            // commit, an unparseable date, or a date before the year 2000.
            if (!$date || (int)date('Y', $date) < 2000) continue;
            $added = $matches[1];
            $removed = $matches[2];
            $size = $added + $removed;
            $filename = $matches[3];
            $filetype = getFileType($filename);
            $processor->file($commit, $filename, $filetype, $size);
            $processor->addFile($commit, $filename, $added, $removed);
        } else if (preg_match('/^\s+/', $line)) {
            // Indented (or blank) lines belong to the commit message.
            $comment = trim("$comment\n" . trim($line));
        }
    }

    // Close the final commit. The comment is reported exactly once here,
    // just as it is for every earlier commit.
    if ($commit !== null) {
        $processor->data($commit, 'comment', $comment);
        $processor->endCommit($commit);
    }
}
// TODO Move to a "common" import
/**
 * Derives a file type from the extension of a file name.
 *
 * The extension is the run of word characters after the last dot at the
 * very end of the name. 'htm' is reported as 'html' and 'jpg' as 'jpeg';
 * every other extension is returned unchanged (case included).
 *
 * @param string $filename File name or path.
 * @return string The file type, or 'unknown' when no extension is found.
 */
function getFileType($filename) {
    if (!preg_match('/.*\.(\w+)$/', $filename, $found)) {
        return 'unknown';
    }

    // Extensions that are reported under a canonical name.
    $aliases = array('htm' => 'html', 'jpg' => 'jpeg');

    $suffix = $found[1];
    return isset($aliases[$suffix]) ? $aliases[$suffix] : $suffix;
}
/**
 * Resolves an identity found in a log (committer id or email address)
 * to a committer id.
 *
 * Resolution order: a value that already is a committer id is returned
 * as-is; then the table of known bogus ids is consulted; finally the
 * lower-cased value is looked up in the email-to-committer map.
 *
 * @param string $address Committer id or email address.
 * @return string|null The committer id, or null when it cannot be resolved
 *                     (or when $address is empty).
 */
function getCommitterId($address) {
    if (!$address) {
        return null;
    }

    // Already a committer id: nothing to translate.
    if (isCommitterId($address)) {
        return $address;
    }

    // One of the known bogus ids.
    $replacement = mapBogusCommitterId($address);
    if ($replacement) {
        return $replacement;
    }

    // Otherwise treat it as an email address; map keys are lower case.
    $key = strtolower($address);
    $idsByEmail = getEmailToCommitterMap();
    return isset($idsByEmail[$key]) ? $idsByEmail[$key] : null;
}
/**
 * Translates a bogus committer id into the real one.
 *
 * Some bogus ids have crept into the source repositories. We believe
 * that these are related to LDAP problems. Regardless of the source,
 * we need to sort these out.
 *
 * @param string $id The (potentially) bogus committer id
 * @return string|null A replacement id, or null if a mapped value is not available.
 */
function mapBogusCommitterId($id) {
    // TODO can we dynamically generate this mapping with an LDAP call?
    static $replacements = array(
        'uid8185' => 'gliu',
        'uid8428' => 'bvosburgh',
        'uid8436' => 'ydoshiro',
        'uid8762' => 'aigdalov',
        'uid8825' => 'dwagelaar',
        'uid8941' => 'mkhouzam',
        'uid9069' => 'pschonbac',
        'uid9273' => 'szarnekow',
        'uid9453' => 'sstundzig'
    );

    return isset($replacements[$id]) ? $replacements[$id] : null;
}
/**
 * Tells whether a value is a known committer id, i.e. whether it is a key
 * of the map returned by getCommitterIdToEmailMap().
 *
 * @param string $value Candidate committer id.
 * @return bool True when the value is a key of that map.
 */
function isCommitterId($value) {
    $emailsById = getCommitterIdToEmailMap();
    return isset($emailsById[$value]);
}
/**
 * Returns (by reference) the map from lower-cased email address to
 * committer id, loading it on first use.
 *
 * The data is read as lines of "email<TAB>id". Loading also fills the
 * global reverse map ($_committerIdToEmailMap: committer id => list of
 * email addresses). Both maps are cached in globals for the rest of the
 * request; once a load has been attempted (even if it failed or produced
 * nothing) the cached result is returned and $source is not read again.
 *
 * Lines without a tab-separated second field, or with an empty email or
 * id, are skipped so that blank trailing lines do not create bogus
 * entries under the empty key.
 *
 * @param string $source Where to read the data from; anything fopen() accepts.
 * @return array Map of email address => committer id (empty when the
 *               source cannot be opened).
 */
function &getEmailToCommitterMap($source = 'http://www.eclipse.org/projects/web-api/email-id-map.php') {
    global $_emailToCommitterMap;
    global $_committerIdToEmailMap;

    // is_array() rather than a truthiness test: an empty map is a valid
    // cached result and must not trigger another fetch on every call.
    if (is_array($_emailToCommitterMap)) return $_emailToCommitterMap;

    $_emailToCommitterMap = array();
    $_committerIdToEmailMap = array();

    $file = fopen($source, 'r');
    // A by-reference function must return a variable, so hand back the
    // (empty) map instead of a bare return.
    if (!$file) return $_emailToCommitterMap;

    while (($line = fgets($file)) !== false) {
        // explode() replaces split(), which was removed in PHP 7.
        $parts = explode("\t", $line);
        if (count($parts) < 2) continue;

        $email = strtolower(trim($parts[0]));
        $id = trim($parts[1]);
        if ($email === '' || $id === '') continue;

        $_emailToCommitterMap[$email] = $id;
        $_committerIdToEmailMap[$id][] = $email;
    }
    fclose($file);

    return $_emailToCommitterMap;
}
/**
 * Returns (by reference) the global map from committer id to the list of
 * email addresses known for that id.
 *
 * The map is filled as a side effect of getEmailToCommitterMap(), so
 * that function is invoked first and its own return value is discarded.
 *
 * @return array Map of committer id => array of email addresses.
 */
function &getCommitterIdToEmailMap() {
    // Loading the email map is what populates the id map.
    getEmailToCommitterMap();

    global $_committerIdToEmailMap;
    return $_committerIdToEmailMap;
}
/**
 * Looks up the company recorded for a committer.
 *
 * The id is lower-cased before it is looked up in the map returned by
 * getCommitterToCompanyMap().
 *
 * @param string $id Committer id.
 * @return string|null The company, or null when $id is empty, unknown,
 *                     or has an empty company recorded.
 */
function getCommitterCompany($id) {
    if (!$id) {
        return null;
    }

    $key = strtolower($id);
    $companies = getCommitterToCompanyMap();

    // empty() covers both "no entry" and "entry with an empty company".
    return empty($companies[$key]) ? null : $companies[$key];
}
/**
 * Returns (by reference) the map from lower-cased committer id to company
 * name, loading it on first use.
 *
 * The data is read as lines of "id<TAB>company". The map is cached in a
 * global for the rest of the request; once a load has been attempted
 * (even if it failed or produced nothing) the cached result is returned
 * and $source is not read again.
 *
 * Lines without a tab-separated second field, or with an empty id, are
 * skipped so that blank trailing lines do not create an entry under the
 * empty key. An empty company is kept as an empty string.
 *
 * @param string $source Where to read the data from; anything fopen() accepts.
 * @return array Map of committer id => company (empty when the source
 *               cannot be opened).
 */
function &getCommitterToCompanyMap($source = 'http://www.eclipse.org/projects/web-api/commit-companies.php') {
    global $_committerToCompanyMap;

    // is_array() rather than a truthiness test: an empty map is a valid
    // cached result and must not trigger another fetch on every call.
    if (is_array($_committerToCompanyMap)) return $_committerToCompanyMap;

    $_committerToCompanyMap = array();

    $file = fopen($source, 'r');
    // A by-reference function must return a variable, so hand back the
    // (empty) map instead of a bare return.
    if (!$file) return $_committerToCompanyMap;

    while (($line = fgets($file)) !== false) {
        // explode() replaces split(), which was removed in PHP 7.
        $parts = explode("\t", $line);
        if (count($parts) < 2) continue;

        $id = strtolower(trim($parts[0]));
        if ($id === '') continue;

        $_committerToCompanyMap[$id] = trim($parts[1]);
    }
    fclose($file);

    return $_committerToCompanyMap;
}
/**
 * Receiver of the events produced by parseGitLog().
 *
 * Every method here is an empty no-op; subtypes override the ones they
 * are interested in.
 */
class GitLogProcessor {
    /**
     * Called when a new commit header is encountered.
     *
     * @param string $ref The commit id.
     */
    public function startCommit($ref) {}

    /**
     * Called for each piece of information collected about a commit.
     * parseGitLog() sends the keys 'project', 'top', 'repository',
     * 'committerName', 'committer', 'authorName', 'author', 'date',
     * 'commitDate', 'authorDate' and 'comment'.
     *
     * @param string $ref The commit id.
     * @param string $key Name of the piece of information.
     * @param mixed $value Its value.
     */
    public function data($ref, $key, $value) {}

    /**
     * Called for each file line of a commit.
     *
     * @deprecated Use #addFile()
     * @param string $ref The commit id.
     * @param string $filename Name of the changed file.
     * @param string $filetype File type as computed by getFileType().
     * @param int $changeSize Lines added plus lines removed.
     */
    public function file($ref, $filename, $filetype, $changeSize) {}

    /**
     * Called for each file line of a commit.
     *
     * @param string $ref The commit id.
     * @param string $filename Name of the changed file.
     * @param string $added Number of lines added.
     * @param string $removed Number of lines removed.
     */
    public function addFile($ref, $filename, $added, $removed) {}

    /**
     * Called when all information about a commit has been reported.
     *
     * @param string $ref The commit id.
     */
    public function endCommit($ref) {}
}
?>