| #!/usr/bin/php |
| <?php |
| /******************************************************************************* |
| * Copyright (c) 2010 Eclipse Foundation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * Wayne Beaton (Eclipse Foundation)- initial API and implementation |
| *******************************************************************************/ |
| |
| /** |
| * This script parses the output of the git log command (with the --numstat |
| * option to include file names) and generates the tabular output format |
| * required by Dash. |
| * |
| * Usage: |
| * |
| * git --git-dir=/gitroot/woolsey/iplog/.git/ log --numstat | ./git-parse.php |
| * |
| * Examples: |
| * echo -e "technology.dash.woolsey\t/gitroot/woolsey/iplog/.git/" | ./git-extract.php | ./git-parse.php |
| * wget http://www.eclipse.org/projects/web-api/roots-generator.php?type=git -O - 2> /dev/null | ./git-extract.php | ./git-parse.php |
| * wget http://www.eclipse.org/projects/web-api/roots-generator.php?type=git -O - 2> /dev/null | grep virgo | ./git-extract.php | ./git-parse.php | ./chunk.pl ./insert.pl |
| * |
| * @author Wayne Beaton |
| */ |
| |
// Parser state. Each field holds the placeholder 'unknown' until the
// corresponding line of the git log output has been read.
$project = $top = $commit = 'unknown';
$login = $company = 'unknown';
$date = $year = $yearmonth = $yearmonthday = 'unknown';

// Column names of the tab-separated output, in the order the fields of each
// record are written.
$headers = array(
    "DATE",
    "YEAR",
    "YEARMONTH",
    "YEARMONTHDAY",
    "TOPPROJECT",
    "PROJECT",
    "FILENAME",
    "FILETYPE",
    "REVISION",
    "CHANGE_SIZE",
    "MESSAGE_SIZE",
    "LOGIN",
    "COMPANY"
);

// Regular-expression fragments describing a project id: one segment, and a
// full id made of one to three dot-separated segments.
$projectNameSegmentPattern = "[a-zA-Z0-9\\-]+";
$projectNamePattern = "(([\w\-]+)(\.[\w\-]+){0,2})";

// The header row goes out first, as a single line prefixed with "#".
echo "#", implode("\t", $headers), "\n";
| |
| /* |
| * The logic here is this: We encounter the information |
| * in a line-by-line manner. As we encounter the basic information |
| * (like the committer identity, commit Id, etc), we remember it. |
| * As we encounter file names, we use the information we've collected |
| * so far to output a record for that file name. |
| * |
| * We assume that all the appropriate header information is always |
| * included as part of each block. |
| * |
| * Data comes to us in this form: |
| * --------------------------------------------------------- |
| * Project: technology.dash.woolsey |
| * commit 4e918dade5701c6cf03ad3608489ce00738fc8b5 |
| * Author: Wayne Beaton <wayne@eclipse.org> |
| * AuthorDate: Thu Dec 16 23:53:58 2010 -0500 |
| * Commit: Wayne Beaton <wayne@eclipse.org> |
| * CommitDate: Thu Dec 16 23:53:58 2010 -0500 |
| * |
| * Bug 332692 Added support to capture date as part of submit request. Also added some rudimentary testing of input data and error reporting. |
| * |
| * 2 1 org.eclipse.woolsey.iplog.submit/META-INF/MANIFEST.MF |
| * 4 1 org.eclipse.woolsey.iplog.submit/futz.jpage |
| * 13 8 org.eclipse.woolsey.iplog.submit/src/org/eclipse/woolsey/iplog/submit/IpzillaClient.java |
| * 40 5 org.eclipse.woolsey.iplog.submit/src/org/eclipse/woolsey/iplog/submit/wizards/SubmitInfoWizardPage.java |
| * 60 8 org.eclipse.woolsey.iplog.submit/src/org/eclipse/woolsey/iplog/submit/wizards/SubmitOperation.java |
| * --------------------------------------------------------- |
| * |
| * So, as we encounter the Project, commit, Author, and Date lines, we |
| * record that information. It's only when we hit the file name lines |
| * (at the bottom) that we actually write anything out. |
| */ |
/*
 * Main parsing loop. Header lines (Project, commit, Commit, CommitDate) only
 * update the remembered state; one output record is written for every
 * numstat line, using whatever state has been collected so far.
 *
 * The loop is driven by the result of fgets() rather than feof(): fgets()
 * returns false both at end of input and on a read error, so the loop always
 * terminates.
 */
while (($line = fgets(STDIN)) !== false) {

    // For example:
    //   Project: technology.dash.woolsey
    // Group 1 is the whole project id, group 2 its first segment.
    if (preg_match('/^Project: (([\w\-]+)(\.[\w\-]+){0,2})$/i', $line, $matches)) {
        $project = $matches[1];
        $top = $matches[2];

        // Just to be safe and make sure that information doesn't
        // bleed from one project to the next, we reset everything.
        $commit = 'unknown';
        $login = $company = 'unknown';
        $date = $year = $yearmonth = $yearmonthday = 'unknown';
    }

    // For example:
    //   commit 4e918dade5701c6cf03ad3608489ce00738fc8b5
    if (preg_match('/^commit ([a-f0-9]+)$/i', $line, $matches)) {
        $commit = $matches[1];

        // Just to be safe and make sure that information doesn't
        // bleed from one commit to the next, we reset everything.
        $login = $company = 'unknown';
        $date = $year = $yearmonth = $yearmonthday = 'unknown';
    }

    // Note that the committer could be an email address or a committer id
    // (we expect that it is most likely an email address).
    //
    // For example:
    //   Commit: Wayne Beaton <wayne@eclipse.org>
    //   Commit: wbeaton
    //   Commit: spingel <>
    //   Commit: Steffen Pingel <steffen.pingel@tasktop.com>
    //   Commit: steffen.pingel@tasktop.com
    if (preg_match('/^Commit:/', $line)) {
        if (preg_match('/<([^>]+)>/i', $line, $matches)) {
            // First choice: a non-empty value between <>.
            $login = getCommitterId(trim($matches[1]));
            $company = getCommitterCompany($login);
        } elseif (preg_match('/^Commit:\s*([\w.@+\-]+)/', $line, $matches)) {
            // Otherwise take the first token after "Commit:". The token may
            // contain the characters of an email address, so a bare
            // "steffen.pingel@tasktop.com" is captured whole instead of being
            // cut off at the first dot.
            $login = getCommitterId($matches[1]);
            $company = getCommitterCompany($login);
        }
        // If neither form matches, login and company keep their current values.
    }

    // For example:
    //   CommitDate: Thu Dec 16 23:53:58 2010 -0500
    if (preg_match('/^CommitDate: (.*)$/', $line, $matches)) {
        // A date that strtotime() cannot parse yields false, which date()
        // formats as the epoch; such records are dropped by the year check
        // in the numstat branch.
        $timestamp = strtotime(trim($matches[1]));
        $year = date('Y', $timestamp);
        $yearmonth = date('Ym', $timestamp);
        $yearmonthday = date('Ymd', $timestamp);
        $date = date('Y/m/d', $timestamp);
    }

    // For example:
    //   2 1 org.eclipse.woolsey.iplog.submit/META-INF/MANIFEST.MF
    // Both counts must be numeric and the path must start with a word
    // character, so entries whose path starts with "." are not reported.
    // NOTE(review): confirm that skipping dot-files is intended.
    if (preg_match('/^([0-9]+)\s+([0-9]+)\s+(\w.*)$/', $line, $matches)) {
        if ((int)$year < 2000) continue; // Ignore weird data (see Bug 333620).

        $added = $matches[1];
        $removed = $matches[2];
        $filename = $matches[3];

        $change_size = $added + $removed;
        $message_size = 0; // Always written as 0.
        $filetype = getFileType($filename);

        // Field order matches the header row.
        echo implode("\t", array(
            $date, $year, $yearmonth, $yearmonthday,
            $top, $project, $filename, $filetype, $commit,
            $change_size, $message_size, $login, $company
        )), "\n";
    }
}
| |
| // TODO Move to a "common" import |
/**
 * Derives a file type from the extension of a file name.
 *
 * The extension is the run of word characters after the last dot, provided
 * it reaches the end of the name. 'htm' is reported as 'html' and 'jpg' as
 * 'jpeg'; any other extension is returned unchanged (case included).
 *
 * @param string $filename File name or path.
 * @return string The file type, or 'unknown' when no extension is found.
 */
function getFileType($filename) {
    if (!preg_match('/.*\.(\w+)$/', $filename, $found)) {
        return 'unknown';
    }

    // Extensions that are reported under a different name.
    $aliases = array('htm' => 'html', 'jpg' => 'jpeg');
    $suffix = $found[1];

    return isset($aliases[$suffix]) ? $aliases[$suffix] : $suffix;
}
| |
/**
 * Translates a committer identity taken from a "Commit:" line into a
 * committer id.
 *
 * The value is lower-cased and then resolved in this order:
 *  1. an entry in the email-to-committer map;
 *  2. the part before the "@" when the value has the form "word@token" with
 *     no dot after the "@", as produced for commits migrated from CVS,
 *     e.g. dschaefer@6a79697e-3843-0410-8446-a9668620458d;
 *  3. the lower-cased value itself.
 *
 * @param string $address Email address or committer id.
 * @return string The resolved committer id.
 */
function getCommitterId($address) {
    $key = strtolower($address);

    // Known email addresses translate directly.
    $lookup = getEmailToCommitterMap();
    if (isset($lookup[$key])) {
        return $lookup[$key];
    }

    // Committer id followed by "@" and a UUID-like token.
    $migrated = preg_match('/^(\w+)@[\w\-]+$/', $key, $hit);

    return $migrated ? $hit[1] : $key;
}
| |
/**
 * Returns the table that maps lower-cased email addresses to committer ids.
 *
 * The table is downloaded on the first call and kept in the global
 * $_emailToCommitterMap; later calls return that cached table. Each line of
 * the download is expected to be "email<TAB>id"; lines without a tab are
 * skipped. If the download cannot be opened the table stays empty, and that
 * empty table is cached as well so the request is not repeated for every
 * commit.
 *
 * @return array Map of email address (lower case) => committer id.
 */
function &getEmailToCommitterMap() {
    global $_emailToCommitterMap;

    // is_array() rather than a truthiness test: an empty table still counts
    // as loaded.
    if (is_array($_emailToCommitterMap)) return $_emailToCommitterMap;

    $_emailToCommitterMap = array();
    $file = fopen('https://www.eclipse.org/projects/web-api/email-id-map.php', 'r');
    // Return the (empty) table itself; a by-reference function must return a
    // variable.
    if (!$file) return $_emailToCommitterMap;

    // Loop on the result of fgets() so the trailing false at end of input is
    // never processed as a line.
    while (($line = fgets($file)) !== false) {
        $parts = explode("\t", $line);
        if (count($parts) < 2) continue; // Blank or malformed line.

        $email = strtolower(trim($parts[0]));
        $id = trim($parts[1]);

        $_emailToCommitterMap[$email] = $id;
    }
    fclose($file);

    return $_emailToCommitterMap;
}
| |
/**
 * Looks up the company associated with a committer id.
 *
 * The id is lower-cased before the lookup. A committer without an entry and
 * a committer whose entry is empty are both reported as 'unknown'.
 *
 * @param string $id Committer id.
 * @return string Company name, or 'unknown'.
 */
function getCommitterCompany($id) {
    $companies = getCommitterToCompanyMap();
    $key = strtolower($id);

    if (!isset($companies[$key]) || !$companies[$key]) {
        return 'unknown';
    }

    return $companies[$key];
}
| |
/**
 * Returns the table that maps lower-cased committer ids to company names.
 *
 * The table is downloaded on the first call and kept in the global
 * $_committerToCompanyMap; later calls return that cached table. Each line
 * of the download is expected to be "id<TAB>company"; lines without a tab
 * are skipped. If the download cannot be opened the table stays empty, and
 * that empty table is cached as well so the request is not repeated for
 * every commit.
 *
 * @return array Map of committer id (lower case) => company name.
 */
function &getCommitterToCompanyMap() {
    global $_committerToCompanyMap;

    // is_array() rather than a truthiness test: an empty table still counts
    // as loaded.
    if (is_array($_committerToCompanyMap)) return $_committerToCompanyMap;

    $_committerToCompanyMap = array();
    $file = fopen('https://www.eclipse.org/projects/web-api/commit-companies.php', 'r');
    // Return the (empty) table itself; a by-reference function must return a
    // variable.
    if (!$file) return $_committerToCompanyMap;

    // Loop on the result of fgets() so the trailing false at end of input is
    // never processed as a line.
    while (($line = fgets($file)) !== false) {
        $parts = explode("\t", $line);
        if (count($parts) < 2) continue; // Blank or malformed line.

        $id = strtolower(trim($parts[0]));
        $company = trim($parts[1]);

        $_committerToCompanyMap[$id] = $company;
    }
    fclose($file);

    return $_committerToCompanyMap;
}
| |
| ?> |