Initial implementation of new functionality to capture git author data
diff --git a/commits/capture/GitLogCapture.class.inc b/commits/capture/GitLogCapture.class.inc
new file mode 100644
index 0000000..6b548dd
--- /dev/null
+++ b/commits/capture/GitLogCapture.class.inc
@@ -0,0 +1,169 @@
+<?php
+/*******************************************************************************
+ * Copyright (c) 2013 Eclipse Foundation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Wayne Beaton (Eclipse Foundation)- initial API and implementation
+ *******************************************************************************/
+require_once dirname(__FILE__) . '/../common/git-functions.inc';
+
+class GitLogCapture extends GitLogProcessor {
+    var $project;
+	var $path;
+	var $record;
+	
+	// Remembers the project/repository pair, emits the table definitions, and registers the repository.
+	function __construct($project, $path) {
+		$this->project = $project;
+		$this->path = $path;
+		$this->createTables();
+		
+		// Both values originate outside this class and land inside quoted SQL literals, so escape them.
+		$sql = "insert ignore into GitRepo (project, path) values ('" . addSlashes($project) . "', '" . addSlashes($path) . "')";
+		$this->execute($sql);
+	}
+
+	function startCommit($ref) {
+		// Announce the commit in the output, start a fresh per-commit record, and register the commit row.
+		print("\n#Commit: $ref\n");
+		$this->record = array();
+		$this->execute("insert ignore into GitCommit (path, ref) values ('$this->path', '$ref')");
+	}
+
+	function data($ref, $key, $value) {
+		$this->record[$key] = $value; // held in the per-commit record until endCommit() reads it; $ref is not used
+	}
+
+	function addFile($ref, $filename, $added, $removed) {
+		// Escape the file name before it is embedded in the statement; the other values are used as given.
+		$file = addSlashes($filename);
+		$this->execute("insert ignore into GitCommitFile (path, ref, file, added, removed) values ('$this->path', '$ref', '$file', '$added', '$removed')");
+	}
+
+	/**
+	 * Writes out the commit collected since startCommit(): its date, summary, Gerrit change id, and authors.
+	 */
+	function endCommit($ref) {
+		$date = date('Y-m-d H:i:s', $this->record['authorDate']);
+		$comment = addSlashes($this->summarizeComment($this->record['comment']));
+		// The Change-Id is a footer line, so look in the whole comment; the summary is only its first line.
+		$changeId = $this->getChangeId($this->record['comment']);
+		// A real id must be a quoted SQL string; the 'NULL' sentinel has to stay a bare keyword.
+		if ($changeId !== 'NULL') $changeId = "'$changeId'";
+		$this->execute("update GitCommit set date='$date', comment='$comment', changeId=$changeId where path='$this->path' and ref='$ref'");
+
+		foreach($this->getAuthors() as $name => $email) {
+			// Names and addresses are free text taken from the commit (e.g. O'Brien), so escape them.
+			$this->execute("insert ignore into GitCommitAuthor (path, ref, name, email) values ('$this->path', '$ref', '" . addSlashes($name) . "', '" . addSlashes($email) . "')");
+		}
+	}
+	
+	/**
+	 * Answers a name => email map of the commit's authors: the author recorded for the
+	 * commit plus everybody credited on an "Also-By: Name <email>" line of the comment.
+	 */
+	function getAuthors() {
+		// Missing author data is tolerated (hence the @) rather than reported.
+		$authors = array(@$this->record['authorName'] => @$this->record['author']);
+
+		$found = preg_match_all('/Also-By:(.+)<([^>]+)>/i', $this->record['comment'], $credits, PREG_SET_ORDER);
+		if (!$found) return $authors;
+
+		// Each credit holds the full match, the raw name, and the raw address, in that order.
+		foreach ($credits as $credit) {
+			$alsoBy = trim($credit[1]);
+			$authors[$alsoBy] = trim($credit[2]);
+		}
+
+		return $authors;
+	}
+	
+	function summarizeComment($comment) {
+		list($firstLine) = explode("\n", $comment); // the summary is everything before the first newline
+		return $firstLine;
+	}
+	
+	/**
+	 * Extract the Gerrit change id from a commit comment; answers the string 'NULL' when there is none.
+	 */
+	function getChangeId($comment) {
+		$found = preg_match('/Change\-Id: (\w+)/', $comment, $matches);
+		return $found ? $matches[1] : 'NULL';
+	}
+	
+	function execute($sql) {
+		// Nothing is run against a database here: the statement is flattened onto one line and printed.
+		echo preg_replace('/\s+/', ' ', trim($sql)), ";\n";
+	}
+	
+	function createTables() {
+		// Emit the definitions in order: repositories first, then commits, their files, and their authors.
+		foreach (array('GitRepo', 'GitCommit', 'GitCommitFile', 'GitCommitAuthor') as $table) {
+			$this->{"create{$table}Table"}();
+		}
+	}
+
+	function createGitRepoTable() {
+	    // One row per (project, repository path) pair; the statement goes straight to execute().
+	    $this->execute("
+            create table if not exists GitRepo (
+                project varchar(128) not null,
+    		    path varchar(128) not null,
+    		    CONSTRAINT UNIQUE (project, path),
+    		    INDEX (project),
+    		    INDEX (path)) ENGINE=InnoDB
+        ");
+	}
+
+    function createGitCommitTable() {
+    	// One row per commit of a repository; date, comment and changeId are filled in by endCommit().
+    	$this->execute("
+            create table if not exists GitCommit (
+    		    path varchar(128) not null,
+    		    ref varchar(40) not null,
+    		    changeId varchar(60), 
+    		    date datetime not null,
+    		    comment varchar(1024) not null,
+    		    CONSTRAINT UNIQUE (path, ref), 
+    		    INDEX (path), 
+    		    INDEX (ref)) ENGINE=InnoDB
+        ");
+    }
+
+    function createGitCommitFileTable() {
+    	// One row per file of a commit, holding the added/removed values handed to addFile().
+    	$this->execute("
+            create table if not exists GitCommitFile (
+    		    path varchar(128) not null,
+                ref varchar(40) not null,
+                file varchar(128) not null,
+                added int not null,
+                removed int not null,
+    		    CONSTRAINT UNIQUE (path, ref, file), 
+    		    INDEX (path),
+    		    INDEX (ref)) ENGINE=InnoDB
+    	");
+    }
+
+    function createGitCommitAuthorTable() {
+        // One row per author of a commit; the email address (not the name) is part of the unique key.
+        $this->execute("
+            create table if not exists GitCommitAuthor (
+    		    path varchar(128) not null,
+                ref varchar(40) not null,
+                name varchar(128) not null,
+                email varchar(128) not null,
+    		    CONSTRAINT UNIQUE (path, ref, email), 
+    		    INDEX (path),
+    		    INDEX (ref),
+    		    INDEX (name),
+    		    INDEX (email)) ENGINE=InnoDB
+    	");
+    }
+}
+
+?>
\ No newline at end of file
diff --git a/commits/capture/git-author.php b/commits/capture/git-author.php
new file mode 100755
index 0000000..de5018e
--- /dev/null
+++ b/commits/capture/git-author.php
@@ -0,0 +1,63 @@
+#!/usr/bin/php
+<?php
+/*******************************************************************************
+ * Copyright (c) 2013 Eclipse Foundation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Wayne Beaton (Eclipse Foundation)- initial API and implementation
+ *******************************************************************************/
+error_reporting(E_ALL);
+ini_set("display_errors", true);
+
+/**
+ * This script reads lines of the form "<project><TAB><git-dir>" from standard
+ * input, runs git log (with the --numstat option to include file names) on
+ * each repository, and prints SQL statements recording the commits found.
+ * 
+ * Usage:
+ * 
+ *  <command that prints project/git-dir lines> | php git-author.php
+ *  
+ * Examples: 
+ *  echo -e "technology.dash.woolsey\t/gitroot/woolsey/iplog/.git/" | ./git-author.php
+ *  wget http://www.eclipse.org/projects/web-api/roots-generator.php?type=git -O - 2> /dev/null | ./git-author.php
+ *  wget http://www.eclipse.org/projects/web-api/roots-generator.php?type=git -O - 2> /dev/null | grep eclipse.orion | ./git-author.php
+ *  
+ * @author Wayne Beaton
+ */
+
+require_once dirname(__FILE__) . '/../common/git-functions.inc';
+require_once dirname(__FILE__) . '/GitLogCapture.class.inc';
+require_once dirname(__FILE__) . '/../common/common-functions.inc';
+
+// Runs git log on the repository at $path and feeds its output through a GitLogCapture for $project.
+function captureGitLog($project, $path) {
+    //ensureGitLogCacheTableExists();
+    //ensureGitLogCacheLogTableExists();
+    // escapeshellarg() quotes the path as ONE shell argument; GitLogCapture still receives the raw path.
+    $gitCommand = "git --git-dir=" . escapeshellarg($path) . " log --branches --numstat --format=fuller --reverse";
+    logMessage("Git", $gitCommand);
+    $handle = popen($gitCommand, 'r');
+    parseGitLog($handle, new GitLogCapture($project, $path));
+    pclose($handle);
+}
+
+// Each input line names one repository as "<project><TAB><path>"; stop when fgets() reports end of input.
+while (($line = fgets(STDIN)) !== false) {
+	if (!trim($line)) continue;
+	
+	$parts = explode("\t", $line);
+	// A line without a tab has no path field: skip it rather than read an undefined offset.
+	if (count($parts) < 2) continue;
+
+	$project = trim($parts[0]);
+	$path = trim($parts[1]);
+	if (!$path) continue;
+	captureGitLog($project, $path);
+}
+
+?>
\ No newline at end of file
diff --git a/commits/common/git-functions.inc b/commits/common/git-functions.inc
index ac70f71..22d11c7 100755
--- a/commits/common/git-functions.inc
+++ b/commits/common/git-functions.inc
@@ -48,7 +48,7 @@
 	$repository = 'unknown';
 	$project = 'unknown';
 	$top = 'unknown';
-	$commit = 'unknown';
+	$commit = null;
 	$comment = '';
 	
 	$projectNameSegmentPattern = "[a-zA-Z0-9\\-]+";
@@ -126,7 +126,13 @@
 			$date = strtotime(trim($matches[1]));
 			
 			$processor->data($commit, 'date', $date);
+			$processor->data($commit, 'commitDate', $date);
+		} else if (preg_match('/^AuthorDate: (.*)$/', $line, $matches)) {
+			// For example:
+			// AuthorDate:   Thu Dec 16 23:53:58 2010 -0500
+			$date = strtotime(trim($matches[1]));
 			
+			$processor->data($commit, 'authorDate', $date);
 		} else if (preg_match('/^([0-9]+)\s+([0-9]+)\s+(\w.*)$/', $line, $matches)) {
 			// For example:
 			// 2       1       org.eclipse.woolsey.iplog.submit/META-INF/MANIFEST.MF
@@ -140,6 +146,7 @@
 			$filetype = getFileType($filename);		
 					
 			$processor->file($commit, $filename, $filetype, $size);
+			$processor->addFile($commit, $filename, $added, $removed);
 		} else if (preg_match('/^\s+/',$line)) {
 			$comment = trim("$comment\n" . trim($line));
 		}
@@ -277,7 +284,16 @@
 class GitLogProcessor {
 	function startCommit($ref) {}
 	function data($ref, $key, $value) {}
+	
+	/**
+	 * @deprecated Use #addFile()
+	 * @param mixed $ref the commit the parser is currently processing
+	 * @param mixed $filename name of the changed file
+	 * @param mixed $filetype the value getFileType() gives for $filename
+	 * @param mixed $changeSize size of the change (TODO confirm how the parser computes it)
+	 */
 	function file($ref, $filename, $filetype, $changeSize) {}
+	function addFile($ref, $filename, $added, $removed) {}
 	function endCommit($ref) {}
 }