#!/usr/bin/perl
# ------------------------------------------------------------------------------
# Copyright (c) 2007 Eclipse Foundation, made available under EPL v1.0
# Contributor Karl Matthias
#
# Parse Subversion commits for the dash project
#
# usage:
# get-companies.pl
# svn-extract.pl | svn-parse.pl > TAB_FILE
use strict;
use FileHandle;
# ------------------------------------------------------------------------------
# Globals

# File type aliases: raw extension => name reported in the FILETYPE column
my %types = (
    ''    => 'none',
    'htm' => 'html',
    'jpg' => 'jpeg'
);

# Output columns, in the order they are printed
my @keys = (
    'DATE','YEAR','YEARMONTH','YEARMONTHDAY',
    'TOPPROJECT','PROJECT','FILENAME','FILETYPE',
    'REVISION','CHANGE_SIZE','MESSAGE_SIZE',
    'LOGIN','COMPANY'
);

my %temp;        # column values of the commit currently being parsed
my %values;
my %progress;    # number of input lines counted per project, for progress output
my $buffer;      # changed-path lines collected for the current commit
#Mward 072810
my $counter = 0;

# Return the whole content of $path as one string. If the file cannot be
# opened a message goes to STDERR and '' is returned, so a missing data file
# (e.g. last.revisions on a first run) yields empty data instead of aborting.
sub read_data_file {
    my ($path) = @_;
    my $in;
    unless (open($in, '<', $path)) {
        print STDERR "Can't read $path: $!\n";
        return '';
    }
    local $/;    # slurp mode: read everything in one go
    my $content = <$in>;
    close($in);
    return defined $content ? $content : '';
}

# Company to committer map
# Each line holds tab-separated fields: login, company and an optional
# alternate author name in which '_' stands for a space.
my %companies;     # login => company
my %unixlogins;    # lower-cased alternate author name => login

# Split each file on its own so that a missing final newline in the first
# file cannot glue its last line onto the first line of the second file.
my @companydata = map { split /\n/, read_data_file($_) }
    ('companies.txt', 'companies-alternate.txt');
for my $each ( @companydata ) {
    my ($login, $company, $alias) = split /\t+/, $each;
    next unless defined $login && length $login;    # blank or malformed line
    $companies{$login} = $company;
    next unless defined $alias;                      # no alternate name given
    $alias =~ s/_/ /g;
    $unixlogins{lc($alias)} = $login;
}

# Project to file path map
# split ' ' ignores leading whitespace; split /\s+/ would produce a leading
# empty field there and shift every key/value pair by one.
my %projects = split ' ', read_data_file('roots-svn.txt');

# Last revisions dumped (for incremental loads)
my %revisions = split ' ', read_data_file('last.revisions');

# keep track of the project
my $projectid;
# ------------------------------------------------------------------------------
# Do the work!
#
# Reads the log stream line by line and prints one tab-separated row per
# changed path. Every non-blank line is classified as one of:
#   - a revision header ("r123 | author | date time ... | N line"),
#   - a changed-path line (action letter M, A or D followed by a path),
#   - a separator line of dashes, which ends a commit and triggers output,
#   - a "~~~~~PROJECT: <id>" marker naming the project of the commits that follow,
#   - anything else, which is counted as commit message text.
# Rows are held back until the separator because MESSAGE_SIZE is only known
# once the whole message has been read.
print "#", map("$_\t", @keys), "\n";
while (<>) {
    next if /^Changed paths:/;    # skip Changed paths: line
    next if /^\s*$/;              # skip blank lines

    # Progress output on STDERR. PROJECT is only filled in when a commit's
    # rows are written, so the counter is keyed by the project of the most
    # recently written commit ('' right after a revision header).
    my $progress = $progress{$temp{'PROJECT'}}++;
    print STDERR "\nparsing $temp{'PROJECT'}\n" unless $progress or length($temp{'PROJECT'}) < 1;
    print STDERR "." unless $progress % 1000;

    #                                     rev       author      year             time                 count
    if (my ($revision, $author, $ymd) =
            /^r(\d+) \| ([^\|]+) \| (\d+-\d+-\d+) (\d+:\d+:\d+) .*? \| (\d+) line/) {
        # A new commit starts: forget everything about the previous one
        $buffer = undef;
        %temp   = ();

        my ($year, $month, $day) = split('-', $ymd);
        $temp{'DATE'}         = "$year/$month/$day";
        $temp{'YEAR'}         = $year;
        $temp{'YEARMONTH'}    = $year . $month;
        $temp{'YEARMONTHDAY'} = $year . $month . $day;
        $temp{'REVISION'}     = $revision;
        $temp{'CHANGE_SIZE'}  = 1;    # SVN doesn't report this :(

        # An author containing whitespace is looked up in the alternate-name
        # map and replaced by the mapped login when there is one
        my $login = lc($author);
        if ($login =~ /\s/ && $unixlogins{$login}) {
            $login = $unixlogins{$login};
        }
        $temp{'LOGIN'} = $login;

        my $company = $companies{$login};
        $temp{'COMPANY'} = (defined $company && length $company) ? $company : 'unknown';

        $temp{'MESSAGE_SIZE'} = 0;    # reset for counting later
    } elsif (/^ [MAD] (\/.*)/) {
        # changed path: remember it until the commit is complete
        $buffer .= $_;
    } elsif (/^----------/) {
        # last line, now loop through all the files and output them since we have the real message size
        my @paths = defined $buffer ? split("\n", $buffer) : ();
        foreach my $line (@paths) {
            my ($path) = $line =~ /^ [MAD] (\/.*)/;
            next unless defined $path;
            $temp{'FILENAME'}   = $path;
            $temp{'FILETYPE'}   = filetype($path);
            $temp{'PROJECT'}    = $projectid;
            $temp{'TOPPROJECT'} = topproject($projectid);
            print map("$temp{$_}\t", @keys), "\n";
        }

        # Track the highest revision written per project. Only commits that
        # produced rows for a named project count, so no entry with an empty
        # key can end up in last.revisions.
        if (@paths && defined $projectid && length $projectid
                && $temp{'REVISION'} > ($revisions{$projectid} || 0)) {
            $revisions{$projectid} = $temp{'REVISION'};
        }

        # The commit has been written. Drop its paths so that a later
        # separator with no revision header in between (such as one directly
        # after a project marker) cannot print them a second time.
        $buffer = undef;
    } elsif (/^~~~~~PROJECT: (.*)$/) {
        #mward 072810 clear up problem where 'empty/nonexistent' repos are inserting a single bogus record
        # Leftover state belongs to the previous project: flush it on every marker
        $buffer    = undef;
        %temp      = ();
        $projectid = $1;
    } else {
        # commit message text
        $temp{'MESSAGE_SIZE'} += length($_);
    }
}
# Write out revision history including any old history that wasn't updated this time.
# The file gets one "project<TAB>revision" line per entry, sorted by project
# so that repeated runs produce a stable file.
my $fh = FileHandle->new("last.revisions", "w");
if ($fh) {
    foreach my $key (sort keys %revisions) {
        # The file is read back by splitting on whitespace, so an entry with
        # an empty key or value would shift every following pair: leave it out.
        next unless length $key;
        next unless defined $revisions{$key} && length $revisions{$key};
        $fh->print($key . "\t" . $revisions{$key} . "\n");
    }
    $fh->close() or print STDERR "Can't finish writing last.revisions: $!\n";
} else {
    # Report the failure and carry on; there is nothing left to do but exit
    print STDERR "Can't overwrite last.revisions\n";
}
# ------------------------------------------------------------------------------
# Functions
# Find the top project given the project name.
# The top project is the part of a dotted name before its first dot; a name
# without any dot is returned unchanged.
sub topproject {
    my ($name) = @_;

    # top levels: no dot, nothing to strip
    return $name unless $name =~ m/\./;

    # capture everything up to the first dot
    $name =~ /^(.*?)\..*?/;
    return $1;
}
# Try hard to get a good file type from the filename.
#
# Takes a path and returns a lower-case type name: the text after the last
# dot, with anything from the first ':' onward removed. 'none' is returned
# when that text contains a '/' (the last dot was in a directory name) or is
# empty. Aliases listed in %types are applied regardless of the case of the
# extension.
sub filetype {
    my ($filename) = @_;
    $filename = '' unless defined $filename;

    # keep only everything after the last dot
    my @fields   = split(/\./, $filename);
    my $filetype = @fields ? $fields[-1] : '';
    $filetype =~ s/:.*//;    # some SVN filenames are screwy

    if ($filetype =~ /\//) {
        $filetype = 'none';
    }

    # override some windows-ized filetypes; lower-case first so that
    # extensions such as 'JPG' or 'HTM' are found in the alias table too
    $filetype = lc($filetype);
    if (defined($types{$filetype})) {
        $filetype = $types{$filetype};
    }

    # Ok, return it
    return lc($filetype);
}
# ------------------------------------------------------------------------------
# example commit message:
#
#|r2393 | thallgren | 2007-07-23 03:11:09 -0400 (Mon, 23 Jul 2007) | 1 line
#|Changed paths:
#| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/BillOfMaterials.java
#| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/DepNode.java
#| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/GeneratorNode.java
#| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/ResolvedNode.java
#| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/UnresolvedNode.java
#|
#|Fixed issue with skipped nodes causing UnresolvedNodeException
#|------------------------------------------------------------------------------