| #!/usr/bin/perl |
| |
| # ------------------------------------------------------------------------------ |
| # Copyright (c) 2007 Eclipse Foundation, made available under EPL v1.0 |
| # Contributor Karl Matthias |
| # |
| # Parse Subversion commits for the dash project |
| # |
| # usage: |
| # get-companies.pl |
| # svn-extract.pl | svn-parse.pl > TAB_FILE |
| |
| |
| use strict; |
| use FileHandle; |
| |
# ------------------------------------------------------------------------------
# Globals

# Extension overrides applied by filetype(): an empty extension is reported as
# 'none', and a couple of short extensions are mapped to their long form.
my %types = (
    ''    => 'none',
    'htm' => 'html',
    'jpg' => 'jpeg'
);

# Output columns, in the order they are printed on every row.
my @keys = (
    'DATE','YEAR','YEARMONTH','YEARMONTHDAY',
    'TOPPROJECT','PROJECT','FILENAME','FILETYPE',
    'REVISION','CHANGE_SIZE','MESSAGE_SIZE',
    'LOGIN','COMPANY'
);
my %temp;       # column values of the revision currently being parsed
my %values;     # not referenced in the visible part of this file
my %progress;   # per-project line counters, used only for progress output
my $buffer;     # changed-path lines collected for the current revision
#Mward 072810
my $counter = 0;

# Company to committer map.
# Each line holds tab-separated columns: column 1 is used as the login,
# column 2 as that login's company, and column 3 (optional) as an
# underscore-separated name that maps back to the login.
my %companies;
my %unixlogins;
my @companydata = split /\n/, `cat companies.txt companies-alternate.txt`;
for my $each ( @companydata ) {
    my @wrds = split /\t+/, $each;

    # Blank lines (or lines starting with a tab) have no login; skip them
    # instead of creating an entry under the empty key.
    next unless defined($wrds[0]) && length($wrds[0]);
    $companies{$wrds[0]} = $wrds[1];

    # The name column is optional; without it there is nothing to map.
    next unless defined($wrds[2]) && length($wrds[2]);
    $wrds[2] =~ s/_/ /g;
    $unixlogins{lc($wrds[2])} = $wrds[0];
}

# Project to file path map
# split ' ' ignores leading whitespace, so a leading blank line cannot shift
# the key/value pairing.
my %projects = split ' ', `cat roots-svn.txt`;

# Last revisions dumped (for incremental loads)
# Parsed one line at a time so that a single malformed line (for example one
# with an empty project name) is dropped instead of misaligning every
# project/revision pair that follows it.
my %revisions;
for my $entry ( split /\n/, `cat last.revisions` ) {
    my ($project, $revision) = split /\s+/, $entry;
    next unless defined($project)  && length($project);
    next unless defined($revision) && length($revision);
    $revisions{$project} = $revision;
}

# keep track of the project
my $projectid;
| |
# ------------------------------------------------------------------------------
# Do the work!
#
# Reads the log stream from STDIN (or the files named on the command line) one
# line at a time and prints one tab-separated row per changed path, with the
# columns listed in @keys.  Recognised lines:
#
#   ~~~~~PROJECT: <id>             switches the current project
#   r<N> | <author> | <date> ...   revision header, starts a new record
#   <ws>[MADR] /<path>             changed path, buffered until the separator
#   ----------...                  separator, emits the buffered paths
#
# Every other non-blank line is counted as commit message text.
print "#", map("$_\t", @keys), "\n";
while(<>) {
    next if(/^Changed paths:/);     # skip Changed paths: line
    next if(/^\s*$/);               # skip blank lines

    if(/^~~~~~PROJECT: (.*)$/) {
        # A new project starts here.  Always drop any half-parsed revision so
        # it can never be emitted under the new project id (empty repositories
        # otherwise produced bogus records).
        $projectid = $1;
        $buffer = undef;
        %temp = ();
        next;
    }

    # Progress output: announce each project once, then a dot per 1000 lines.
    my $current = defined($projectid) ? $projectid : '';
    my $seen = $progress{$current}++;
    print STDERR "\nparsing $current\n" unless $seen or length($current) < 1;
    print STDERR "." unless $seen % 1000;

    #     rev        author      year           time                    count
    if(/^r(\d+) \| ([^\|]+) \| (\d+-\d+-\d+) (\d+:\d+:\d+) .*? \| (\d+) line/) {
        # Copy the captures before any other match can clobber them.
        my ($revision, $author, $day) = ($1, $2, $3);

        $buffer = undef;
        %temp = ();     # an empty list really empties the hash
        my @date = split('-', $day);
        $temp{'DATE'} = "$date[0]/$date[1]/$date[2]";
        $temp{'YEAR'} = $date[0];
        $temp{'YEARMONTH'} = $date[0] . $date[1];
        $temp{'YEARMONTHDAY'} = $date[0] . $date[1] . $date[2];
        $temp{'REVISION'} = $revision;
        $temp{'CHANGE_SIZE'} = 1; # SVN doesn't report this :(
        $temp{'LOGIN'} = lc($author);

        # Authors containing whitespace are looked up in the name-to-login map.
        if( $temp{'LOGIN'} =~ /\s/ ) {
            $temp{'LOGIN'} = $unixlogins{$temp{'LOGIN'}} if( $unixlogins{$temp{'LOGIN'}} );
        }

        my $company = $companies{$temp{'LOGIN'}};
        $temp{'COMPANY'} = (defined($company) && length($company)) ? $company : 'unknown';
        $temp{'MESSAGE_SIZE'} = 0; # reset for counting later
    } elsif(/^\s+[MADR] (\/.*)/) {
        # Changed path.  Any leading indent is accepted, and 'R' (replaced) is
        # an action code alongside M, A and D.
        $buffer .= $_;
    } elsif(/^----------/) {
        # last line, now loop through all the files and output them since we
        # have the real message size
        my @data = split("\n", defined($buffer) ? $buffer : '');
        foreach my $line (@data) {
            next unless $line =~ /^\s+[MADR] (\/.*)/;
            my $path = $1;
            $temp{'FILENAME'} = $path;
            $temp{'FILETYPE'} = filetype($path);
            $temp{'PROJECT'} = $projectid;
            $temp{'TOPPROJECT'} = topproject($projectid);
            print map("$temp{$_}\t", @keys), "\n";
        }

        # Remember the highest revision seen per project.  Keyed on $projectid
        # so a revision without changed paths cannot create an entry under the
        # empty key.
        if(defined($projectid) && length($projectid)
           && defined($temp{'REVISION'})
           && $temp{'REVISION'} > ($revisions{$projectid} || 0)) {
            $revisions{$projectid} = $temp{'REVISION'};
        }

        # The record is complete; clear it so another separator (such as the
        # one that opens the next project's log) cannot emit it a second time.
        $buffer = undef;
        %temp = ();
    } else {
        # Commit message text: count its length, line terminator included.
        $temp{'MESSAGE_SIZE'} += length();
    }
}
| |
# Write out revision history including any old history that wasn't updated this time
# One "project<TAB>revision" line per project, sorted by project name.
my $key;
my $fh = FileHandle->new("last.revisions", "w")
    or die "Can't overwrite last.revisions: $!\n";
foreach $key (sort keys(%revisions)) {
    # An empty project name or a missing revision would produce a line that
    # cannot be read back correctly on the next run, so leave it out.
    next unless length($key) && defined($revisions{$key});
    $fh->print($key . "\t" . $revisions{$key} . "\n");
}
# Buffered write errors only surface at close time.
$fh->close() or die "Can't finish writing last.revisions: $!\n";
| |
| # ------------------------------------------------------------------------------ |
| # Functions |
| |
# Map a project name to its top-level project.
#
#   $project - project name, possibly dotted
#
# Returns the text in front of the first '.', or the name itself when it
# contains no dot.
sub topproject {
    my ($name) = @_;

    # Names without a dot are already top-level projects.
    return $name unless $name =~ m/\./;

    # The non-greedy capture stops at the first dot.
    $name =~ /^(.*?)\..*?/;
    return $1;
}
| |
# Try hard to get a good file type from the filename
#
#   $filename - path of a changed file
#
# Returns the lower-cased text after the last '.', with anything from the
# first ':' onwards removed.  The result is 'none' when that text still
# contains a '/' (the last dot belonged to a directory, or there was no dot at
# all).  Extensions listed in %types are replaced by their mapped name.
sub filetype {
    my($filename) = @_;

    # keep only everything after the last dot
    my @fields = split(/\./, defined($filename) ? $filename : '');
    my $filetype = @fields ? $fields[-1] : '';

    # Lower-case before the override lookup so that 'JPG' and 'HTM' are
    # normalised exactly like 'jpg' and 'htm'.
    $filetype = lc($filetype);
    $filetype =~ s/:.*//; # some SVN filenames are screwy
    if($filetype =~ /\//) {
        $filetype = 'none';
    }

    # override some windows-ized filetypes
    if(defined($types{$filetype})) {
        $filetype = $types{$filetype};
    }

    # Ok, return it
    return lc($filetype);
}
| |
| # ------------------------------------------------------------------------------ |
| # example commit message: |
| # |
| #|r2393 | thallgren | 2007-07-23 03:11:09 -0400 (Mon, 23 Jul 2007) | 1 line |
| #|Changed paths: |
| #| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/BillOfMaterials.java |
| #| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/DepNode.java |
| #| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/GeneratorNode.java |
| #| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/ResolvedNode.java |
| #| M /org.eclipse.buckminster/trunk/org.eclipse.buckminster.core/src/java/org/eclipse/buckminster/core/metadata/model/UnresolvedNode.java |
| #| |
| #|Fixed issue with skipped nodes causing UnresolvedNodeException |
| #|------------------------------------------------------------------------------ |