blob: 590aeefcfb722098a58baff0b0e57df08ee2919e [file] [log] [blame]
#!/usr/bin/perl
# Copyright (c) 2006 Eclipse Foundation, made available under EPL v1.0
# Contributors Ward Cunningham, Bjorn Freeman-Benson
#
# usage:
# cat projects.txt | ./extract.pl > TEXT_FILE_OF_ARTICLES
#
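# alternate:
# extract.pl --ignoretime
# NOTE(review): nothing in this script inspects @ARGV, so --ignoretime appears
# to have no effect here -- confirm before relying on it.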
use strict;
# Sample of the input header lines that the patterns below recognise:
# Project: eclipse
# Newsgroup: eclipse.platform
# From: Tom Roche <tlroche@us.ibm.com>
# Date: Fri, 18 Apr 2003 00:07:12 -0400
# Message-ID: <3E9F79F0.2040101@us.ibm.com>
# In-Reply-To: <b7mqdn$kue$1@rogue.oti.com>
# Fields of the header block currently being collected. They are kept at
# file scope (not inside the loop) because write_data() reads them directly.
my ($project, $newsgroup, $email, $date, $messageid, $replyto);

# Column legend for the rows printed by write_data().
print "# MESSAGEID EMAIL DATE NEWSGROUP REPLYTOID PROJECT\n";

# Read one line at a time. The previous `for (<STDIN>)` evaluated the
# readline in list context, holding every input line in memory before the
# first one was processed.
while (my $line = <STDIN>) {
    chomp $line;

    # A line starting with a run of dashes closes the current record:
    # print whatever was collected, then start over with all fields unset.
    if ($line =~ /^--------------/) {
        write_data();
        ($project, $newsgroup, $email, $date, $messageid, $replyto) = ();
        next;
    }

    # Each header is matched independently; a later line carrying the same
    # header overwrites the earlier value.
    # NOTE(review): the Project/Newsgroup captures stop at the first
    # character outside [A-Za-z._-] (a digit, for instance) -- confirm that
    # is intended.
    $project   = $1 if $line =~ /^Project: ([A-Za-z._-]+)/;
    $newsgroup = $1 if $line =~ /^Newsgroup: ([A-Za-z._-]+)/;

    # Two From: layouts are accepted: an address inside <...>, and a bare
    # token followed by " (". The second test runs last, so it wins when
    # both patterns match the same line.
    $email     = $1 if $line =~ /^From: [^<]*<([^>]+)>/;
    $email     = $1 if $line =~ /^From: (\S+) \(/;

    $date      = $1 if $line =~ /^Date: (.*)/;
    $messageid = $1 if $line =~ /^Message-ID: <(.*)>/;
    $replyto   = $1 if $line =~ /^In-Reply-To: <(.*)>/;
}

# Input that does not end with a separator line used to lose its final
# record. Emit it here, but only if at least one field was captured, so input
# that does end with a separator produces exactly the same output as before.
if (grep { defined $_ } ($project, $newsgroup, $email, $date, $messageid, $replyto)) {
    write_data();
}
# Print the record currently held in the file-scoped field variables as one
# line on STDOUT. The fields are separated by single spaces and appear in the
# order MESSAGEID EMAIL DATE NEWSGROUP REPLYTOID PROJECT. A field that was
# never set contributes an empty string, so its separator is still printed.
sub write_data {
    my @columns = ($messageid, $email, $date, $newsgroup, $replyto, $project);
    print join(' ', map { defined $_ ? $_ : '' } @columns) . "\n";
}