| #!/usr/bin/perl |
| # Copyright (c) 2006 Eclipse Foundation, made available under EPL v1.0 |
| # Contributors Ward Cunningham, Bjorn Freeman-Benson |
| # |
| # usage: |
| # cat projects.txt | ./extract.pl > TEXT_FILE_OF_ARTICLES |
| # |
| # alternate: |
| # extract.pl --ignoretime |
| |
| use strict; |
| |
| |
| # Project: eclipse |
| # Newsgroup: eclipse.platform |
| # From: Tom Roche <tlroche@us.ibm.com> |
| # Date: Fri, 18 Apr 2003 00:07:12 -0400 |
| # Message-ID: <3E9F79F0.2040101@us.ibm.com> |
| # In-Reply-To: <b7mqdn$kue$1@rogue.oti.com> |
| |
# Fields of the article currently being parsed.  They are file-scoped
# because write_data() reads them directly when a record is emitted.
my ($project, $newsgroup, $email, $date, $messageid, $replyto);

# Header line naming the columns of every record printed below.
print "# MESSAGEID EMAIL DATE NEWSGROUP REPLYTOID PROJECT\n";

# Read STDIN one line at a time.  while() keeps the readline in scalar
# context so the input is streamed; for (<STDIN>) would evaluate it in
# list context and load the entire input into memory first.
while (my $line = <STDIN>) {
    chomp $line;

    if ($line =~ /^--------------/) {
        # A row of dashes ends the current article: emit what has been
        # collected, then clear every field for the next article.
        write_data();
        ($project, $newsgroup, $email, $date, $messageid, $replyto) = ();
    }
    else {
        # Digits are accepted in project and newsgroup names so that a
        # name such as "eclipse.tools.uml2" is not cut off at the digit.
        $project   = $1 if $line =~ /^Project: ([A-Za-z0-9._-]+)/;
        $newsgroup = $1 if $line =~ /^Newsgroup: ([A-Za-z0-9._-]+)/;

        # Two sender layouts are recognised:
        #   From: Display Name <user@host>
        #   From: user@host (Display Name)
        $email     = $1 if $line =~ /^From: [^<]*<([^>]+)>/;
        $email     = $1 if $line =~ /^From: (\S+) \(/;

        $date      = $1 if $line =~ /^Date: (.*)/;
        $messageid = $1 if $line =~ /^Message-ID: <(.*)>/;
        $replyto   = $1 if $line =~ /^In-Reply-To: <(.*)>/;
    }
}

# Input that does not end with a separator line still holds one collected
# article; emit it instead of silently dropping it.  Nothing is printed
# when every field is unset (i.e. the input ended right after a separator).
write_data()
    if grep { defined } ($project, $newsgroup, $email, $date, $messageid, $replyto);
| |
# Print the current article as one space-separated line on STDOUT, in the
# column order MESSAGEID EMAIL DATE NEWSGROUP REPLYTOID PROJECT.
# Takes no arguments: the values come from the file-scoped field
# variables.  A field that is undefined is printed as an empty string.
sub write_data {
    my @columns = ($messageid, $email, $date, $newsgroup, $replyto, $project);

    # Build the whole line first so print receives a single string.
    print join(' ', @columns) . "\n";
}