#!/usr/bin/perl
# Copyright (c) 2006 Eclipse Foundation, made available under EPL v1.0
# Contributors Ward Cunningham, Bjorn Freeman-Benson
#
# usage:
# cat projects.txt | ./extract.pl > TEXT_FILE_OF_ARTICLES
#
# alternate (full extract even if the last.time file exists):
# cat projects.txt | ./extract.pl --ignoretime > TEXT_FILE_OF_ARTICLES

use strict;
use warnings;
use Net::NNTP;
# see http://perldoc.perl.org/Net/NNTP.html

# Main script: reads "Project: NAME" and "Newsgroup: NAME" lines from STDIN
# and calls process() once per newsgroup line, using the most recently seen
# project name. Article headers go to STDOUT; status and progress go to STDERR.

# --ignoretime as the first argument forces a full extract.
my $first_arg  = shift;
my $ignoretime = ( defined $first_arg && $first_arg =~ /--ignoretime/ ) ? 1 : 0;

# In incremental mode the modification time of last.time (epoch seconds) is
# handed to process() as an optional third argument; 0 means "everything".
# NOTE(review): nothing in this script creates or updates last.time;
# presumably the calling job touches it after a run -- confirm.
my $since = 0;
my $mtime = $ignoretime ? undef : ( stat 'last.time' )[9];
if ( defined $mtime ) {
    $since = $mtime;

    # Format the timestamp in Perl rather than shelling out to `date -r`,
    # which is not portable and left a trailing newline in the message.
    my @t    = localtime $mtime;
    my $last = sprintf '%04d/%02d/%02d %02d:%02d:%02d',
        $t[5] + 1900, $t[4] + 1, $t[3], $t[2], $t[1], $t[0];
    print STDERR "incremental extract from $last\n";
}
else {
    print STDERR "full extract\n";
}

# File-level connection; process() uses this variable directly.
my $nntp = Net::NNTP->new("news.eclipse.org")
    or die "cannot connect to news.eclipse.org: $@\n";

# NOTE(review): credentials are hard-coded in the source; consider moving
# them to the environment or a protected configuration file.
$nntp->authinfo( "exquisitus", "flinder1f7" )
    or warn "NNTP authentication failed: " . $nntp->message;

# Start with an empty project name so a Newsgroup line that precedes any
# Project line still yields a well-formed "Project: " record.
my $proj = '';

# Read line by line instead of slurping all of STDIN into a list first.
while ( my $line = <STDIN> ) {
    chomp $line;
    print STDERR ".";    # one progress dot per input line

    # Digits are accepted so names such as "eclipse.e4" are not truncated.
    if ( $line =~ /^Project: ([A-Za-z0-9._-]+)/ ) {
        $proj = $1;
    }
    elsif ( $line =~ /^Newsgroup: ([A-Za-z0-9._-]+)/ ) {
        process( $proj, $1, $since );
    }
}

$nntp->quit();

# process( $proj, $group [, $since] )
#
# For every article the server lists via NEWNEWS for $group, prints one
# record to STDOUT: a "Project:" line, a "Newsgroup:" line, the article's
# header lines as returned by the server, and a dashed separator line.
#
#   $proj   project name echoed into each record
#   $group  newsgroup name to query
#   $since  optional cutoff time (epoch seconds) passed to NEWNEWS; defaults
#           to 0, i.e. all articles, when omitted or undefined
#
# Uses the file-level $nntp connection. A group whose NEWNEWS query fails,
# or an article whose headers cannot be fetched, is reported on STDERR and
# skipped instead of being emitted as an empty record.
sub process {
    my ( $proj, $group, $since ) = @_;
    $since = 0 unless defined $since;

    my $newnews = $nntp->newnews( $since, $group );
    unless ( defined $newnews ) {
        warn "NEWNEWS failed for $group: " . $nntp->message;
        return;
    }

    foreach my $messageid (@$newnews) {
        my $head = $nntp->head($messageid);
        unless ( defined $head ) {
            warn "HEAD failed for $messageid in $group: " . $nntp->message;
            next;
        }

        print "Project: $proj\n";
        print "Newsgroup: $group\n";
        print @$head;    # header lines already carry their own newlines
        print "-----------------------\n";
    }

    return;
}