#!/usr/bin/perl -w # $Id$ use strict; use Socket; use Carp; $SIG{'__DIE__'} = \&confess; $SIG{'__WARN__'} = \&confess; $SIG{PIPE}=sub { die }; $SIG{ALRM}=sub { die }; $SIG{INT}=sub { die }; $SIG{HUP}=sub { die }; $SIG{TERM}=sub { die }; $SIG{ABRT}=sub { die }; my($NNTPSERVER)=$ENV{NNTPSERVER} or die "set NNTPSERVER"; my($NNTPPORT)=$ENV{NNTPPORT} || 119; my($history) = new History(shift(@ARGV),30*24*60*60); my($nntp) = new NNTPServer($NNTPSERVER,$NNTPPORT); my($decoder) = new Decoder($nntp,$history); my(@groups) = $nntp->expand_groups(@ARGV); for (@groups) { $decoder->group($_); $history->checkpoint(); } $nntp->close(); package Article; sub new { my($class,$xoverstring)=(@_); my($number,$subject,$from,$date,$messageid,$references,$bytes,$lines,$xref)=split(/\t/,$xoverstring); my($self)=bless { number => $number, subject => $subject, from => $from, date => $date, messageid => $messageid, references => $references, bytes => $bytes, lines => $lines, xref => $xref, },$class; return $self; } sub number { $_[0]->{number} } sub subject { $_[0]->{subject} } sub messageid { $_[0]->{messageid} } package Decoder; use FileHandle; sub new { my($class,$nntp,$history)=(@_); bless { history => $history, nntp => $nntp, },$class; } sub mkdir_p { my($dir)=(@_); return if (-d $dir) || length($dir)<2; print STDERR "Creating directory $dir\n"; my($path)=$dir; &mkdir_p($path) if $path =~ s:/[^/]+/*$::; mkdir($dir,0777) or die("mkdir: $dir: $!"); } sub open_file { $_[0] =~ s/[^\w.+-]+/_/go; $_[0] =~ s/^[^\w]/_$&/o; $_[0] = ".tmp/$_[0]"; mkdir_p(".tmp"); ($_[0] =~ s/(\.(\d+))?$/".".(defined($2) ? $2+1 : 1)/eo) while defined(-e $_[0]); sysopen($_[1],$_[0],O_CREAT|O_EXCL|O_WRONLY,0666) or die "create $_[0]: $!"; print STDERR "Created `$_[0]'...\n"; } sub uu_decode_line { my($line,$fh)=(@_); my($len)=(ord(substr($line,0,1))-32)%64; return unless $len; substr($line,0,1)=''; $line =~ tr/ -`/\000-\077\000/; $line = substr(decode_line($line),0,$len); if ($len > length($line)) { print STDERR "Fascinating: length of actual data length($line) < expected $len\n"; } else { syswrite($fh,$line,$len) == $len or die "write: $!"; } } sub mime_decode_line { my($line,$fh)=(@_); $line =~ /=*$/o; my($len)=length($&); $line =~ tr/A-Za-z0-9+\/=/\000-\077\000/; decode_line($line); syswrite($fh,$line,length($line)-$len) == length($line)-$len or die "write: $!"; } sub decode_line { spin(); $_[0]=unpack("B*",$_[0]); $_[0] =~ s/..(......)/$1/go; $_[0]=pack("B*",$_[0]); } sub link_all { my($name,@directories)=(@_); grep(s/[^\w.-]+/_/go,@directories); grep(s/\./\//go,@directories); grep(s/^\W/_$1/o,@directories); grep($_=lc($_),@directories); my($base)=$name; $base =~ s/^.*\///o; for (@directories) { mkdir_p($_); unless (-e "$_/$base") { print STDERR "link: $name -> $_/$base\n"; link($name,"$_/$base") or die "link: $!"; } } } sub close_file { my($name,$fh,$noise,$see,$history,$grouplist)=(@_); return unless length($$name); close($fh) or die "close: $!"; unless (-s $$name) { print STDERR "Empty file `$$name'\n"; unlink($$name) or die "unlink $$name: $!"; } else { print STDERR (-s _)." bytes written to `$$name'\n"; $$name.=",noise"; sysopen($fh,$$name,O_CREAT|O_EXCL|O_WRONLY,0666) or die "create $$name: $!"; print STDERR "Created `$$name'...\n"; syswrite($fh,$$noise,length($$noise)) == length($$noise) or die "write: $!"; close($fh) or die "close: $!"; $$noise=''; if ($$name =~ /\.[1-9]\d*$/o) { my($newfile)=$$name; while ($$name =~ s/\.([1-9]\d*)$/".".($1-1)/eo && -e ($$name)) { unless (system("cmp",$$name,$newfile)) { print STDERR "`$newfile' is identical to `$$name'. Removing `$newfile'\n"; unlink($newfile) or die "unlink `$newfile' failed: $!"; last; } } } link_all($$name,@$grouplist); $$name =~ s/,noise$//o; link_all($$name,@$grouplist); } $$name=''; grep($history->see($_->messageid()),@$see) if defined($see); } sub uulength { my($len)=int(((((ord($_[0]))-32)%64)+2)/3)*4+1; return $len if defined($_[1]); return abs($len-length($_[0]))<4; } my($spin)=0; sub spin { print STDERR substr("/-\\|",$spin++%4,1)."\b"; } sub group { my($self,$groupname)=(@_); my($nntp,$history)=($self->{nntp},$self->{history}); my($group)=$nntp->group($groupname); warn "Could not get to group `$groupname'\n",return unless defined($group); print STDERR "Getting overview file..."; $nntp->sendline("xover ".$group->first()."-".$group->last()); my($line); my(@xovers); while (($line=$nntp->recvline()) ne '.') { spin(); push(@xovers,new Article($line)); } print STDERR "\n"; @xovers=sort { &Subject::compare($a->subject(), $b->subject()) } grep(!$history->seen($_->messageid()),@xovers); my($headers)=''; my($noise)=''; my($filename)=''; my(@data)=(); my(@see_these)=(); my($mime_boundary)=''; my($group_list)=undef; my($fh)=new FileHandle; my($mode)=0; # 1=uuencode, 2=mime my($xoversleft)=$#xovers+1; for (@xovers) { print STDERR "Article ".$_->number()." (".($xoversleft--)." left), group `".$groupname."', subject `".$_->subject()."'\n"; $line=$nntp->sendline("ARTICLE ".$_->number()); warn "Could not get to article ".$_->number()." in group ".$group->name().": $line\n",next unless $line =~ /^220/o; my($content_free)=1; my($shut_up)=0; my($seen_base64)=0; my($newsgroups)=[ $groupname ]; my($prob_filename)=undef; my($is_noise); while (($line=$nntp->recvline()) ne '.') { $is_noise=1; if ($line =~ /^newsgroups:\s+(.+)/oi) { $newsgroups = [ grep(length($_),split(',',$1)) ]; $newsgroups = [ $groupname ] unless $#$newsgroups>=0; print STDERR "Found Newsgroups line: ".join(", ",@$newsgroups)."\n"; } elsif ($line =~ /^subject:.*\s+(\S+\.\w+)\s+/oi) { print STDERR "Probable Filename: $1\n"; $prob_filename=$1; } elsif ($line =~ /^begin \d+\s+(.*\S)/o) { print STDERR "Found uuencode begin line for file: `$1'\n"; print STDERR "File `$filename' truncated\n" if length($filename); close_file(\$filename,$fh,\$noise,undef,undef,$group_list); $filename=$1; open_file($filename,$fh); $mode = 1; $content_free=0; $group_list=$newsgroups; } elsif ($line =~ /^--([\w'()+.:=?\/-]*[\w'()+.:=?\/-])\s*$/o) { $mime_boundary=$1; print STDERR "Found MIME boundary string: `$mime_boundary'\n"; } elsif ($line =~ /^content-transfer-encoding:\s+base64\s*$/oi) { $seen_base64=1; print STDERR "Found base64 encoding hint\n"; } elsif (($line =~ /^content-\w+:.+filename\s*=\s*"(.+)"/io) || (($line eq '') && $seen_base64)) { print STDERR "File `$filename' truncated\n" if length($filename); close_file(\$filename,$fh,\$noise,undef,undef,$group_list); if (length($line)) { print STDERR "Found MIME filename: `$1'\n"; $filename=$1; } else { $prob_filename="Untitled" unless defined($prob_filename); print STDERR "Found MIME but no filename; probable filename is `$prob_filename'\n"; $filename=$prob_filename; } open_file($filename,$fh); $mode = 2; $content_free=0; $group_list=$newsgroups; $seen_base64=0; } elsif (!$mode) { # Look for a full line of uuencode data if ($line =~ /^M[ -`]{60}$/o) { print STDERR "Noise is possibly uuencoded data\n" unless $shut_up++; $is_noise=$content_free=0; } # Note that MIME never appears without its headers. } if ($mode == 1) { if ($line =~ m/^end\s*$/o) { print STDERR "Found end of uuencoded data\n"; close_file(\$filename,$fh,\$noise,[@see_these,$_],$history,$group_list); @see_these=(); $mode=0; $is_noise=$content_free=0; $group_list=undef; } elsif (($line =~ /^[ -`]+$/o)) { unless (uulength($line)) { print STDERR "Length is wrong (".length($line).", not ".uulength($line,1).")\n"; } else { uu_decode_line($line,$fh); $is_noise=$content_free=0; } } } elsif ($mode == 2) { my($mime_end)=''; if ($line =~ /^[A-Za-z0-9+\/]+(=*)$/o) { $is_noise=$mime_end=$1; mime_decode_line($line,$fh); $content_free=0; } if ($mime_end || $line =~ /^--([\w'()+.:=?\/-]*[\w'()+.:=?\/-])\s*$/o) { print STDERR "Found end of MIME data\n"; close_file(\$filename,$fh,\$noise,[@see_these,$_],$history,$group_list); @see_these=(); $mode=0; $is_noise=!$mime_end; $content_free=0; $group_list=undef; } } $noise.="$line\n" if $is_noise; print STDERR "Found noise: `$line'\n" if $is_noise && !$shut_up; } if ($mode) { print STDERR "To be continued: `$filename'\n"; push(@see_these,$_); } if ($content_free) { print STDERR "Disposing of content-free article `".$_->messageid()."'\n"; $history->see($_->messageid()); if ($mode) { print STDERR "File `$filename' truncated\n" if length($filename); $mode=0; close_file(\$filename,$fh,\$noise,\@see_these,$history,$group_list); @see_these=(); $group_list=undef; } } } } sub DESTROY { system("rm -rf .tmp"); } package History; use FileHandle; sub new { my($class,$filename,$maxage)=(@_); my($self)=bless {},$class; $self->{articles}={}; $self->{filename}=$filename; return $self unless defined($filename); my($fh)=new FileHandle; if (sysopen($fh,$filename,O_RDONLY)) { while (<$fh>) { if (m/^(\d+) (.+)/) { $self->{articles}->{$2}=$1 if time-$1 < $maxage; } else { warn "Malformed history line: $filename: $_"; } } close($fh) or die "close: $filename: $!"; } return $self; } sub seen { defined($_[0]->{articles}->{$_[1]}) } sub see { print STDERR "Marking `$_[1]' as read...\n"; $_[0]->{articles}->{$_[1]} = time; } sub checkpoint { my($self)=(@_); return unless defined($self->{filename}); print STDERR "Checkpointing history file `$$self{filename}'\n"; my($fh)=new FileHandle; sysopen($fh,$self->{filename}.".new",O_CREAT|O_EXCL|O_WRONLY,0666) or die "create: $$self{filename}: $!"; print $fh join("\n",map($self->{articles}->{$_}." ".$_,keys(%{$self->{articles}})),"") or die "write: $$self{filename}: $!"; close($fh) or die "close: $$self{filename}: $!"; rename($self->{filename}.".new",$self->{filename}) or die "rename: $$self{filename}.new -> $$self{filename}: $!"; } sub DESTROY { my($self)=(@_); $self->checkpoint(); } package Subject; sub compare { my($sa,$sb) = (@_); return 0 unless $sa cmp $sb; my(@la) = $sa =~ m/[a-zA-Z]+|\d+/go; my(@lb) = $sb =~ m/[a-zA-Z]+|\d+/go; my($aa,$bb); while ($#la>=0) { return -1 if $#lb<0; $aa=shift(@la); $bb=shift(@lb); return $aa <=> $bb if ($aa.$bb) =~ /^\d+$/o && $aa <=> $bb; return $aa cmp $bb if $aa cmp $bb; } return 1 if $#lb>=0; return 0; } package Group; sub new { my($class,$groupline)=(@_); my($_211,$count,$first,$last,$name) = split(' ',$groupline); return bless { count => $count, first => $first, 'last' => $last, name => $name, },$class; } sub first { return $_[0]->{first}; } sub last { return $_[0]->{'last'}; } sub name { return $_[0]->{name}; } package NNTPServer; use FileHandle; use Socket; sub new { my($class,$server,$port)=(@_); my($self)=bless { },$class; $self->{debug} = defined($ENV{NNTPDEBUG}); my($NNTPIP)=scalar(gethostbyname($server)) or die "gethostbyname: error $?, error status $!\n"; $port=scalar(getservbyname($port,'tcp')) or die "gethostbyname: error $?, error status $!\n" unless $port =~ /^\d+$/; my($nntp_sockaddr)=pack_sockaddr_in($port,$NNTPIP); $self->{'socket'} = new FileHandle; socket($self->{'socket'},PF_INET,SOCK_STREAM,getprotobyname('tcp')) or die "socket: $!"; print STDERR "Connecting to NNTP server $NNTPSERVER at ".inet_ntoa($NNTPIP).":$NNTPPORT...\n"; connect($self->{'socket'},$nntp_sockaddr) or die "connect: $!"; $self->{port}=$port; $self->{server}=$server; $self->{group}=undef; $self->{'socket'}->autoflush(1); $self->recvline(); $self->sendline("MODE READER"); return $self; } sub sendline { my($self,$line)=(@_); alarm(600); print STDERR "$$self{server}:$$self{port}: send `$line'\n" if $self->{debug}; syswrite($self->{'socket'},"$line\r\n",length($line)+2) == length($line)+2 or die "NNTP send: $$self{server}:$$self{port}: $!"; alarm(0); return $self->recvline(); } sub recvline { my($self)=(@_); alarm(600); my($s)=$self->{'socket'}; my($line) = scalar(<$s>); alarm(0); die "NNTP recv: $$self{server}:$$self{port}: $!" unless defined($line); $line =~ s/\r?\n$//o or die "NNTP recv: $$self{server}:$$self{port}: mangled line `$line'"; print STDERR "$$self{server}:$$self{port}: recv `$line'\n" if $self->{debug}; return $line; } sub group { my($self,$group)=(@_); print STDERR "Entering group `$group'\n"; my($resp)=$self->sendline("GROUP $group"); if ($resp =~ /^211/) { $self->{group} = new Group($resp); } else { return undef; } } sub close { my($self)=(@_); print STDERR "Disconnecting NNTP server $$self{server}:$$self{port}\n"; my($resp)=$self->sendline("QUIT"); die "close failed: error status $resp\n" unless $resp =~ /^205/o; } sub expand_groups { my($self,@groups)=(@_); my(%groups_out)=(); my(%groups_exclude)=(); my($exclude); srand(time * $$); for (@groups) { $exclude = s/^[!-]//; if (m/\*/o) { print STDERR "Expanding wildcard group `$_'\n"; my($resp)=$self->sendline("LIST ACTIVE $_"); die "list active failed: $resp" unless $resp =~ /^215/o; while (($resp=$self->recvline()) ne ".") { $resp =~ /^(\S+)/o or warn "Malformed list active line: $resp\n"; if ($exclude) { $groups_exclude{lc($1)}=undef; print STDERR "$_ !> $1\n"; } else { $groups_out{lc($1)}=rand(); print STDERR "$_ -> $1\n"; } } } else { if ($exclude) { $groups_exclude{lc($_)}=undef; } else { $groups_out{lc($_)}=rand(); } } } grep(delete($groups_out{$_}),keys(%groups_exclude)); return sort { $groups_out{$a} <=> $groups_out{$b} } keys(%groups_out); }