update-mirrors.pl
532901be
 #!/usr/bin/perl -w
 use warnings;
 use strict;
a090afa4
 use Data::Dumper;
532901be
 use LWP::Simple;
a090afa4
 use HTML::LinkExtor;
532901be
 use LWP;
 use Date::Parse;
759dd463
 use Date::Format;
a090afa4
 use Digest::SHA qw(sha256_hex);
532901be
 
238e706b
 # This is Free Software (GPLv3)
 # http://www.gnu.org/licenses/gpl-3.0.txt
 
532901be
 print "Creating LWP agent ($LWP::VERSION)...\n";
 my $lua = LWP::UserAgent->new(
     keep_alive => 1,
55e27233
     timeout => 30,
69c4ddc9
     agent => "Tor MirrorCheck Agent"
532901be
 );
 
 sub sanitize {
     my $taintedData = shift;
     my $cleanedData;
     my $whitelist = '-a-zA-Z0-9: +';
 
     # clean the data, return cleaned data
     $taintedData =~ s/[^$whitelist]//go;
     $cleanedData = $taintedData;
 
     return $cleanedData;
 }
a090afa4
 sub ExtractLinks {
     my $content = shift; 
     my $url     = shift;
     my @links;
532901be
 
a090afa4
     my $parser = HTML::LinkExtor->new(undef, $url);
     $parser->parse($content);
     foreach my $linkarray($parser->links)
     {
          my ($elt_type, $attr_name, $attr_value) = @$linkarray;
          if ($elt_type eq 'a' && $attr_name eq 'href' && $attr_value =~ /\/$/ && $attr_value =~ /^$url/)
          {
          	push @links, Fetch($attr_value, \&ExtractLinks);
          }
 	 elsif ($attr_value =~ /\.(xpi|dmg|exe|tar\.gz)$/)
53965667
 	 #elsif ($attr_value =~ /\.(asc)$/) # small pgp files easier to test with
a090afa4
          {
          	push @links, $attr_value;
          }
     }
     return @links;
 }
 
 sub ExtractDate {
     my $content = shift;  
     $content    = sanitize($content);
     my $date    = str2time($content);
 
     if ($date) {
f08a2ef2
     	print "\tExtractDate($content) = $date\n";
a090afa4
         return $date;
     } else {
f08a2ef2
     	print "\tExtractDate($content) = ?\n";
a090afa4
 	return undef;
     }
 }
532901be
 
a090afa4
 sub ExtractSig {
f08a2ef2
     my $content = shift; 
     my $url     = shift;
     my $sig = sha256_hex($content);
     print "\tExtractSig($url) = $sig\n";
     return $sig;
a090afa4
 }
 
 sub Fetch {
     my ($url, $sub) = @_; # Base url for mirror
f08a2ef2
     $|++; # unbuffer stdout to show progress
532901be
 
f08a2ef2
     print "\nGET $url: ";
532901be
     my $request = new HTTP::Request GET => "$url";
     my $result = $lua->request($request);
a86b52c5
     my $code = $result->code();
f08a2ef2
     print "$code\n";
532901be
 
a86b52c5
     if ($result->is_success && $code eq "200"){
a090afa4
        my $content = $result->content;
532901be
        if ($content) {
a090afa4
 	    return $sub->($content, $url);
532901be
         } else {
a090afa4
             print "Unable to fetch $url, empty content returned.\n";
532901be
         }
     }
a86b52c5
 
a090afa4
     return undef;
532901be
 }
53965667
 my @columns;
 sub LoadMirrors {
     open(CSV, "<", "include/tor-mirrors.csv") or die "Cannot open tor-mirrors.csv: $!"; 
     my $line = <CSV>;
     chomp($line);
     @columns = split(/\s*,\s*/, $line);
     my @mirrors;
     while ($line = <CSV>)
     {
         chomp($line);
 	my @values = split(/\s*,\s*/, $line);
 	my %server;
 	for (my $i = 0; $i < scalar(@columns); $i++)
 	{
 	    $server{$columns[$i]} = $values[$i] || '';
 	}
 	$server{updateDate} = str2time($server{updateDate}) if ($server{updateDate});
 	push @mirrors, {%server};
     }
     close(CSV);
     return @mirrors;
 }
532901be
 
53965667
 sub DumpMirrors {
     my @m = @_;
3dceeeff
     open(CSV, ">", "include/tor-mirrors.csv") or die "Cannot open tor-mirrors.csv: $!";
53965667
     print CSV join(", ", @columns) . "\n";
     foreach my $server(@m) {
 	$server->{updateDate} = gmtime($server->{updateDate}) if ($server->{updateDate});
         print CSV join(", ", map($server->{$_}, @columns));
 	print CSV "\n";
fb696fce
     }
532901be
 
53965667
     close(CSV);
 }
 
 my @m     = LoadMirrors();
 my $count = scalar(@m);
532901be
 print "We have a total of $count mirrors\n";
 print "Fetching the last updated date for each mirror.\n";
 
53965667
 my $tortime  = Fetch("https://www.torproject.org/project/trace/www-master.torproject.org", \&ExtractDate);
a090afa4
 my @torfiles = Fetch("https://www.torproject.org/dist/", \&ExtractLinks); 
 my %randomtorfiles;
 
 for (1 .. 1)
 {
 	my $r = int(rand(scalar(@torfiles)));
 	my $suffix = $torfiles[$r];
 	$suffix =~ s/^https:\/\/www.torproject.org//;
 	$randomtorfiles{$suffix} = Fetch($torfiles[$r], \&ExtractSig);
 }
 
 print "Using these files for sig matching:\n";
 print join("\n", keys %randomtorfiles);
f08a2ef2
 print "\n";
a090afa4
 
3dceeeff
 # Adjust official Tor time by out-of-date offset: number of days * seconds per day
f80d2636
 $tortime -= 1 * 172800;
3d7f7acd
 print "The official time for Tor is $tortime. \n";
532901be
 
53965667
 for(my $server = 0; $server < scalar(@m); $server++) {
     foreach my $serverType('httpWebsiteMirror', 'httpsWebsiteMirror', 'ftpWebsiteMirror', 'httpDistMirror', 'httpsDistMirror')
a090afa4
     {
53965667
         if ($m[$server]->{$serverType}) {
             my $updateDate = Fetch("$m[$server]->{$serverType}/project/trace/www-master.torproject.org", \&ExtractDate);
     				      
             if ($updateDate) {
 		$m[$server]->{updateDate} = $updateDate;
 		$m[$server]->{sigMatched} = 1;
a090afa4
                 foreach my $randomtorfile(keys %randomtorfiles) {
53965667
                     my $sig = Fetch("$m[$server]->{$serverType}/$randomtorfile", \&ExtractSig);
             	    if (!$sig) {
 			$m[$server]->{sigMatched} = 0;
             	    	last;
 		    } elsif ($sig ne $randomtorfiles{$randomtorfile}) {
 			$m[$server]->{sigMatched} = 0;
             	    	last;
             	    }
a090afa4
 		}
             }
 	    last;
53965667
         }
532901be
     }
a090afa4
 }
 
53965667
 sub PrintServer {
      my $server = shift;
759dd463
 print OUT <<"END";
      \n<tr>\n
f08a2ef2
          <td>$server->{isoCC}</td>\n
          <td>$server->{orgName}</td>\n
3dceeeff
          <td>Up to date</td>\n
759dd463
 END
 
      my %prettyNames = (
                         httpWebsiteMirror => "http",
                         httpsWebsiteMirror => "https",
                         ftpWebsiteMirror => "ftp",
                         rsyncWebsiteMirror => "rsync",
                         httpDistMirror => "http",
                         httpsDistMirror => "https",
98ae0563
                         rsyncDistMirror => "rsync", );
759dd463
 
      foreach my $precious ( sort keys %prettyNames )
532901be
      {
f08a2ef2
         if ($server->{$precious}) {
53965667
             print OUT "    <td><a href=\"" . $server->{$precious} . "\">" .
759dd463
                       "$prettyNames{$precious}</a></td>\n";
532901be
         } else { print OUT "    <td> - </td>\n"; }
      }
 
      print OUT "</tr>\n";
 }
 
53965667
 
 my $outFile = "include/mirrors-table.wmi";
 open(OUT, "> $outFile") or die "Can't open $outFile: $!";
 
 # Here's where we open a file and print some wml include goodness
 # This is sorted from last known recent update to unknown update times
f08a2ef2
 foreach my $server ( sort { $b->{updateDate} <=> $a->{updateDate} } grep {$_->{updateDate} && $_->{updateDate} > $tortime && $_->{sigMatched}} @m ) {
     PrintServer($server);
 }
53965667
 
 DumpMirrors(@m);
 
532901be
 close(OUT);