#!/usr/bin/perl -w use warnings; use strict; use Data::Dumper; use LWP::Simple; use HTML::LinkExtor; use LWP; use Date::Parse; use Date::Format; use Digest::SHA qw(sha256_hex); # This is Free Software (GPLv3) # http://www.gnu.org/licenses/gpl-3.0.txt print "Creating LWP agent ($LWP::VERSION)...\n"; my $lua = LWP::UserAgent->new( keep_alive => 1, timeout => 30, agent => "Tor MirrorCheck Agent" ); sub sanitize { my $taintedData = shift; my $cleanedData; my $whitelist = '-a-zA-Z0-9: +'; # clean the data, return cleaned data $taintedData =~ s/[^$whitelist]//go; $cleanedData = $taintedData; return $cleanedData; } sub ExtractLinks { my $content = shift; my $url = shift; my @links; my $parser = HTML::LinkExtor->new(undef, $url); $parser->parse($content); foreach my $linkarray($parser->links) { my ($elt_type, $attr_name, $attr_value) = @$linkarray; if ($elt_type eq 'a' && $attr_name eq 'href' && $attr_value =~ /\/$/ && $attr_value =~ /^$url/) { push @links, Fetch($attr_value, \&ExtractLinks); } elsif ($attr_value =~ /\.(xpi|dmg|exe|tar\.gz)$/) #elsif ($attr_value =~ /\.(asc)$/) # small pgp files easier to test with { push @links, $attr_value; } } return @links; } sub ExtractDate { my $content = shift; $content = sanitize($content); my $date = str2time($content); if ($date) { print "\tExtractDate($content) = $date\n"; return $date; } else { print "\tExtractDate($content) = ?\n"; return undef; } } sub ExtractSig { my $content = shift; my $url = shift; my $sig = sha256_hex($content); print "\tExtractSig($url) = $sig\n"; return $sig; } sub Fetch { my ($url, $sub) = @_; # Base url for mirror $|++; # unbuffer stdout to show progress print "\nGET $url: "; my $request = new HTTP::Request GET => "$url"; my $result = $lua->request($request); my $code = $result->code(); print "$code\n"; if ($result->is_success && $code eq "200"){ my $content = $result->content; if ($content) { return $sub->($content, $url); } else { print "Unable to fetch $url, empty content returned.\n"; } } return undef; } my @columns; sub LoadMirrors { open(CSV, "<", "include/tor-mirrors.csv") or die "Cannot open tor-mirrors.csv: $!"; my $line = ; chomp($line); @columns = split(/\s*,\s*/, $line); my @mirrors; while ($line = ) { chomp($line); my @values = split(/\s*,\s*/, $line); my %server; for (my $i = 0; $i < scalar(@columns); $i++) { $server{$columns[$i]} = $values[$i] || ''; } $server{updateDate} = str2time($server{updateDate}) if ($server{updateDate}); push @mirrors, {%server}; } close(CSV); return @mirrors; } sub DumpMirrors { my @m = @_; open(CSV, ">", "include/tor-mirrors.csv") or die "Cannot open tor-mirrors.csv: $!"; print CSV join(", ", @columns) . "\n"; foreach my $server(@m) { $server->{updateDate} = gmtime($server->{updateDate}) if ($server->{updateDate}); print CSV join(", ", map($server->{$_}, @columns)); print CSV "\n"; } close(CSV); } my @m = LoadMirrors(); my $count = scalar(@m); print "We have a total of $count mirrors\n"; print "Fetching the last updated date for each mirror.\n"; my $tortime = Fetch("https://www.torproject.org/project/trace/www-master.torproject.org", \&ExtractDate); my @torfiles = Fetch("https://www.torproject.org/dist/", \&ExtractLinks); my %randomtorfiles; for (1 .. 1) { my $r = int(rand(scalar(@torfiles))); my $suffix = $torfiles[$r]; $suffix =~ s/^https:\/\/www.torproject.org//; $randomtorfiles{$suffix} = Fetch($torfiles[$r], \&ExtractSig); } print "Using these files for sig matching:\n"; print join("\n", keys %randomtorfiles); print "\n"; # Adjust official Tor time by out-of-date offset: number of days * seconds per day $tortime -= 1 * 172800; print "The official time for Tor is $tortime. \n"; for(my $server = 0; $server < scalar(@m); $server++) { foreach my $serverType('httpWebsiteMirror', 'httpsWebsiteMirror', 'ftpWebsiteMirror', 'httpDistMirror', 'httpsDistMirror') { if ($m[$server]->{$serverType}) { my $updateDate = Fetch("$m[$server]->{$serverType}/project/trace/www-master.torproject.org", \&ExtractDate); if ($updateDate) { $m[$server]->{updateDate} = $updateDate; $m[$server]->{sigMatched} = 1; foreach my $randomtorfile(keys %randomtorfiles) { my $sig = Fetch("$m[$server]->{$serverType}/$randomtorfile", \&ExtractSig); if (!$sig) { $m[$server]->{sigMatched} = 0; last; } elsif ($sig ne $randomtorfiles{$randomtorfile}) { $m[$server]->{sigMatched} = 0; last; } } } last; } } } sub PrintServer { my $server = shift; print OUT <<"END"; \n\n $server->{isoCC}\n $server->{orgName}\n Up to date\n END my %prettyNames = ( httpWebsiteMirror => "http", httpsWebsiteMirror => "https", ftpWebsiteMirror => "ftp", rsyncWebsiteMirror => "rsync", httpDistMirror => "http", httpsDistMirror => "https", rsyncDistMirror => "rsync", ); foreach my $precious ( sort keys %prettyNames ) { if ($server->{$precious}) { print OUT " {$precious} . "\">" . "$prettyNames{$precious}\n"; } else { print OUT " - \n"; } } print OUT "\n"; } my $outFile = "include/mirrors-table.wmi"; open(OUT, "> $outFile") or die "Can't open $outFile: $!"; # Here's where we open a file and print some wml include goodness # This is sorted from last known recent update to unknown update times foreach my $server ( sort { $b->{updateDate} <=> $a->{updateDate} } grep {$_->{updateDate} && $_->{updateDate} > $tortime && $_->{sigMatched}} @m ) { PrintServer($server); } DumpMirrors(@m); close(OUT);