532901be |
#!/usr/bin/perl -w
use warnings;
use strict;
|
a090afa4 |
use Data::Dumper;
|
532901be |
use LWP::Simple;
|
a090afa4 |
use HTML::LinkExtor;
|
532901be |
use LWP;
use Date::Parse;
|
759dd463 |
use Date::Format;
|
a090afa4 |
use Digest::SHA qw(sha256_hex);
|
532901be |
|
238e706b |
# This is Free Software (GPLv3)
# http://www.gnu.org/licenses/gpl-3.0.txt
|
532901be |
print "Creating LWP agent ($LWP::VERSION)...\n";
my $lua = LWP::UserAgent->new(
keep_alive => 1,
|
55e27233 |
timeout => 30,
|
69c4ddc9 |
agent => "Tor MirrorCheck Agent"
|
532901be |
);
sub sanitize {
my $taintedData = shift;
my $cleanedData;
my $whitelist = '-a-zA-Z0-9: +';
# clean the data, return cleaned data
$taintedData =~ s/[^$whitelist]//go;
$cleanedData = $taintedData;
return $cleanedData;
}
|
a090afa4 |
sub ExtractLinks {
my $content = shift;
my $url = shift;
my @links;
|
532901be |
|
a090afa4 |
my $parser = HTML::LinkExtor->new(undef, $url);
$parser->parse($content);
foreach my $linkarray($parser->links)
{
my ($elt_type, $attr_name, $attr_value) = @$linkarray;
if ($elt_type eq 'a' && $attr_name eq 'href' && $attr_value =~ /\/$/ && $attr_value =~ /^$url/)
{
push @links, Fetch($attr_value, \&ExtractLinks);
}
elsif ($attr_value =~ /\.(xpi|dmg|exe|tar\.gz)$/)
|
53965667 |
#elsif ($attr_value =~ /\.(asc)$/) # small pgp files easier to test with
|
a090afa4 |
{
push @links, $attr_value;
}
}
return @links;
}
sub ExtractDate {
my $content = shift;
$content = sanitize($content);
my $date = str2time($content);
if ($date) {
|
f08a2ef2 |
print "\tExtractDate($content) = $date\n";
|
a090afa4 |
return $date;
} else {
|
f08a2ef2 |
print "\tExtractDate($content) = ?\n";
|
a090afa4 |
return undef;
}
}
|
532901be |
|
a090afa4 |
sub ExtractSig {
|
f08a2ef2 |
my $content = shift;
my $url = shift;
my $sig = sha256_hex($content);
print "\tExtractSig($url) = $sig\n";
return $sig;
|
a090afa4 |
}
sub Fetch {
my ($url, $sub) = @_; # Base url for mirror
|
f08a2ef2 |
$|++; # unbuffer stdout to show progress
|
532901be |
|
f08a2ef2 |
print "\nGET $url: ";
|
532901be |
my $request = new HTTP::Request GET => "$url";
my $result = $lua->request($request);
|
a86b52c5 |
my $code = $result->code();
|
f08a2ef2 |
print "$code\n";
|
532901be |
|
a86b52c5 |
if ($result->is_success && $code eq "200"){
|
a090afa4 |
my $content = $result->content;
|
532901be |
if ($content) {
|
a090afa4 |
return $sub->($content, $url);
|
532901be |
} else {
|
a090afa4 |
print "Unable to fetch $url, empty content returned.\n";
|
532901be |
}
}
|
a86b52c5 |
|
a090afa4 |
return undef;
|
532901be |
}
|
53965667 |
my @columns;
sub LoadMirrors {
open(CSV, "<", "include/tor-mirrors.csv") or die "Cannot open tor-mirrors.csv: $!";
my $line = <CSV>;
chomp($line);
@columns = split(/\s*,\s*/, $line);
my @mirrors;
while ($line = <CSV>)
{
chomp($line);
my @values = split(/\s*,\s*/, $line);
my %server;
for (my $i = 0; $i < scalar(@columns); $i++)
{
$server{$columns[$i]} = $values[$i] || '';
}
$server{updateDate} = str2time($server{updateDate}) if ($server{updateDate});
push @mirrors, {%server};
}
close(CSV);
return @mirrors;
}
|
532901be |
|
53965667 |
sub DumpMirrors {
my @m = @_;
|
3dceeeff |
open(CSV, ">", "include/tor-mirrors.csv") or die "Cannot open tor-mirrors.csv: $!";
|
53965667 |
print CSV join(", ", @columns) . "\n";
foreach my $server(@m) {
$server->{updateDate} = gmtime($server->{updateDate}) if ($server->{updateDate});
print CSV join(", ", map($server->{$_}, @columns));
print CSV "\n";
|
fb696fce |
}
|
532901be |
|
53965667 |
close(CSV);
}
my @m = LoadMirrors();
my $count = scalar(@m);
|
532901be |
print "We have a total of $count mirrors\n";
print "Fetching the last updated date for each mirror.\n";
|
53965667 |
my $tortime = Fetch("https://www.torproject.org/project/trace/www-master.torproject.org", \&ExtractDate);
|
a090afa4 |
my @torfiles = Fetch("https://www.torproject.org/dist/", \&ExtractLinks);
my %randomtorfiles;
for (1 .. 1)
{
my $r = int(rand(scalar(@torfiles)));
my $suffix = $torfiles[$r];
$suffix =~ s/^https:\/\/www.torproject.org//;
$randomtorfiles{$suffix} = Fetch($torfiles[$r], \&ExtractSig);
}
print "Using these files for sig matching:\n";
print join("\n", keys %randomtorfiles);
|
f08a2ef2 |
print "\n";
|
a090afa4 |
|
3dceeeff |
# Adjust official Tor time by out-of-date offset: number of days * seconds per day
|
f80d2636 |
$tortime -= 1 * 172800;
|
3d7f7acd |
print "The official time for Tor is $tortime. \n";
|
532901be |
|
53965667 |
for(my $server = 0; $server < scalar(@m); $server++) {
foreach my $serverType('httpWebsiteMirror', 'httpsWebsiteMirror', 'ftpWebsiteMirror', 'httpDistMirror', 'httpsDistMirror')
|
a090afa4 |
{
|
53965667 |
if ($m[$server]->{$serverType}) {
my $updateDate = Fetch("$m[$server]->{$serverType}/project/trace/www-master.torproject.org", \&ExtractDate);
if ($updateDate) {
$m[$server]->{updateDate} = $updateDate;
$m[$server]->{sigMatched} = 1;
|
a090afa4 |
foreach my $randomtorfile(keys %randomtorfiles) {
|
53965667 |
my $sig = Fetch("$m[$server]->{$serverType}/$randomtorfile", \&ExtractSig);
if (!$sig) {
$m[$server]->{sigMatched} = 0;
last;
} elsif ($sig ne $randomtorfiles{$randomtorfile}) {
$m[$server]->{sigMatched} = 0;
last;
}
|
a090afa4 |
}
}
last;
|
53965667 |
}
|
532901be |
}
|
a090afa4 |
}
|
53965667 |
sub PrintServer {
my $server = shift;
|
759dd463 |
print OUT <<"END";
\n<tr>\n
|
f08a2ef2 |
<td>$server->{isoCC}</td>\n
<td>$server->{orgName}</td>\n
|
3dceeeff |
<td>Up to date</td>\n
|
759dd463 |
END
my %prettyNames = (
httpWebsiteMirror => "http",
httpsWebsiteMirror => "https",
ftpWebsiteMirror => "ftp",
rsyncWebsiteMirror => "rsync",
httpDistMirror => "http",
httpsDistMirror => "https",
|
98ae0563 |
rsyncDistMirror => "rsync", );
|
759dd463 |
foreach my $precious ( sort keys %prettyNames )
|
532901be |
{
|
f08a2ef2 |
if ($server->{$precious}) {
|
53965667 |
print OUT " <td><a href=\"" . $server->{$precious} . "\">" .
|
759dd463 |
"$prettyNames{$precious}</a></td>\n";
|
532901be |
} else { print OUT " <td> - </td>\n"; }
}
print OUT "</tr>\n";
}
|
53965667 |
my $outFile = "include/mirrors-table.wmi";
open(OUT, "> $outFile") or die "Can't open $outFile: $!";
# Here's where we open a file and print some wml include goodness
# This is sorted from last known recent update to unknown update times
|
f08a2ef2 |
foreach my $server ( sort { $b->{updateDate} <=> $a->{updateDate} } grep {$_->{updateDate} && $_->{updateDate} > $tortime && $_->{sigMatched}} @m ) {
PrintServer($server);
}
|
53965667 |
DumpMirrors(@m);
|
532901be |
close(OUT);
|