#!/usr/bin/perl
$^W=1;
use strict;
no utf8;
# vi(1) se: tabstop=4
$ENV{LC_ALL}='C';
my $method='https'; # http or https
my $domain='snapshot.debian.org';
my $max_retries=4; # max retries at some possibly transient errors
my $retries_sleep=5; # how long we sleep between retries
# Read (.deb) file argument(s),
# for each, attempt to get size and calculate digest,
# then try to locate in $method://$domain/
# and report results.
my $my_exit=0; # exit value
use File::Basename;
my $prog=$0;
$prog = basename($0);
# block size (in bytes) we'll use
my $bs=4096; # 4 KiB
my $http_status_ok=200;
my $fivexx='\A50[034]\z'; # http 5xx status codes we may retry
@ARGV or die("usage: $prog file [...]\n");
# We use fstat to avoid race conditions between getting relevant [l]stat
# data and open(2).
# Perl doesn't itself give us fstat, so we get that via use POSIX.
# Likewise since we need fd for fstat, we use open and close from POSIX.
my($sha_alg,$digest_re_)=(1,'(?:[\da-f]{40})');
FILE:
for my $file (@ARGV){
my $f=basename($file);
my $ignq='[!#-~]'; # RE for IsGraph but Not Quote (")
my $ignu='[!-^`-~]'; # RE for IsGraph but Not Underscore
my $archre='(?:all|amd64|i386)'; # all + dpkg-architecture -L for full list
$f =~
/
\A
($ignu+)
_
($ignu+) # see also: deb-version(7)
(?:
_
($archre)
)? # Some (old) debian package files may lack architeture in file name.
\.deb
\z
/x
;
# package name, version(raw), architecture:
my ($pkgn,$pkgvr,$arch)=($1,$2,$3);
my $uarch=''; # underscore architecture (or empty if not known)
if(defined($arch)){
$uarch= '_' . $arch;
};
if(!defined($pkgn)){
warn "$prog: bad file name: $file ($f), expecting *?.deb of " .
"possible name, version, and generally known " .
"architecture, skipping\n";
$my_exit=1;
next;
};
my $pkgve=$pkgvr; # to be package epoch
$pkgve =~ s/%3a/:/g; # "cooked" package version to handle epoch
# See also: deb-version(7)
# Note despite deb-version(7) epoch may be multiple digits e.g.:
# fonts-sil-gentium_20081126:1.03-1_all.deb
my $pkgvc; # package version cooked (without epoch)
$pkgve =~ /\A((?:\d+:)?)(.+)\z/;
# package version epoch, and cooked without epoch:
($pkgve,$pkgvc)=($1,$2);
my ($size,$size_re,$digest,$digest_re);
use POSIX();
my $fd=POSIX::open($file,&POSIX::O_RDONLY);
if(defined($fd)){
my $mode;
($mode,$size)=(POSIX::fstat($fd))[2,7];
if(defined($mode) && defined($size)){
use Fcntl ':mode';
if(!S_ISREG($mode)){
$size=undef;
$size_re='\d+';
}else{
$size_re=$size;
use Digest::SHA;
$digest=Digest::SHA->new($sha_alg);
while(1){
my $bytes=POSIX::read($fd,$_,$bs);
if(!defined($bytes) or $bytes<0 or !($bytes==0 or
$bytes>=1))
{
# error (known or unknown?)
warn "$prog: read(open($file)) failed: $!\n";
$my_exit=1;
$digest=undef;
$digest_re=$digest_re_;
}elsif($bytes>=1){
$digest->add($_);
}else{
# $bytes==0 (EOF)
# fall through (calculated digests or failed to)
last;
};
};
};
}else{
$size=undef;
$size_re='\d+';
};
if(!defined(POSIX::close($fd))){
warn "$prog: close(open($file)) failed: $!\n";
$my_exit=1;
$digest=undef;
$digest_re=$digest_re_;
};
}else{
# Failed to open, try stat for size.
# We use stat as POSIX::open would follow sym links
$size=(stat($file))[7];
if(defined($size && -f _)){
$size_re=$size;
}else{
$size=undef;
$size_re='\d+';
};
};
if($size_re !~ /\A\d+\z/){
# Didn't get size (e.g. couldn't open or stat regular file),
# presume we can't read to get (correct) digest.
$digest=undef;
$digest_re=$digest_re_;
}else{
if(defined($digest)){
$digest_re=$digest=$digest->hexdigest;
}else{
$digest_re=$digest_re_;
};
};
use WWW::Mechanize();
my $mech=WWW::Mechanize->new();
use URL::Encode();
my $url=
"$method://$domain/" .
'binary/?bin=' .
URL::Encode::url_encode($pkgn);
$mech->autocheck(0);
my $status;
my $retries=$max_retries;
while(1){
if(!defined($mech->get($url))){
warn "$prog: !defined(\$mech->get(\$url)), skipping $file\n";
$my_exit=1;
next FILE;
}elsif(!defined($status=$mech->status())){
warn "$prog: !defined(\$mech->status())\n, skipping $file\n";
$my_exit=1;
next FILE;
}elsif($status =~ /$fivexx/){
# this error may be transient, so we may retry
if($retries-- > 0){
sleep($retries_sleep);
next;
};
warn "$prog: \$mech->status() != $http_status_ok " .
"(=$status), skipping $file\n";
$my_exit=1;
next FILE;
}elsif($status != $http_status_ok){
warn "$prog: \$mech->status() != $http_status_ok " .
"(=$status), skipping $file\n";
$my_exit=1;
next FILE;
};
last;
};
if (
!defined($mech->follow_link(text_regex =>
qr/\A\Q$pkgve$pkgvc (\Esource: /))
)
{
# Try with (other) possible epoch,
# may have been updated/added later.
if(!defined($mech->follow_link(text_regex =>
qr/\A\d+:\Q$pkgvc (\Esource: /)))
{
warn "$prog: failed to find/follow link for $file, " .
"skipping\n";
$my_exit=1;
next;
};
};
my $retries=$max_retries;
while(1){
if(!defined($status=$mech->status())){
warn "$prog: !defined(\$status), skipping $file\n";
$my_exit=1;
next FILE;
}elsif($status =~ /$fivexx/){
# this error may be transient, so we may retry
if($retries-- > 0){
sleep($retries_sleep);
$mech->reload();
next;
};
warn "$prog: \$mech->status() != $http_status_ok " .
"(=$status), skipping $file\n";
$my_exit=1;
next FILE;
}elsif($status != $http_status_ok){
warn "$prog: \$mech->status() != $http_status_ok " .
"(=$status), skipping $file\n";
$my_exit=1;
next FILE;
};
last;
};
my $s='[\t\n\r ]'; # RE for my more limited whitespace characters
my $uri;
my $re_before_uri=
'\A.*?(' .
$digest_re .
')
:[\t\n\r ]*?
' .
'';
my $re_more_after_uri=
'
\.deb