#!/usr/bin/perl $^W=1; use strict; no utf8; # vi(1) se: tabstop=4 $ENV{LC_ALL}='C'; my $method='https'; # http or https my $domain='snapshot.debian.org'; my $max_retries=4; # max retries at some possibly transient errors my $retries_sleep=5; # how long we sleep between retries # Read (.deb) file argument(s), # for each, attempt to get size and calculate digest, # then try to locate in $method://$domain/ # and report results. my $my_exit=0; # exit value use File::Basename; my $prog=$0; $prog = basename($0); # block size (in bytes) we'll use my $bs=4096; # 4 KiB my $http_status_ok=200; my $fivexx='\A50[034]\z'; # http 5xx status codes we may retry @ARGV or die("usage: $prog file [...]\n"); # We use fstat to avoid race conditions between getting relevant [l]stat # data and open(2). # Perl doesn't itself give us fstat, so we get that via use POSIX. # Likewise since we need fd for fstat, we use open and close from POSIX. my($sha_alg,$digest_re_)=(1,'(?:[\da-f]{40})'); FILE: for my $file (@ARGV){ my $f=basename($file); my $ignq='[!#-~]'; # RE for IsGraph but Not Quote (") my $ignu='[!-^`-~]'; # RE for IsGraph but Not Underscore my $archre='(?:all|amd64|i386)'; # all + dpkg-architecture -L for full list $f =~ / \A ($ignu+) _ ($ignu+) # see also: deb-version(7) (?: _ ($archre) )? # Some (old) debian package files may lack architeture in file name. \.deb \z /x ; # package name, version(raw), architecture: my ($pkgn,$pkgvr,$arch)=($1,$2,$3); my $uarch=''; # underscore architecture (or empty if not known) if(defined($arch)){ $uarch= '_' . $arch; }; if(!defined($pkgn)){ warn "$prog: bad file name: $file ($f), expecting *?.deb of " . "possible name, version, and generally known " . "architecture, skipping\n"; $my_exit=1; next; }; my $pkgve=$pkgvr; # to be package epoch $pkgve =~ s/%3a/:/g; # "cooked" package version to handle epoch # See also: deb-version(7) # Note despite deb-version(7) epoch may be multiple digits e.g.: # fonts-sil-gentium_20081126:1.03-1_all.deb my $pkgvc; # package version cooked (without epoch) $pkgve =~ /\A((?:\d+:)?)(.+)\z/; # package version epoch, and cooked without epoch: ($pkgve,$pkgvc)=($1,$2); my ($size,$size_re,$digest,$digest_re); use POSIX(); my $fd=POSIX::open($file,&POSIX::O_RDONLY); if(defined($fd)){ my $mode; ($mode,$size)=(POSIX::fstat($fd))[2,7]; if(defined($mode) && defined($size)){ use Fcntl ':mode'; if(!S_ISREG($mode)){ $size=undef; $size_re='\d+'; }else{ $size_re=$size; use Digest::SHA; $digest=Digest::SHA->new($sha_alg); while(1){ my $bytes=POSIX::read($fd,$_,$bs); if(!defined($bytes) or $bytes<0 or !($bytes==0 or $bytes>=1)) { # error (known or unknown?) warn "$prog: read(open($file)) failed: $!\n"; $my_exit=1; $digest=undef; $digest_re=$digest_re_; }elsif($bytes>=1){ $digest->add($_); }else{ # $bytes==0 (EOF) # fall through (calculated digests or failed to) last; }; }; }; }else{ $size=undef; $size_re='\d+'; }; if(!defined(POSIX::close($fd))){ warn "$prog: close(open($file)) failed: $!\n"; $my_exit=1; $digest=undef; $digest_re=$digest_re_; }; }else{ # Failed to open, try stat for size. # We use stat as POSIX::open would follow sym links $size=(stat($file))[7]; if(defined($size && -f _)){ $size_re=$size; }else{ $size=undef; $size_re='\d+'; }; }; if($size_re !~ /\A\d+\z/){ # Didn't get size (e.g. couldn't open or stat regular file), # presume we can't read to get (correct) digest. $digest=undef; $digest_re=$digest_re_; }else{ if(defined($digest)){ $digest_re=$digest=$digest->hexdigest; }else{ $digest_re=$digest_re_; }; }; use WWW::Mechanize(); my $mech=WWW::Mechanize->new(); use URL::Encode(); my $url= "$method://$domain/" . 'binary/?bin=' . URL::Encode::url_encode($pkgn); $mech->autocheck(0); my $status; my $retries=$max_retries; while(1){ if(!defined($mech->get($url))){ warn "$prog: !defined(\$mech->get(\$url)), skipping $file\n"; $my_exit=1; next FILE; }elsif(!defined($status=$mech->status())){ warn "$prog: !defined(\$mech->status())\n, skipping $file\n"; $my_exit=1; next FILE; }elsif($status =~ /$fivexx/){ # this error may be transient, so we may retry if($retries-- > 0){ sleep($retries_sleep); next; }; warn "$prog: \$mech->status() != $http_status_ok " . "(=$status), skipping $file\n"; $my_exit=1; next FILE; }elsif($status != $http_status_ok){ warn "$prog: \$mech->status() != $http_status_ok " . "(=$status), skipping $file\n"; $my_exit=1; next FILE; }; last; }; if ( !defined($mech->follow_link(text_regex => qr/\A\Q$pkgve$pkgvc (\Esource: /)) ) { # Try with (other) possible epoch, # may have been updated/added later. if(!defined($mech->follow_link(text_regex => qr/\A\d+:\Q$pkgvc (\Esource: /))) { warn "$prog: failed to find/follow link for $file, " . "skipping\n"; $my_exit=1; next; }; }; my $retries=$max_retries; while(1){ if(!defined($status=$mech->status())){ warn "$prog: !defined(\$status), skipping $file\n"; $my_exit=1; next FILE; }elsif($status =~ /$fivexx/){ # this error may be transient, so we may retry if($retries-- > 0){ sleep($retries_sleep); $mech->reload(); next; }; warn "$prog: \$mech->status() != $http_status_ok " . "(=$status), skipping $file\n"; $my_exit=1; next FILE; }elsif($status != $http_status_ok){ warn "$prog: \$mech->status() != $http_status_ok " . "(=$status), skipping $file\n"; $my_exit=1; next FILE; }; last; }; my $s='[\t\n\r ]'; # RE for my more limited whitespace characters my $uri; my $re_before_uri= '\A.*?(' . $digest_re . '):[\t\n\r ]*?
' . $s . '*?
' . $s . '*?
' . ''; my $re_more_after_uri= ' \.deb
' . $s . '*?
' . $s . '*? Seen\ in\ [-a-z]+\ on\ \d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2}\ in' . $s . '*? ' . $ignq . '+\.' . $s . '*?' . $s . '*? Size:\ (' . $size_re . ')(?:\D|\z) ' ; if($uarch){ $mech->content() =~ qr! $re_before_uri ([^"]+) $re_right_after_uri \Q$pkgn\E_\Q$pkgvc$uarch\E $re_more_after_uri !msx ; ($digest,$uri,$size)=($1,$2,$3); }else{ $mech->content() =~ qr! $re_before_uri ([^"]+) $re_right_after_uri \Q$pkgn\E_\Q$pkgvc\E(?:_$archre)? $re_more_after_uri !msx ; ($digest,$uri,$size)=($1,$2,$3); }; if(!defined($uri)){ warn "$prog: failed to find/follow link for $file, skipping\n"; $my_exit=1; next; }; if(not $uri =~ m!\A/.+!){ warn "$prog: unexpected link: $uri for $file, skipping\n"; $my_exit=1; next; }; # $uri is bare absolute path, add method and domain: $url="$method://$domain$uri"; my $retries=$max_retries; while(1){ if(!defined($mech->head($url))){ warn "$prog: !defined(\$mech->head(\$url)), skipping " . "$file\n"; $my_exit=1; next FILE; }elsif(!defined($status=$mech->status())){ warn "$prog: !defined(\$mech->status())\n, skipping " . "$file\n"; $my_exit=1; next FILE; }elsif($status =~ /$fivexx/){ # this error may be transient, so we may retry if($retries-- > 0){ sleep($retries_sleep); next; }; warn "$prog: \$mech->status() != $http_status_ok " . "(=$status), skipping $file\n"; $my_exit=1; next FILE; }elsif($status != $http_status_ok){ warn "$prog: \$mech->status() != $http_status_ok " . "(=$status), skipping $file\n"; $my_exit=1; next FILE; }; last; }; # HEAD OK, give file and URL, # also give size if we didn't get that from regular file, # and give digest if we didn't get that from file. # Note that etag may not be digest based, # so we can't use that to compare to our calculated digest print "$file $url" . ( $size_re =~ /\A\d+\z/ ? # got size from regular file, don't add it ( $digest_re =~ /\A$digest_re_\z/ ? '' # got digest from file, don't add it : " $digest" # didn't get digest from file add it ) : # didn't get size nor digest from file, add them: " $size $digest" ) . "\n"; }; exit($my_exit);