1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/openpkg/mirror.pl Tue Jul 31 12:23:42 2012 +0200 1.3 @@ -0,0 +1,463 @@ 1.4 +## 1.5 +## mirror.pl -- OpenPKG Package Mirroring 1.6 +## Copyright (c) 2011-2012 OpenPKG GmbH <http://openpkg.com/> 1.7 +## 1.8 +## This software is property of the OpenPKG GmbH, DE MUC HRB 160208. 1.9 +## All rights reserved. Licenses which grant limited permission to use, 1.10 +## copy, modify and distribute this software are available from the 1.11 +## OpenPKG GmbH. 1.12 +## 1.13 +## THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED 1.14 +## WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 1.15 +## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1.16 +## IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR 1.17 +## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.18 +## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.19 +## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 1.20 +## USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 1.21 +## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 1.22 +## OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 1.23 +## OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 1.24 +## SUCH DAMAGE. 1.25 +## 1.26 + 1.27 +require 5.003; 1.28 + 1.29 +# OpenPKG instance prefix and tools 1.30 +my $my_prefix = $ENV{'OPENPKG_PREFIX'}; 1.31 +my $my_rpm = "$my_prefix/bin/openpkg rpm"; 1.32 +my $my_openssl = "$my_prefix/lib/openpkg/openssl"; 1.33 +delete $ENV{'OPENPKG_PREFIX'}; 1.34 + 1.35 +# program identification 1.36 +my $progname = "search"; 1.37 +my $progvers = "0.1.0"; 1.38 + 1.39 +# determine reasonable temporary directory 1.40 +my $tmpdir = ($ENV{"TMPDIR"} || "/tmp"); 1.41 + 1.42 +# use unbuffered stdout 1.43 +$|++; 1.44 + 1.45 +## 1.46 +## COMMAND-LINE INTERFACE HANDLING 1.47 +## 1.48 + 1.49 +# home-brewn getopt(3) style option parser 1.50 +sub getopts ($$@) { 1.51 + my ($opt_spec, $opts, @argv_orig) = @_; 1.52 + my (%optf) = map { m/(\w)/; $1 => $_ } $opt_spec =~ m/(\w:|\w)/g; 1.53 + my (@argv, $optarg); 1.54 + 1.55 + foreach (@argv_orig) { 1.56 + if (@argv) { 1.57 + push @argv, $_; 1.58 + } elsif (defined $optarg) { 1.59 + if (exists $opts->{$optarg}) { 1.60 + $opts->{$optarg} .= " $_"; 1.61 + } else { 1.62 + $opts->{$optarg} = $_; 1.63 + } 1.64 + $optarg = undef; 1.65 + } elsif (!/^[-]/) { 1.66 + push @argv, $_; 1.67 + } else { 1.68 + while (/^\-(\w)(.*)/) { 1.69 + if (exists $optf{$1}) { 1.70 + if (length($optf{$1}) > 1) { 1.71 + if ($2 ne '') { 1.72 + if (exists $opts->{$1}) { 1.73 + $opts->{$1} .= " $2"; 1.74 + } else { 1.75 + $opts->{$1} = $2; 1.76 + } 1.77 + } else { 1.78 + $optarg = $1; 1.79 + } 1.80 + last; 1.81 + } else { 1.82 + $opts->{$1} = 1; 1.83 + } 1.84 + } else { 1.85 + warn "openpkg:$prog_name:WARNING: unknown option $_\n"; 1.86 + } 1.87 + $_ = "-$2"; 1.88 + } 1.89 + } 1.90 + } 1.91 + if (defined $optarg) { 1.92 + warn "openpkg:$prog_name:WARNING: option $optarg requires an argument\n"; 1.93 + } 1.94 + foreach my $opt (keys %optf) { 1.95 + if (not exists $opts->{$opt}) { 1.96 + $opts->{$opt} = (length($optf{$opt}) > 1 ? "" : 0); 1.97 + } 1.98 + } 1.99 + return @argv; 1.100 +} 1.101 + 1.102 +# parse command line options 1.103 +my $opts = {}; 1.104 +@ARGV = getopts("hfqr:p:C:", $opts, @ARGV); 1.105 + 1.106 +# usage sanity check and usage help 1.107 +sub usage { 1.108 + my ($rc) = @_; 1.109 + my $usage = "openpkg:$prog_name:USAGE: openpkg mirror [-h] [-f] [-C <cache-file>] [-r <remote-url>] [-p <remote-url-prefix>] <local-prefix-dir>\n"; 1.110 + if ($rc == 0) { 1.111 + print STDOUT $usage; 1.112 + } 1.113 + else { 1.114 + print STDERR $usage; 1.115 + } 1.116 + exit($rc); 1.117 +} 1.118 +if ($opts->{"h"}) { 1.119 + usage(0); 1.120 +} 1.121 +if (@ARGV != 1) { 1.122 + usage(1); 1.123 +} 1.124 + 1.125 +# determine arguments 1.126 +my ($localprefixdir) = @ARGV; 1.127 +if (not -d $localprefixdir) { 1.128 + print STDERR "openpkg:$prog_name:ERROR: local directory already has to exist: $localprefixdir\n"; 1.129 + exit(1); 1.130 +} 1.131 + 1.132 +# determine start URL 1.133 +my $url = $opts->{"r"} || ""; 1.134 +if ($url eq "") { 1.135 + $url = `$my_prefix/bin/openpkg release --fmt='%u' 2>/dev/null`; 1.136 + $url =~ s/^\s+//s; 1.137 + $url =~ s/\s+$//s; 1.138 +} 1.139 +if ($url eq "") { 1.140 + print STDERR "openpkg:$prog_name:ERROR: no repository URL known\n"; 1.141 + exit(1); 1.142 +} 1.143 + 1.144 +# determine URL prefix 1.145 +my $remoteprefixurl = $opts->{"p"}; 1.146 +if (not defined $remoteprefixurl) { 1.147 + $remoteprefixurl = $url; 1.148 + $remoteprefixurl =~ s/\/([^\/]+)$//s; 1.149 +} 1.150 + 1.151 +## 1.152 +## HELPER FUNCTIONS 1.153 +## 1.154 + 1.155 +# determine size and mtime of a file 1.156 +sub sizetime ($) { 1.157 + my ($file) = @_; 1.158 + my @stat = stat($file); 1.159 + return ($stat[7], $stat[9]); 1.160 +} 1.161 + 1.162 +# determine MD5 of a file 1.163 +sub digest ($) { 1.164 + my ($file) = @_; 1.165 + my $digest = ""; 1.166 + my $output = `($my_openssl md5 -hex "$file") 2>/dev/null || true`; 1.167 + if ($output =~ m/MD5\(.+?\)=\s*([0-9a-fA-F]+)/s) { 1.168 + $digest = uc($1); 1.169 + } 1.170 + return $digest; 1.171 +} 1.172 + 1.173 +# print a byte size in human-readable form 1.174 +sub humansize ($) { 1.175 + my ($bytes) = @_; 1.176 + my $size = ""; 1.177 + if ($bytes > 1024*1024*1024) { $size = sprintf("%.1fGB", $bytes / (1024*1024*1024)); } 1.178 + elsif ($bytes > 1024*1024) { $size = sprintf("%.1fMB", $bytes / (1024*1024) ); } 1.179 + elsif ($bytes > 1024) { $size = sprintf("%.1fKB", $bytes / (1024) ); } 1.180 + else { $size = sprintf("%dB", $bytes ); } 1.181 + return $size; 1.182 +} 1.183 + 1.184 +# caching support: load cache 1.185 +sub cache_load () { 1.186 + my $cache = {}; 1.187 + if ($opts->{"C"}) { 1.188 + if (-f $opts->{"C"}) { 1.189 + # load cache file 1.190 + open(FP, "<" . $opts->{"C"}) 1.191 + or die "unable to load cache file: " . $opts->{"C"}; 1.192 + my $txt = ""; $txt .= $_ while (<FP>); 1.193 + close(FP); 1.194 + 1.195 + # parse cache file 1.196 + $txt =~ s/^\s*(\d+)\s+(\d+)\s+([\da-fA-F]+)\s+(.+)$/ 1.197 + $cache->{$4} = { size => $1, mtime => $2, md5 => $3 }, '' 1.198 + /mge; 1.199 + } 1.200 + } 1.201 + return $cache; 1.202 +} 1.203 + 1.204 +# caching support: save cache 1.205 +sub cache_save ($) { 1.206 + my ($cache) = @_; 1.207 + if ($opts->{"C"}) { 1.208 + # generate cache file 1.209 + my $txt = ""; 1.210 + foreach my $url (sort { $a cmp $b } keys %{$cache}) { 1.211 + $txt .= sprintf("%10d %10d %s %s\n", 1.212 + $cache->{$url}->{"size"}, 1.213 + $cache->{$url}->{"mtime"}, 1.214 + $cache->{$url}->{"md5"}, 1.215 + $url 1.216 + ); 1.217 + } 1.218 + 1.219 + # write cache file 1.220 + open(FP, ">" . $opts->{"C"}) 1.221 + or die "unable to save cache file: " . $opts->{"C"}; 1.222 + print FP $txt; 1.223 + close(FP); 1.224 + } 1.225 + return; 1.226 +} 1.227 + 1.228 +## 1.229 +## DETERMINE LOCAL AND REMOTE FILES 1.230 +## 1.231 + 1.232 +# optionally load the cache 1.233 +my $cache = cache_load(); 1.234 + 1.235 +# determine local files 1.236 +print "++ determining local files\n"; 1.237 +my @local_files = map { 1.238 + s/\r?\n$//s; 1.239 + s/\/$//s; 1.240 + $_; 1.241 +} `find $localprefixdir -depth -print`; 1.242 +print "-- local: " . @local_files . " files\n"; 1.243 + 1.244 +# determine remote files 1.245 +print "++ determining remote files\n"; 1.246 +sub relurl ($$) { 1.247 + my ($url, $suburl) = @_; 1.248 + if ($suburl =~ m/^\w+:\/\//) { 1.249 + $url = $suburl; 1.250 + } 1.251 + elsif ($suburl =~ m/^\//) { 1.252 + $url = "file://$suburl"; 1.253 + } 1.254 + else { 1.255 + $url =~ s/(\/)?\/*[^\/]*$/$1$suburl/; 1.256 + } 1.257 + 1 while ($url =~ s/\/\.\//\//s); 1.258 + 1 while ($url =~ s/\/[^\/]+\/\.\.\//\//s); 1.259 + return $url; 1.260 +} 1.261 +sub parse_attr ($) { 1.262 + my ($xml) = @_; 1.263 + my $info = { href => "", size => -1, mtime => -1, md5 => "-" }; 1.264 + $info->{"href"} = $1 if ($xml =~ m/\bhref="([^"]*)"/s); 1.265 + $info->{"size"} = $1 if ($xml =~ m/\bsize="([^"]*)"/s); 1.266 + $info->{"mtime"} = $1 if ($xml =~ m/\bmtime="([^"]*)"/s); 1.267 + $info->{"md5"} = $1 if ($xml =~ m/\bmd5="([^"]*)"/s); 1.268 + return $info; 1.269 +} 1.270 +sub get_urls ($) { 1.271 + my ($info) = @_; 1.272 + 1.273 + # determine URL 1.274 + my $url = $info->{"href"}; 1.275 + if ($url =~ m/^\//) { 1.276 + $url = "file://$url"; 1.277 + } 1.278 + if ($url =~ m/\/$/) { 1.279 + $url .= "00INDEX.rdf"; 1.280 + } 1.281 + $info->{"href"} = $url; 1.282 + 1.283 + # fetch URL content 1.284 + print "-- fetching XML/RDF index: $url\n"; 1.285 + my $tmpfile = "$tmpdir/openpkg-mirror.tmp"; 1.286 + unlink($tmpfile); 1.287 + my $cmd = "$my_prefix/bin/openpkg curl -k -s -R -o \"$tmpfile\" \"$url\""; 1.288 + system($cmd); 1.289 + my $xml; 1.290 + if ($url =~ m/\.bz2$/) { 1.291 + $xml = `$my_prefix/lib/openpkg/bzip2 -d -c <"$tmpfile"`; 1.292 + } 1.293 + else { 1.294 + $xml = `cat "$tmpfile"`; 1.295 + } 1.296 + 1.297 + # provide information about this URL 1.298 + my @urls = (); 1.299 + if ($info->{"size"} == -1 or $info->{"mtime"} == -1) { 1.300 + my ($S, $T) = sizetime($tmpfile); 1.301 + $info->{"size"} = $S; 1.302 + $info->{"mtime"} = $T; 1.303 + } 1.304 + if ($info->{"md5"} eq "-") { 1.305 + $info->{"md5"} = digest($tmpfile); 1.306 + } 1.307 + push(@urls, $info); 1.308 + unlink($tmpfile); 1.309 + 1.310 + # parse references 1.311 + my @ref1 = (); 1.312 + my @ref2 = (); 1.313 + while ($xml =~ m/<Repository\s+([^>]*href=[^>]*)>/gs) { 1.314 + push(@ref1, $1); 1.315 + } 1.316 + while ($xml =~ m/<rdf:Description\s+([^>]*href=[^>]*)>/gs) { 1.317 + push(@ref2, $1); 1.318 + } 1.319 + 1.320 + # provide information about referenced leave URLs 1.321 + foreach my $ref2 (@ref2) { 1.322 + my $i = parse_attr($ref2); 1.323 + $i->{"href"} = relurl($url, $i->{"href"}); 1.324 + push(@urls, $i); 1.325 + } 1.326 + 1.327 + # provide information about referenced index URLs 1.328 + foreach my $ref1 (@ref1) { 1.329 + my $i = parse_attr($ref1); 1.330 + $i->{"href"} = relurl($url, $i->{"href"}); 1.331 + push(@urls, get_urls($i)); # RECURSION 1.332 + } 1.333 + 1.334 + return @urls; 1.335 +} 1.336 +my @remote_urls = get_urls({ href => $url, size => -1, mtime => -1, md5 => "-" }); 1.337 +my $size = 0; 1.338 +foreach my $url (@remote_urls) { 1.339 + $size += int($url->{"size"}) if ($url->{"size"} > 0); 1.340 +} 1.341 +print "-- remote: " . @remote_urls . " files (" . humansize($size) . ")\n"; 1.342 + 1.343 +## 1.344 +## DOWNLOAD NEW OR UPDATED REMOTE FILES 1.345 +## 1.346 + 1.347 +# fetching remote files 1.348 +print "++ fetching new/updated files\n"; 1.349 +my $local_files_keep = {}; 1.350 +sub keep ($) { 1.351 + my ($path) = @_; 1.352 + # keep path and all its parent paths 1.353 + while ($path ne "") { 1.354 + $local_files_keep->{$path} = 1; 1.355 + $path =~ s/(?:^\/$|\/?[^\/]+$)//s; 1.356 + } 1.357 +} 1.358 +sub mkdir_for_file ($$) { 1.359 + my ($prefix, $path) = @_; 1.360 + $path =~ s/(?:^\/$|\/?[^\/]+$)//s; 1.361 + my @dirs = split("/", $path); 1.362 + $path = $prefix; 1.363 + $path =~ s/\/$//s; 1.364 + foreach my $dir (@dirs) { 1.365 + $path .= "/$dir"; 1.366 + if (not -d $path) { 1.367 + print "-- creating: $path\n"; 1.368 + system("mkdir \"$path\""); 1.369 + } 1.370 + } 1.371 +} 1.372 +foreach my $remote_url (@remote_urls) { 1.373 + my $href = $remote_url->{"href"}; 1.374 + if ($href =~ m/^\Q$remoteprefixurl\E(.+)$/) { 1.375 + # derive local path 1.376 + my $local_path = $1; 1.377 + my $local_file = $localprefixdir . $local_path; 1.378 + keep($local_file); 1.379 + 1.380 + # determine information about local path 1.381 + my $download = 1; 1.382 + my $cache_update = 0; 1.383 + my ($size, $mtime, $md5); 1.384 + if (-f $local_file) { 1.385 + my $c = $cache->{$local_file}; 1.386 + $size = $c->{"size"} if (defined $c and defined $c->{"size"}); 1.387 + $mtime = $c->{"mtime"} if (defined $c and defined $c->{"mtime"}); 1.388 + if (not defined $size or not defined $mtime) { 1.389 + ($size, $mtime) = sizetime($local_file); 1.390 + $cache_update = 1; 1.391 + } 1.392 + if ( $size == $remote_url->{"size"} 1.393 + and $mtime == $remote_url->{"mtime"}) { 1.394 + $md5 = $c->{"md5"} if (defined $c and defined $c->{"md5"}); 1.395 + if (not defined $md5) { 1.396 + $md5 = digest($local_file); 1.397 + $cache_update = 1; 1.398 + } 1.399 + if ($md5 eq $remote_url->{"md5"}) { 1.400 + $download = 0; 1.401 + } 1.402 + } 1.403 + } 1.404 + 1.405 + # optionally download remote file to local path 1.406 + if ($download) { 1.407 + print "-- fetching: $local_path\n"; 1.408 + mkdir_for_file($localprefixdir, $local_path); 1.409 + my $cmd = "$my_prefix/bin/openpkg curl"; 1.410 + $cmd .= " -s" if ( $opts->{"q"}); 1.411 + $cmd .= " \"-#\"" if (not $opts->{"q"}); 1.412 + $cmd .= " -k -R -o \"$local_file\" \"$href\""; 1.413 + system($cmd); 1.414 + ($size, $mtime) = sizetime($local_file); 1.415 + $md5 = digest($local_file); 1.416 + $cache_update = 1; 1.417 + } 1.418 + 1.419 + # optionally update the size/mtime/md5 cache 1.420 + if ($opts->{"C"} and $cache_update) { 1.421 + $cache->{$local_file} = { size => $size, mtime => $mtime, md5 => $md5 }; 1.422 + cache_save($cache); 1.423 + } 1.424 + } 1.425 + else { 1.426 + print "** WARNING: URL \"$href\" not under prefix \"$remoteprefixurl\": " . 1.427 + "unable to derive local path -- skipping URL\n"; 1.428 + } 1.429 +} 1.430 + 1.431 +## 1.432 +## REMOVE OBSOLETE LOCAL FILES 1.433 +## 1.434 + 1.435 +# remove obsolete files 1.436 +print "++ removing obsolete files\n"; 1.437 +my $lf = @local_files; 1.438 +my $lfk = keys %{$local_files_keep}; 1.439 +if (not $opts->{"f"} and ($lf - $lfk) > ($lf * 0.3)) { 1.440 + print "** WARNING: more than 30% of your existing local files would be removed (require option -f)\n"; 1.441 +} 1.442 +else { 1.443 + foreach my $local_file (@local_files) { 1.444 + if (not exists $local_files_keep->{$local_file}) { 1.445 + if (-f $local_file) { 1.446 + print "-- removing: $local_file (file)\n"; 1.447 + system("rm \"$local_file\" >/dev/null 2>&1 || true"); 1.448 + } 1.449 + elsif (-d $local_file) { 1.450 + print "-- removing: $local_file (dir)\n"; 1.451 + system("rmdir \"$local_file\" >/dev/null 2>&1 || true"); 1.452 + } 1.453 + } 1.454 + } 1.455 +} 1.456 + 1.457 +## 1.458 +## CLEANUP 1.459 +## 1.460 + 1.461 +# optionally flush the cache 1.462 +cache_save($cache); 1.463 + 1.464 +# exit gracefully 1.465 +exit(0); 1.466 +