openpkg/mirror.pl

Mon, 28 Jan 2013 17:37:18 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Mon, 28 Jan 2013 17:37:18 +0100
changeset 758
a2c6460cfb16
permissions
-rw-r--r--

Correct socket error reporting improvement with IPv6 portable code,
after helpful recommendation by Saúl Ibarra Corretgé on OSips devlist.

michael@428 1 ##
michael@428 2 ## mirror.pl -- OpenPKG Package Mirroring
michael@428 3 ## Copyright (c) 2011-2012 OpenPKG GmbH <http://openpkg.com/>
michael@428 4 ##
michael@428 5 ## This software is property of the OpenPKG GmbH, DE MUC HRB 160208.
michael@428 6 ## All rights reserved. Licenses which grant limited permission to use,
michael@428 7 ## copy, modify and distribute this software are available from the
michael@428 8 ## OpenPKG GmbH.
michael@428 9 ##
michael@428 10 ## THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
michael@428 11 ## WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
michael@428 12 ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
michael@428 13 ## IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
michael@428 14 ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
michael@428 15 ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
michael@428 16 ## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
michael@428 17 ## USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
michael@428 18 ## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
michael@428 19 ## OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
michael@428 20 ## OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
michael@428 21 ## SUCH DAMAGE.
michael@428 22 ##
michael@428 23
michael@428 24 require 5.003;
michael@428 25
michael@428 26 # OpenPKG instance prefix and tools
michael@428 27 my $my_prefix = $ENV{'OPENPKG_PREFIX'};
michael@428 28 my $my_rpm = "$my_prefix/bin/openpkg rpm";
michael@428 29 my $my_openssl = "$my_prefix/lib/openpkg/openssl";
michael@428 30 delete $ENV{'OPENPKG_PREFIX'};
michael@428 31
michael@428 32 # program identification
michael@428 33 my $progname = "search";
michael@428 34 my $progvers = "0.1.0";
michael@428 35
michael@428 36 # determine reasonable temporary directory
michael@428 37 my $tmpdir = ($ENV{"TMPDIR"} || "/tmp");
michael@428 38
michael@428 39 # use unbuffered stdout
michael@428 40 $|++;
michael@428 41
michael@428 42 ##
michael@428 43 ## COMMAND-LINE INTERFACE HANDLING
michael@428 44 ##
michael@428 45
michael@428 46 # home-brewn getopt(3) style option parser
michael@428 47 sub getopts ($$@) {
michael@428 48 my ($opt_spec, $opts, @argv_orig) = @_;
michael@428 49 my (%optf) = map { m/(\w)/; $1 => $_ } $opt_spec =~ m/(\w:|\w)/g;
michael@428 50 my (@argv, $optarg);
michael@428 51
michael@428 52 foreach (@argv_orig) {
michael@428 53 if (@argv) {
michael@428 54 push @argv, $_;
michael@428 55 } elsif (defined $optarg) {
michael@428 56 if (exists $opts->{$optarg}) {
michael@428 57 $opts->{$optarg} .= " $_";
michael@428 58 } else {
michael@428 59 $opts->{$optarg} = $_;
michael@428 60 }
michael@428 61 $optarg = undef;
michael@428 62 } elsif (!/^[-]/) {
michael@428 63 push @argv, $_;
michael@428 64 } else {
michael@428 65 while (/^\-(\w)(.*)/) {
michael@428 66 if (exists $optf{$1}) {
michael@428 67 if (length($optf{$1}) > 1) {
michael@428 68 if ($2 ne '') {
michael@428 69 if (exists $opts->{$1}) {
michael@428 70 $opts->{$1} .= " $2";
michael@428 71 } else {
michael@428 72 $opts->{$1} = $2;
michael@428 73 }
michael@428 74 } else {
michael@428 75 $optarg = $1;
michael@428 76 }
michael@428 77 last;
michael@428 78 } else {
michael@428 79 $opts->{$1} = 1;
michael@428 80 }
michael@428 81 } else {
michael@428 82 warn "openpkg:$prog_name:WARNING: unknown option $_\n";
michael@428 83 }
michael@428 84 $_ = "-$2";
michael@428 85 }
michael@428 86 }
michael@428 87 }
michael@428 88 if (defined $optarg) {
michael@428 89 warn "openpkg:$prog_name:WARNING: option $optarg requires an argument\n";
michael@428 90 }
michael@428 91 foreach my $opt (keys %optf) {
michael@428 92 if (not exists $opts->{$opt}) {
michael@428 93 $opts->{$opt} = (length($optf{$opt}) > 1 ? "" : 0);
michael@428 94 }
michael@428 95 }
michael@428 96 return @argv;
michael@428 97 }
michael@428 98
michael@428 99 # parse command line options
michael@428 100 my $opts = {};
michael@428 101 @ARGV = getopts("hfqr:p:C:", $opts, @ARGV);
michael@428 102
michael@428 103 # usage sanity check and usage help
michael@428 104 sub usage {
michael@428 105 my ($rc) = @_;
michael@428 106 my $usage = "openpkg:$prog_name:USAGE: openpkg mirror [-h] [-f] [-C <cache-file>] [-r <remote-url>] [-p <remote-url-prefix>] <local-prefix-dir>\n";
michael@428 107 if ($rc == 0) {
michael@428 108 print STDOUT $usage;
michael@428 109 }
michael@428 110 else {
michael@428 111 print STDERR $usage;
michael@428 112 }
michael@428 113 exit($rc);
michael@428 114 }
michael@428 115 if ($opts->{"h"}) {
michael@428 116 usage(0);
michael@428 117 }
michael@428 118 if (@ARGV != 1) {
michael@428 119 usage(1);
michael@428 120 }
michael@428 121
michael@428 122 # determine arguments
michael@428 123 my ($localprefixdir) = @ARGV;
michael@428 124 if (not -d $localprefixdir) {
michael@428 125 print STDERR "openpkg:$prog_name:ERROR: local directory already has to exist: $localprefixdir\n";
michael@428 126 exit(1);
michael@428 127 }
michael@428 128
michael@428 129 # determine start URL
michael@428 130 my $url = $opts->{"r"} || "";
michael@428 131 if ($url eq "") {
michael@428 132 $url = `$my_prefix/bin/openpkg release --fmt='%u' 2>/dev/null`;
michael@428 133 $url =~ s/^\s+//s;
michael@428 134 $url =~ s/\s+$//s;
michael@428 135 }
michael@428 136 if ($url eq "") {
michael@428 137 print STDERR "openpkg:$prog_name:ERROR: no repository URL known\n";
michael@428 138 exit(1);
michael@428 139 }
michael@428 140
michael@428 141 # determine URL prefix
michael@428 142 my $remoteprefixurl = $opts->{"p"};
michael@428 143 if (not defined $remoteprefixurl) {
michael@428 144 $remoteprefixurl = $url;
michael@428 145 $remoteprefixurl =~ s/\/([^\/]+)$//s;
michael@428 146 }
michael@428 147
michael@428 148 ##
michael@428 149 ## HELPER FUNCTIONS
michael@428 150 ##
michael@428 151
michael@428 152 # determine size and mtime of a file
michael@428 153 sub sizetime ($) {
michael@428 154 my ($file) = @_;
michael@428 155 my @stat = stat($file);
michael@428 156 return ($stat[7], $stat[9]);
michael@428 157 }
michael@428 158
michael@428 159 # determine MD5 of a file
michael@428 160 sub digest ($) {
michael@428 161 my ($file) = @_;
michael@428 162 my $digest = "";
michael@428 163 my $output = `($my_openssl md5 -hex "$file") 2>/dev/null || true`;
michael@428 164 if ($output =~ m/MD5\(.+?\)=\s*([0-9a-fA-F]+)/s) {
michael@428 165 $digest = uc($1);
michael@428 166 }
michael@428 167 return $digest;
michael@428 168 }
michael@428 169
michael@428 170 # print a byte size in human-readable form
michael@428 171 sub humansize ($) {
michael@428 172 my ($bytes) = @_;
michael@428 173 my $size = "";
michael@428 174 if ($bytes > 1024*1024*1024) { $size = sprintf("%.1fGB", $bytes / (1024*1024*1024)); }
michael@428 175 elsif ($bytes > 1024*1024) { $size = sprintf("%.1fMB", $bytes / (1024*1024) ); }
michael@428 176 elsif ($bytes > 1024) { $size = sprintf("%.1fKB", $bytes / (1024) ); }
michael@428 177 else { $size = sprintf("%dB", $bytes ); }
michael@428 178 return $size;
michael@428 179 }
michael@428 180
michael@428 181 # caching support: load cache
michael@428 182 sub cache_load () {
michael@428 183 my $cache = {};
michael@428 184 if ($opts->{"C"}) {
michael@428 185 if (-f $opts->{"C"}) {
michael@428 186 # load cache file
michael@428 187 open(FP, "<" . $opts->{"C"})
michael@428 188 or die "unable to load cache file: " . $opts->{"C"};
michael@428 189 my $txt = ""; $txt .= $_ while (<FP>);
michael@428 190 close(FP);
michael@428 191
michael@428 192 # parse cache file
michael@428 193 $txt =~ s/^\s*(\d+)\s+(\d+)\s+([\da-fA-F]+)\s+(.+)$/
michael@428 194 $cache->{$4} = { size => $1, mtime => $2, md5 => $3 }, ''
michael@428 195 /mge;
michael@428 196 }
michael@428 197 }
michael@428 198 return $cache;
michael@428 199 }
michael@428 200
michael@428 201 # caching support: save cache
michael@428 202 sub cache_save ($) {
michael@428 203 my ($cache) = @_;
michael@428 204 if ($opts->{"C"}) {
michael@428 205 # generate cache file
michael@428 206 my $txt = "";
michael@428 207 foreach my $url (sort { $a cmp $b } keys %{$cache}) {
michael@428 208 $txt .= sprintf("%10d %10d %s %s\n",
michael@428 209 $cache->{$url}->{"size"},
michael@428 210 $cache->{$url}->{"mtime"},
michael@428 211 $cache->{$url}->{"md5"},
michael@428 212 $url
michael@428 213 );
michael@428 214 }
michael@428 215
michael@428 216 # write cache file
michael@428 217 open(FP, ">" . $opts->{"C"})
michael@428 218 or die "unable to save cache file: " . $opts->{"C"};
michael@428 219 print FP $txt;
michael@428 220 close(FP);
michael@428 221 }
michael@428 222 return;
michael@428 223 }
michael@428 224
michael@428 225 ##
michael@428 226 ## DETERMINE LOCAL AND REMOTE FILES
michael@428 227 ##
michael@428 228
michael@428 229 # optionally load the cache
michael@428 230 my $cache = cache_load();
michael@428 231
michael@428 232 # determine local files
michael@428 233 print "++ determining local files\n";
michael@428 234 my @local_files = map {
michael@428 235 s/\r?\n$//s;
michael@428 236 s/\/$//s;
michael@428 237 $_;
michael@428 238 } `find $localprefixdir -depth -print`;
michael@428 239 print "-- local: " . @local_files . " files\n";
michael@428 240
michael@428 241 # determine remote files
michael@428 242 print "++ determining remote files\n";
michael@428 243 sub relurl ($$) {
michael@428 244 my ($url, $suburl) = @_;
michael@428 245 if ($suburl =~ m/^\w+:\/\//) {
michael@428 246 $url = $suburl;
michael@428 247 }
michael@428 248 elsif ($suburl =~ m/^\//) {
michael@428 249 $url = "file://$suburl";
michael@428 250 }
michael@428 251 else {
michael@428 252 $url =~ s/(\/)?\/*[^\/]*$/$1$suburl/;
michael@428 253 }
michael@428 254 1 while ($url =~ s/\/\.\//\//s);
michael@428 255 1 while ($url =~ s/\/[^\/]+\/\.\.\//\//s);
michael@428 256 return $url;
michael@428 257 }
michael@428 258 sub parse_attr ($) {
michael@428 259 my ($xml) = @_;
michael@428 260 my $info = { href => "", size => -1, mtime => -1, md5 => "-" };
michael@428 261 $info->{"href"} = $1 if ($xml =~ m/\bhref="([^"]*)"/s);
michael@428 262 $info->{"size"} = $1 if ($xml =~ m/\bsize="([^"]*)"/s);
michael@428 263 $info->{"mtime"} = $1 if ($xml =~ m/\bmtime="([^"]*)"/s);
michael@428 264 $info->{"md5"} = $1 if ($xml =~ m/\bmd5="([^"]*)"/s);
michael@428 265 return $info;
michael@428 266 }
michael@428 267 sub get_urls ($) {
michael@428 268 my ($info) = @_;
michael@428 269
michael@428 270 # determine URL
michael@428 271 my $url = $info->{"href"};
michael@428 272 if ($url =~ m/^\//) {
michael@428 273 $url = "file://$url";
michael@428 274 }
michael@428 275 if ($url =~ m/\/$/) {
michael@428 276 $url .= "00INDEX.rdf";
michael@428 277 }
michael@428 278 $info->{"href"} = $url;
michael@428 279
michael@428 280 # fetch URL content
michael@428 281 print "-- fetching XML/RDF index: $url\n";
michael@428 282 my $tmpfile = "$tmpdir/openpkg-mirror.tmp";
michael@428 283 unlink($tmpfile);
michael@428 284 my $cmd = "$my_prefix/bin/openpkg curl -k -s -R -o \"$tmpfile\" \"$url\"";
michael@428 285 system($cmd);
michael@428 286 my $xml;
michael@428 287 if ($url =~ m/\.bz2$/) {
michael@428 288 $xml = `$my_prefix/lib/openpkg/bzip2 -d -c <"$tmpfile"`;
michael@428 289 }
michael@428 290 else {
michael@428 291 $xml = `cat "$tmpfile"`;
michael@428 292 }
michael@428 293
michael@428 294 # provide information about this URL
michael@428 295 my @urls = ();
michael@428 296 if ($info->{"size"} == -1 or $info->{"mtime"} == -1) {
michael@428 297 my ($S, $T) = sizetime($tmpfile);
michael@428 298 $info->{"size"} = $S;
michael@428 299 $info->{"mtime"} = $T;
michael@428 300 }
michael@428 301 if ($info->{"md5"} eq "-") {
michael@428 302 $info->{"md5"} = digest($tmpfile);
michael@428 303 }
michael@428 304 push(@urls, $info);
michael@428 305 unlink($tmpfile);
michael@428 306
michael@428 307 # parse references
michael@428 308 my @ref1 = ();
michael@428 309 my @ref2 = ();
michael@428 310 while ($xml =~ m/<Repository\s+([^>]*href=[^>]*)>/gs) {
michael@428 311 push(@ref1, $1);
michael@428 312 }
michael@428 313 while ($xml =~ m/<rdf:Description\s+([^>]*href=[^>]*)>/gs) {
michael@428 314 push(@ref2, $1);
michael@428 315 }
michael@428 316
michael@428 317 # provide information about referenced leave URLs
michael@428 318 foreach my $ref2 (@ref2) {
michael@428 319 my $i = parse_attr($ref2);
michael@428 320 $i->{"href"} = relurl($url, $i->{"href"});
michael@428 321 push(@urls, $i);
michael@428 322 }
michael@428 323
michael@428 324 # provide information about referenced index URLs
michael@428 325 foreach my $ref1 (@ref1) {
michael@428 326 my $i = parse_attr($ref1);
michael@428 327 $i->{"href"} = relurl($url, $i->{"href"});
michael@428 328 push(@urls, get_urls($i)); # RECURSION
michael@428 329 }
michael@428 330
michael@428 331 return @urls;
michael@428 332 }
michael@428 333 my @remote_urls = get_urls({ href => $url, size => -1, mtime => -1, md5 => "-" });
michael@428 334 my $size = 0;
michael@428 335 foreach my $url (@remote_urls) {
michael@428 336 $size += int($url->{"size"}) if ($url->{"size"} > 0);
michael@428 337 }
michael@428 338 print "-- remote: " . @remote_urls . " files (" . humansize($size) . ")\n";
michael@428 339
michael@428 340 ##
michael@428 341 ## DOWNLOAD NEW OR UPDATED REMOTE FILES
michael@428 342 ##
michael@428 343
michael@428 344 # fetching remote files
michael@428 345 print "++ fetching new/updated files\n";
michael@428 346 my $local_files_keep = {};
michael@428 347 sub keep ($) {
michael@428 348 my ($path) = @_;
michael@428 349 # keep path and all its parent paths
michael@428 350 while ($path ne "") {
michael@428 351 $local_files_keep->{$path} = 1;
michael@428 352 $path =~ s/(?:^\/$|\/?[^\/]+$)//s;
michael@428 353 }
michael@428 354 }
michael@428 355 sub mkdir_for_file ($$) {
michael@428 356 my ($prefix, $path) = @_;
michael@428 357 $path =~ s/(?:^\/$|\/?[^\/]+$)//s;
michael@428 358 my @dirs = split("/", $path);
michael@428 359 $path = $prefix;
michael@428 360 $path =~ s/\/$//s;
michael@428 361 foreach my $dir (@dirs) {
michael@428 362 $path .= "/$dir";
michael@428 363 if (not -d $path) {
michael@428 364 print "-- creating: $path\n";
michael@428 365 system("mkdir \"$path\"");
michael@428 366 }
michael@428 367 }
michael@428 368 }
michael@428 369 foreach my $remote_url (@remote_urls) {
michael@428 370 my $href = $remote_url->{"href"};
michael@428 371 if ($href =~ m/^\Q$remoteprefixurl\E(.+)$/) {
michael@428 372 # derive local path
michael@428 373 my $local_path = $1;
michael@428 374 my $local_file = $localprefixdir . $local_path;
michael@428 375 keep($local_file);
michael@428 376
michael@428 377 # determine information about local path
michael@428 378 my $download = 1;
michael@428 379 my $cache_update = 0;
michael@428 380 my ($size, $mtime, $md5);
michael@428 381 if (-f $local_file) {
michael@428 382 my $c = $cache->{$local_file};
michael@428 383 $size = $c->{"size"} if (defined $c and defined $c->{"size"});
michael@428 384 $mtime = $c->{"mtime"} if (defined $c and defined $c->{"mtime"});
michael@428 385 if (not defined $size or not defined $mtime) {
michael@428 386 ($size, $mtime) = sizetime($local_file);
michael@428 387 $cache_update = 1;
michael@428 388 }
michael@428 389 if ( $size == $remote_url->{"size"}
michael@428 390 and $mtime == $remote_url->{"mtime"}) {
michael@428 391 $md5 = $c->{"md5"} if (defined $c and defined $c->{"md5"});
michael@428 392 if (not defined $md5) {
michael@428 393 $md5 = digest($local_file);
michael@428 394 $cache_update = 1;
michael@428 395 }
michael@428 396 if ($md5 eq $remote_url->{"md5"}) {
michael@428 397 $download = 0;
michael@428 398 }
michael@428 399 }
michael@428 400 }
michael@428 401
michael@428 402 # optionally download remote file to local path
michael@428 403 if ($download) {
michael@428 404 print "-- fetching: $local_path\n";
michael@428 405 mkdir_for_file($localprefixdir, $local_path);
michael@428 406 my $cmd = "$my_prefix/bin/openpkg curl";
michael@428 407 $cmd .= " -s" if ( $opts->{"q"});
michael@428 408 $cmd .= " \"-#\"" if (not $opts->{"q"});
michael@428 409 $cmd .= " -k -R -o \"$local_file\" \"$href\"";
michael@428 410 system($cmd);
michael@428 411 ($size, $mtime) = sizetime($local_file);
michael@428 412 $md5 = digest($local_file);
michael@428 413 $cache_update = 1;
michael@428 414 }
michael@428 415
michael@428 416 # optionally update the size/mtime/md5 cache
michael@428 417 if ($opts->{"C"} and $cache_update) {
michael@428 418 $cache->{$local_file} = { size => $size, mtime => $mtime, md5 => $md5 };
michael@428 419 cache_save($cache);
michael@428 420 }
michael@428 421 }
michael@428 422 else {
michael@428 423 print "** WARNING: URL \"$href\" not under prefix \"$remoteprefixurl\": " .
michael@428 424 "unable to derive local path -- skipping URL\n";
michael@428 425 }
michael@428 426 }
michael@428 427
michael@428 428 ##
michael@428 429 ## REMOVE OBSOLETE LOCAL FILES
michael@428 430 ##
michael@428 431
michael@428 432 # remove obsolete files
michael@428 433 print "++ removing obsolete files\n";
michael@428 434 my $lf = @local_files;
michael@428 435 my $lfk = keys %{$local_files_keep};
michael@428 436 if (not $opts->{"f"} and ($lf - $lfk) > ($lf * 0.3)) {
michael@428 437 print "** WARNING: more than 30% of your existing local files would be removed (require option -f)\n";
michael@428 438 }
michael@428 439 else {
michael@428 440 foreach my $local_file (@local_files) {
michael@428 441 if (not exists $local_files_keep->{$local_file}) {
michael@428 442 if (-f $local_file) {
michael@428 443 print "-- removing: $local_file (file)\n";
michael@428 444 system("rm \"$local_file\" >/dev/null 2>&1 || true");
michael@428 445 }
michael@428 446 elsif (-d $local_file) {
michael@428 447 print "-- removing: $local_file (dir)\n";
michael@428 448 system("rmdir \"$local_file\" >/dev/null 2>&1 || true");
michael@428 449 }
michael@428 450 }
michael@428 451 }
michael@428 452 }
michael@428 453
michael@428 454 ##
michael@428 455 ## CLEANUP
michael@428 456 ##
michael@428 457
michael@428 458 # optionally flush the cache
michael@428 459 cache_save($cache);
michael@428 460
michael@428 461 # exit gracefully
michael@428 462 exit(0);
michael@428 463

mercurial