openpkg/mirror.pl

Mon, 28 Jan 2013 17:37:18 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Mon, 28 Jan 2013 17:37:18 +0100
changeset 758
a2c6460cfb16
permissions
-rw-r--r--

Correct socket error reporting improvement with IPv6 portable code,
after helpful recommendation by Saúl Ibarra Corretgé on OSips devlist.

     1 ##
     2 ##  mirror.pl -- OpenPKG Package Mirroring
     3 ##  Copyright (c) 2011-2012 OpenPKG GmbH <http://openpkg.com/>
     4 ##
     5 ##  This software is property of the OpenPKG GmbH, DE MUC HRB 160208.
     6 ##  All rights reserved. Licenses which grant limited permission to use,
     7 ##  copy, modify and distribute this software are available from the
     8 ##  OpenPKG GmbH.
     9 ##
    10 ##  THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
    11 ##  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
    12 ##  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
    13 ##  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
    14 ##  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    15 ##  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    16 ##  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
    17 ##  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    18 ##  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    19 ##  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
    20 ##  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    21 ##  SUCH DAMAGE.
    22 ##
    24 require 5.003;
    26 #   OpenPKG instance prefix and tools
    27 my $my_prefix = $ENV{'OPENPKG_PREFIX'};
    28 my $my_rpm = "$my_prefix/bin/openpkg rpm";
    29 my $my_openssl = "$my_prefix/lib/openpkg/openssl";
    30 delete $ENV{'OPENPKG_PREFIX'};
    32 #   program identification
    33 my $progname  = "search";
    34 my $progvers  = "0.1.0";
    36 #   determine reasonable temporary directory
    37 my $tmpdir = ($ENV{"TMPDIR"} || "/tmp");
    39 #   use unbuffered stdout
    40 $|++;
    42 ##
    43 ##  COMMAND-LINE INTERFACE HANDLING
    44 ##
    46 #   home-brewn getopt(3) style option parser
    47 sub getopts ($$@) {
    48     my ($opt_spec, $opts, @argv_orig) = @_;
    49     my (%optf) = map { m/(\w)/; $1 => $_ } $opt_spec =~ m/(\w:|\w)/g;
    50     my (@argv, $optarg);
    52     foreach (@argv_orig) {
    53         if (@argv) {
    54             push @argv, $_;
    55         } elsif (defined $optarg) {
    56             if (exists $opts->{$optarg}) {
    57                 $opts->{$optarg} .= " $_";
    58             } else {
    59                 $opts->{$optarg} = $_;
    60             }
    61             $optarg = undef;
    62         } elsif (!/^[-]/) {
    63             push @argv, $_;
    64         } else {
    65             while (/^\-(\w)(.*)/) {
    66                 if (exists $optf{$1}) {
    67                     if (length($optf{$1}) > 1) {
    68                         if ($2 ne '') {
    69                             if (exists $opts->{$1}) {
    70                                 $opts->{$1} .= " $2";
    71                             } else {
    72                                 $opts->{$1} = $2;
    73                             }
    74                         } else {
    75                             $optarg = $1;
    76                         }
    77                         last;
    78                     } else {
    79                         $opts->{$1} = 1;
    80                     }
    81                 } else {
    82                     warn "openpkg:$prog_name:WARNING: unknown option $_\n";
    83                 }
    84                 $_ = "-$2";
    85             }
    86         }
    87     }
    88     if (defined $optarg) {
    89         warn "openpkg:$prog_name:WARNING: option $optarg requires an argument\n";
    90     }
    91     foreach my $opt (keys %optf) {
    92         if (not exists $opts->{$opt}) {
    93             $opts->{$opt} = (length($optf{$opt}) > 1 ? "" : 0);
    94         }
    95     }
    96     return @argv;
    97 }
    99 #   parse command line options
   100 my $opts = {};
   101 @ARGV = getopts("hfqr:p:C:", $opts, @ARGV);
   103 #   usage sanity check and usage help
   104 sub usage {
   105     my ($rc) = @_;
   106     my $usage = "openpkg:$prog_name:USAGE: openpkg mirror [-h] [-f] [-C <cache-file>] [-r <remote-url>] [-p <remote-url-prefix>] <local-prefix-dir>\n";
   107     if ($rc == 0) {
   108         print STDOUT $usage;
   109     }
   110     else {
   111         print STDERR $usage;
   112     }
   113     exit($rc);
   114 }
   115 if ($opts->{"h"}) {
   116     usage(0);
   117 }
   118 if (@ARGV != 1) {
   119     usage(1);
   120 }
   122 #   determine arguments
   123 my ($localprefixdir) = @ARGV;
   124 if (not -d $localprefixdir) {
   125     print STDERR "openpkg:$prog_name:ERROR: local directory already has to exist: $localprefixdir\n";
   126     exit(1);
   127 }
   129 #   determine start URL
   130 my $url = $opts->{"r"} || "";
   131 if ($url eq "") {
   132     $url = `$my_prefix/bin/openpkg release --fmt='%u' 2>/dev/null`;
   133     $url =~ s/^\s+//s;
   134     $url =~ s/\s+$//s;
   135 }
   136 if ($url eq "") {
   137     print STDERR "openpkg:$prog_name:ERROR: no repository URL known\n";
   138     exit(1);
   139 }
   141 #   determine URL prefix
   142 my $remoteprefixurl = $opts->{"p"};
   143 if (not defined $remoteprefixurl) {
   144     $remoteprefixurl = $url;
   145     $remoteprefixurl =~ s/\/([^\/]+)$//s;
   146 }
   148 ##
   149 ##  HELPER FUNCTIONS
   150 ##
   152 #   determine size and mtime of a file
   153 sub sizetime ($) {
   154     my ($file) = @_;
   155     my @stat = stat($file);
   156     return ($stat[7], $stat[9]);
   157 }
   159 #   determine MD5 of a file
   160 sub digest ($) {
   161     my ($file) = @_;
   162     my $digest = "";
   163     my $output = `($my_openssl md5 -hex "$file") 2>/dev/null || true`;
   164     if ($output =~ m/MD5\(.+?\)=\s*([0-9a-fA-F]+)/s) {
   165         $digest = uc($1);
   166     }
   167     return $digest;
   168 }
   170 #   print a byte size in human-readable form
   171 sub humansize ($) {
   172     my ($bytes) = @_;
   173     my $size = "";
   174     if    ($bytes > 1024*1024*1024) { $size = sprintf("%.1fGB", $bytes / (1024*1024*1024)); }
   175     elsif ($bytes > 1024*1024)      { $size = sprintf("%.1fMB", $bytes / (1024*1024)     ); }
   176     elsif ($bytes > 1024)           { $size = sprintf("%.1fKB", $bytes / (1024)          ); }
   177     else                            { $size = sprintf("%dB",    $bytes                   ); }
   178     return $size;
   179 }
   181 #   caching support: load cache
   182 sub cache_load () {
   183     my $cache = {};
   184     if ($opts->{"C"}) {
   185         if (-f $opts->{"C"}) {
   186             #   load cache file
   187             open(FP, "<" . $opts->{"C"})
   188                 or die "unable to load cache file: " . $opts->{"C"};
   189             my $txt = ""; $txt .= $_ while (<FP>);
   190             close(FP);
   192             #   parse cache file
   193             $txt =~ s/^\s*(\d+)\s+(\d+)\s+([\da-fA-F]+)\s+(.+)$/
   194                 $cache->{$4} = { size => $1, mtime => $2, md5 => $3 }, ''
   195             /mge;
   196         }
   197     }
   198     return $cache;
   199 }
   201 #   caching support: save cache
   202 sub cache_save ($) {
   203     my ($cache) = @_;
   204     if ($opts->{"C"}) {
   205         #   generate cache file
   206         my $txt = "";
   207         foreach my $url (sort { $a cmp $b } keys %{$cache}) {
   208             $txt .= sprintf("%10d %10d %s %s\n",
   209                 $cache->{$url}->{"size"},
   210                 $cache->{$url}->{"mtime"},
   211                 $cache->{$url}->{"md5"},
   212                 $url
   213             );
   214         }
   216         #   write cache file
   217         open(FP, ">" . $opts->{"C"})
   218             or die "unable to save cache file: " . $opts->{"C"};
   219         print FP $txt;
   220         close(FP);
   221     }
   222     return;
   223 }
   225 ##
   226 ##   DETERMINE LOCAL AND REMOTE FILES
   227 ##
   229 #   optionally load the cache
   230 my $cache = cache_load();
   232 #   determine local files
   233 print "++ determining local files\n";
   234 my @local_files = map {
   235     s/\r?\n$//s;
   236     s/\/$//s;
   237     $_;
   238 } `find $localprefixdir -depth -print`;
   239 print "-- local: " . @local_files . " files\n";
   241 #   determine remote files
   242 print "++ determining remote files\n";
   243 sub relurl ($$) {
   244     my ($url, $suburl) = @_;
   245     if ($suburl =~ m/^\w+:\/\//) {
   246         $url = $suburl;
   247     }
   248     elsif ($suburl =~ m/^\//) {
   249         $url = "file://$suburl";
   250     }
   251     else {
   252         $url =~ s/(\/)?\/*[^\/]*$/$1$suburl/;
   253     }
   254     1 while ($url =~ s/\/\.\//\//s);
   255     1 while ($url =~ s/\/[^\/]+\/\.\.\//\//s);
   256     return $url;
   257 }
   258 sub parse_attr ($) {
   259     my ($xml) = @_;
   260     my $info = { href => "", size => -1, mtime => -1, md5 => "-" };
   261     $info->{"href"}  = $1 if ($xml =~ m/\bhref="([^"]*)"/s);
   262     $info->{"size"}  = $1 if ($xml =~ m/\bsize="([^"]*)"/s);
   263     $info->{"mtime"} = $1 if ($xml =~ m/\bmtime="([^"]*)"/s);
   264     $info->{"md5"}   = $1 if ($xml =~ m/\bmd5="([^"]*)"/s);
   265     return $info;
   266 }
   267 sub get_urls ($) {
   268     my ($info) = @_;
   270     #   determine URL
   271     my $url = $info->{"href"};
   272     if ($url =~ m/^\//) {
   273         $url = "file://$url";
   274     }
   275     if ($url =~ m/\/$/) {
   276         $url .= "00INDEX.rdf";
   277     }
   278     $info->{"href"} = $url;
   280     #   fetch URL content
   281     print "-- fetching XML/RDF index: $url\n";
   282     my $tmpfile = "$tmpdir/openpkg-mirror.tmp";
   283     unlink($tmpfile);
   284     my $cmd = "$my_prefix/bin/openpkg curl -k -s -R -o \"$tmpfile\" \"$url\"";
   285     system($cmd);
   286     my $xml;
   287     if ($url =~ m/\.bz2$/) {
   288         $xml = `$my_prefix/lib/openpkg/bzip2 -d -c <"$tmpfile"`;
   289     }
   290     else {
   291         $xml = `cat "$tmpfile"`;
   292     }
   294     #   provide information about this URL
   295     my @urls = ();
   296     if ($info->{"size"} == -1 or $info->{"mtime"} == -1) {
   297         my ($S, $T) = sizetime($tmpfile);
   298         $info->{"size"} = $S;
   299         $info->{"mtime"} = $T;
   300     }
   301     if ($info->{"md5"} eq "-") {
   302         $info->{"md5"} = digest($tmpfile);
   303     }
   304     push(@urls, $info);
   305     unlink($tmpfile);
   307     #   parse references
   308     my @ref1 = ();
   309     my @ref2 = ();
   310     while ($xml =~ m/<Repository\s+([^>]*href=[^>]*)>/gs) {
   311         push(@ref1, $1);
   312     }
   313     while ($xml =~ m/<rdf:Description\s+([^>]*href=[^>]*)>/gs) {
   314         push(@ref2, $1);
   315     }
   317     #   provide information about referenced leave URLs
   318     foreach my $ref2 (@ref2) {
   319         my $i = parse_attr($ref2);
   320         $i->{"href"} = relurl($url, $i->{"href"});
   321         push(@urls, $i);
   322     }
   324     #   provide information about referenced index URLs
   325     foreach my $ref1 (@ref1) {
   326         my $i = parse_attr($ref1);
   327         $i->{"href"} = relurl($url, $i->{"href"});
   328         push(@urls, get_urls($i)); # RECURSION
   329     }
   331     return @urls;
   332 }
   333 my @remote_urls = get_urls({ href => $url, size => -1, mtime => -1, md5 => "-" });
   334 my $size = 0;
   335 foreach my $url (@remote_urls) {
   336     $size += int($url->{"size"}) if ($url->{"size"} > 0);
   337 }
   338 print "-- remote: " . @remote_urls . " files (" . humansize($size) . ")\n";
   340 ##
   341 ##   DOWNLOAD NEW OR UPDATED REMOTE FILES
   342 ##
   344 #   fetching remote files
   345 print "++ fetching new/updated files\n";
   346 my $local_files_keep = {};
   347 sub keep ($) {
   348     my ($path) = @_;
   349     #   keep path and all its parent paths
   350     while ($path ne "") {
   351         $local_files_keep->{$path} = 1;
   352         $path =~ s/(?:^\/$|\/?[^\/]+$)//s;
   353     }
   354 }
   355 sub mkdir_for_file ($$) {
   356     my ($prefix, $path) = @_;
   357     $path =~ s/(?:^\/$|\/?[^\/]+$)//s;
   358     my @dirs = split("/", $path);
   359     $path = $prefix;
   360     $path =~ s/\/$//s;
   361     foreach my $dir (@dirs) {
   362         $path .= "/$dir";
   363         if (not -d $path) {
   364             print "-- creating: $path\n";
   365             system("mkdir \"$path\"");
   366         }
   367     }
   368 }
   369 foreach my $remote_url (@remote_urls) {
   370     my $href = $remote_url->{"href"};
   371     if ($href =~ m/^\Q$remoteprefixurl\E(.+)$/) {
   372         #   derive local path
   373         my $local_path = $1;
   374         my $local_file = $localprefixdir . $local_path;
   375         keep($local_file);
   377         #   determine information about local path
   378         my $download = 1;
   379         my $cache_update = 0;
   380         my ($size, $mtime, $md5);
   381         if (-f $local_file) {
   382             my $c = $cache->{$local_file};
   383             $size  = $c->{"size"}  if (defined $c and defined $c->{"size"});
   384             $mtime = $c->{"mtime"} if (defined $c and defined $c->{"mtime"});
   385             if (not defined $size or not defined $mtime) {
   386                 ($size, $mtime) = sizetime($local_file);
   387                 $cache_update = 1;
   388             }
   389             if (    $size  == $remote_url->{"size"}
   390                 and $mtime == $remote_url->{"mtime"}) {
   391                 $md5 = $c->{"md5"} if (defined $c and defined $c->{"md5"});
   392                 if (not defined $md5) {
   393                     $md5 = digest($local_file);
   394                     $cache_update = 1;
   395                 }
   396                 if ($md5 eq $remote_url->{"md5"}) {
   397                     $download = 0;
   398                 }
   399             }
   400         }
   402         #   optionally download remote file to local path
   403         if ($download) {
   404             print "-- fetching: $local_path\n";
   405             mkdir_for_file($localprefixdir, $local_path);
   406             my $cmd = "$my_prefix/bin/openpkg curl";
   407             $cmd .= " -s"     if (    $opts->{"q"});
   408             $cmd .= " \"-#\"" if (not $opts->{"q"});
   409             $cmd .= " -k -R -o \"$local_file\" \"$href\"";
   410             system($cmd);
   411             ($size, $mtime) = sizetime($local_file);
   412             $md5 = digest($local_file);
   413             $cache_update = 1;
   414         }
   416         #   optionally update the size/mtime/md5 cache
   417         if ($opts->{"C"} and $cache_update) {
   418             $cache->{$local_file} = { size => $size, mtime => $mtime, md5 => $md5 };
   419             cache_save($cache);
   420         }
   421     }
   422     else {
   423         print "** WARNING: URL \"$href\" not under prefix \"$remoteprefixurl\": " .
   424             "unable to derive local path -- skipping URL\n";
   425     }
   426 }
   428 ##
   429 ##   REMOVE OBSOLETE LOCAL FILES
   430 ##
   432 #   remove obsolete files
   433 print "++ removing obsolete files\n";
   434 my $lf  = @local_files;
   435 my $lfk = keys %{$local_files_keep};
   436 if (not $opts->{"f"} and ($lf - $lfk) > ($lf * 0.3)) {
   437     print "** WARNING: more than 30% of your existing local files would be removed (require option -f)\n";
   438 }
   439 else {
   440     foreach my $local_file (@local_files) {
   441         if (not exists $local_files_keep->{$local_file}) {
   442             if (-f $local_file) {
   443                 print "-- removing: $local_file (file)\n";
   444                 system("rm \"$local_file\" >/dev/null 2>&1 || true");
   445             }
   446             elsif (-d $local_file) {
   447                 print "-- removing: $local_file (dir)\n";
   448                 system("rmdir \"$local_file\" >/dev/null 2>&1 || true");
   449             }
   450         }
   451     }
   452 }
   454 ##
   455 ##   CLEANUP
   456 ##
   458 #   optionally flush the cache
   459 cache_save($cache);
   461 #   exit gracefully
   462 exit(0);

mercurial