diff -r 71503088f51b -r f880f219c566 openpkg/mirror.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/openpkg/mirror.pl Tue Jul 31 12:23:42 2012 +0200
@@ -0,0 +1,463 @@
+##
+## mirror.pl -- OpenPKG Package Mirroring
+## Copyright (c) 2011-2012 OpenPKG GmbH
+##
+## This software is property of the OpenPKG GmbH, DE MUC HRB 160208.
+## All rights reserved. Licenses which grant limited permission to use,
+## copy, modify and distribute this software are available from the
+## OpenPKG GmbH.
+##
+## THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
+## WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+## IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
+## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+## USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+## OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+## OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+## SUCH DAMAGE.
+##
+
+require 5.003;
+
+# OpenPKG instance prefix and tools
+my $my_prefix = $ENV{'OPENPKG_PREFIX'};
+my $my_rpm = "$my_prefix/bin/openpkg rpm";
+my $my_openssl = "$my_prefix/lib/openpkg/openssl";
+delete $ENV{'OPENPKG_PREFIX'};
+
+# program identification
+my $progname = "search";
+my $progvers = "0.1.0";
+
+# determine reasonable temporary directory
+my $tmpdir = ($ENV{"TMPDIR"} || "/tmp");
+
+# use unbuffered stdout
+$|++;
+
+##
+## COMMAND-LINE INTERFACE HANDLING
+##
+
+# home-brewn getopt(3) style option parser
+sub getopts ($$@) {
+ my ($opt_spec, $opts, @argv_orig) = @_;
+ my (%optf) = map { m/(\w)/; $1 => $_ } $opt_spec =~ m/(\w:|\w)/g;
+ my (@argv, $optarg);
+
+ foreach (@argv_orig) {
+ if (@argv) {
+ push @argv, $_;
+ } elsif (defined $optarg) {
+ if (exists $opts->{$optarg}) {
+ $opts->{$optarg} .= " $_";
+ } else {
+ $opts->{$optarg} = $_;
+ }
+ $optarg = undef;
+ } elsif (!/^[-]/) {
+ push @argv, $_;
+ } else {
+ while (/^\-(\w)(.*)/) {
+ if (exists $optf{$1}) {
+ if (length($optf{$1}) > 1) {
+ if ($2 ne '') {
+ if (exists $opts->{$1}) {
+ $opts->{$1} .= " $2";
+ } else {
+ $opts->{$1} = $2;
+ }
+ } else {
+ $optarg = $1;
+ }
+ last;
+ } else {
+ $opts->{$1} = 1;
+ }
+ } else {
+ warn "openpkg:$prog_name:WARNING: unknown option $_\n";
+ }
+ $_ = "-$2";
+ }
+ }
+ }
+ if (defined $optarg) {
+ warn "openpkg:$prog_name:WARNING: option $optarg requires an argument\n";
+ }
+ foreach my $opt (keys %optf) {
+ if (not exists $opts->{$opt}) {
+ $opts->{$opt} = (length($optf{$opt}) > 1 ? "" : 0);
+ }
+ }
+ return @argv;
+}
+
+# parse command line options
+my $opts = {};
+@ARGV = getopts("hfqr:p:C:", $opts, @ARGV);
+
+# usage sanity check and usage help
+sub usage {
+ my ($rc) = @_;
+ my $usage = "openpkg:$prog_name:USAGE: openpkg mirror [-h] [-f] [-C ] [-r ] [-p ] \n";
+ if ($rc == 0) {
+ print STDOUT $usage;
+ }
+ else {
+ print STDERR $usage;
+ }
+ exit($rc);
+}
+if ($opts->{"h"}) {
+ usage(0);
+}
+if (@ARGV != 1) {
+ usage(1);
+}
+
+# determine arguments
+my ($localprefixdir) = @ARGV;
+if (not -d $localprefixdir) {
+ print STDERR "openpkg:$prog_name:ERROR: local directory already has to exist: $localprefixdir\n";
+ exit(1);
+}
+
+# determine start URL
+my $url = $opts->{"r"} || "";
+if ($url eq "") {
+ $url = `$my_prefix/bin/openpkg release --fmt='%u' 2>/dev/null`;
+ $url =~ s/^\s+//s;
+ $url =~ s/\s+$//s;
+}
+if ($url eq "") {
+ print STDERR "openpkg:$prog_name:ERROR: no repository URL known\n";
+ exit(1);
+}
+
+# determine URL prefix
+my $remoteprefixurl = $opts->{"p"};
+if (not defined $remoteprefixurl) {
+ $remoteprefixurl = $url;
+ $remoteprefixurl =~ s/\/([^\/]+)$//s;
+}
+
+##
+## HELPER FUNCTIONS
+##
+
+# determine size and mtime of a file
+sub sizetime ($) {
+ my ($file) = @_;
+ my @stat = stat($file);
+ return ($stat[7], $stat[9]);
+}
+
+# determine MD5 of a file
+sub digest ($) {
+ my ($file) = @_;
+ my $digest = "";
+ my $output = `($my_openssl md5 -hex "$file") 2>/dev/null || true`;
+ if ($output =~ m/MD5\(.+?\)=\s*([0-9a-fA-F]+)/s) {
+ $digest = uc($1);
+ }
+ return $digest;
+}
+
+# print a byte size in human-readable form
+sub humansize ($) {
+ my ($bytes) = @_;
+ my $size = "";
+ if ($bytes > 1024*1024*1024) { $size = sprintf("%.1fGB", $bytes / (1024*1024*1024)); }
+ elsif ($bytes > 1024*1024) { $size = sprintf("%.1fMB", $bytes / (1024*1024) ); }
+ elsif ($bytes > 1024) { $size = sprintf("%.1fKB", $bytes / (1024) ); }
+ else { $size = sprintf("%dB", $bytes ); }
+ return $size;
+}
+
+# caching support: load cache
+sub cache_load () {
+ my $cache = {};
+ if ($opts->{"C"}) {
+ if (-f $opts->{"C"}) {
+ # load cache file
+ open(FP, "<" . $opts->{"C"})
+ or die "unable to load cache file: " . $opts->{"C"};
+ my $txt = ""; $txt .= $_ while ();
+ close(FP);
+
+ # parse cache file
+ $txt =~ s/^\s*(\d+)\s+(\d+)\s+([\da-fA-F]+)\s+(.+)$/
+ $cache->{$4} = { size => $1, mtime => $2, md5 => $3 }, ''
+ /mge;
+ }
+ }
+ return $cache;
+}
+
+# caching support: save cache
+sub cache_save ($) {
+ my ($cache) = @_;
+ if ($opts->{"C"}) {
+ # generate cache file
+ my $txt = "";
+ foreach my $url (sort { $a cmp $b } keys %{$cache}) {
+ $txt .= sprintf("%10d %10d %s %s\n",
+ $cache->{$url}->{"size"},
+ $cache->{$url}->{"mtime"},
+ $cache->{$url}->{"md5"},
+ $url
+ );
+ }
+
+ # write cache file
+ open(FP, ">" . $opts->{"C"})
+ or die "unable to save cache file: " . $opts->{"C"};
+ print FP $txt;
+ close(FP);
+ }
+ return;
+}
+
+##
+## DETERMINE LOCAL AND REMOTE FILES
+##
+
+# optionally load the cache
+my $cache = cache_load();
+
+# determine local files
+print "++ determining local files\n";
+my @local_files = map {
+ s/\r?\n$//s;
+ s/\/$//s;
+ $_;
+} `find $localprefixdir -depth -print`;
+print "-- local: " . @local_files . " files\n";
+
+# determine remote files
+print "++ determining remote files\n";
+sub relurl ($$) {
+ my ($url, $suburl) = @_;
+ if ($suburl =~ m/^\w+:\/\//) {
+ $url = $suburl;
+ }
+ elsif ($suburl =~ m/^\//) {
+ $url = "file://$suburl";
+ }
+ else {
+ $url =~ s/(\/)?\/*[^\/]*$/$1$suburl/;
+ }
+ 1 while ($url =~ s/\/\.\//\//s);
+ 1 while ($url =~ s/\/[^\/]+\/\.\.\//\//s);
+ return $url;
+}
+sub parse_attr ($) {
+ my ($xml) = @_;
+ my $info = { href => "", size => -1, mtime => -1, md5 => "-" };
+ $info->{"href"} = $1 if ($xml =~ m/\bhref="([^"]*)"/s);
+ $info->{"size"} = $1 if ($xml =~ m/\bsize="([^"]*)"/s);
+ $info->{"mtime"} = $1 if ($xml =~ m/\bmtime="([^"]*)"/s);
+ $info->{"md5"} = $1 if ($xml =~ m/\bmd5="([^"]*)"/s);
+ return $info;
+}
+sub get_urls ($) {
+ my ($info) = @_;
+
+ # determine URL
+ my $url = $info->{"href"};
+ if ($url =~ m/^\//) {
+ $url = "file://$url";
+ }
+ if ($url =~ m/\/$/) {
+ $url .= "00INDEX.rdf";
+ }
+ $info->{"href"} = $url;
+
+ # fetch URL content
+ print "-- fetching XML/RDF index: $url\n";
+ my $tmpfile = "$tmpdir/openpkg-mirror.tmp";
+ unlink($tmpfile);
+ my $cmd = "$my_prefix/bin/openpkg curl -k -s -R -o \"$tmpfile\" \"$url\"";
+ system($cmd);
+ my $xml;
+ if ($url =~ m/\.bz2$/) {
+ $xml = `$my_prefix/lib/openpkg/bzip2 -d -c <"$tmpfile"`;
+ }
+ else {
+ $xml = `cat "$tmpfile"`;
+ }
+
+ # provide information about this URL
+ my @urls = ();
+ if ($info->{"size"} == -1 or $info->{"mtime"} == -1) {
+ my ($S, $T) = sizetime($tmpfile);
+ $info->{"size"} = $S;
+ $info->{"mtime"} = $T;
+ }
+ if ($info->{"md5"} eq "-") {
+ $info->{"md5"} = digest($tmpfile);
+ }
+ push(@urls, $info);
+ unlink($tmpfile);
+
+ # parse references
+ my @ref1 = ();
+ my @ref2 = ();
+ while ($xml =~ m/]*href=[^>]*)>/gs) {
+ push(@ref1, $1);
+ }
+ while ($xml =~ m/]*href=[^>]*)>/gs) {
+ push(@ref2, $1);
+ }
+
+ # provide information about referenced leave URLs
+ foreach my $ref2 (@ref2) {
+ my $i = parse_attr($ref2);
+ $i->{"href"} = relurl($url, $i->{"href"});
+ push(@urls, $i);
+ }
+
+ # provide information about referenced index URLs
+ foreach my $ref1 (@ref1) {
+ my $i = parse_attr($ref1);
+ $i->{"href"} = relurl($url, $i->{"href"});
+ push(@urls, get_urls($i)); # RECURSION
+ }
+
+ return @urls;
+}
+my @remote_urls = get_urls({ href => $url, size => -1, mtime => -1, md5 => "-" });
+my $size = 0;
+foreach my $url (@remote_urls) {
+ $size += int($url->{"size"}) if ($url->{"size"} > 0);
+}
+print "-- remote: " . @remote_urls . " files (" . humansize($size) . ")\n";
+
+##
+## DOWNLOAD NEW OR UPDATED REMOTE FILES
+##
+
+# fetching remote files
+print "++ fetching new/updated files\n";
+my $local_files_keep = {};
+sub keep ($) {
+ my ($path) = @_;
+ # keep path and all its parent paths
+ while ($path ne "") {
+ $local_files_keep->{$path} = 1;
+ $path =~ s/(?:^\/$|\/?[^\/]+$)//s;
+ }
+}
+sub mkdir_for_file ($$) {
+ my ($prefix, $path) = @_;
+ $path =~ s/(?:^\/$|\/?[^\/]+$)//s;
+ my @dirs = split("/", $path);
+ $path = $prefix;
+ $path =~ s/\/$//s;
+ foreach my $dir (@dirs) {
+ $path .= "/$dir";
+ if (not -d $path) {
+ print "-- creating: $path\n";
+ system("mkdir \"$path\"");
+ }
+ }
+}
+foreach my $remote_url (@remote_urls) {
+ my $href = $remote_url->{"href"};
+ if ($href =~ m/^\Q$remoteprefixurl\E(.+)$/) {
+ # derive local path
+ my $local_path = $1;
+ my $local_file = $localprefixdir . $local_path;
+ keep($local_file);
+
+ # determine information about local path
+ my $download = 1;
+ my $cache_update = 0;
+ my ($size, $mtime, $md5);
+ if (-f $local_file) {
+ my $c = $cache->{$local_file};
+ $size = $c->{"size"} if (defined $c and defined $c->{"size"});
+ $mtime = $c->{"mtime"} if (defined $c and defined $c->{"mtime"});
+ if (not defined $size or not defined $mtime) {
+ ($size, $mtime) = sizetime($local_file);
+ $cache_update = 1;
+ }
+ if ( $size == $remote_url->{"size"}
+ and $mtime == $remote_url->{"mtime"}) {
+ $md5 = $c->{"md5"} if (defined $c and defined $c->{"md5"});
+ if (not defined $md5) {
+ $md5 = digest($local_file);
+ $cache_update = 1;
+ }
+ if ($md5 eq $remote_url->{"md5"}) {
+ $download = 0;
+ }
+ }
+ }
+
+ # optionally download remote file to local path
+ if ($download) {
+ print "-- fetching: $local_path\n";
+ mkdir_for_file($localprefixdir, $local_path);
+ my $cmd = "$my_prefix/bin/openpkg curl";
+ $cmd .= " -s" if ( $opts->{"q"});
+ $cmd .= " \"-#\"" if (not $opts->{"q"});
+ $cmd .= " -k -R -o \"$local_file\" \"$href\"";
+ system($cmd);
+ ($size, $mtime) = sizetime($local_file);
+ $md5 = digest($local_file);
+ $cache_update = 1;
+ }
+
+ # optionally update the size/mtime/md5 cache
+ if ($opts->{"C"} and $cache_update) {
+ $cache->{$local_file} = { size => $size, mtime => $mtime, md5 => $md5 };
+ cache_save($cache);
+ }
+ }
+ else {
+ print "** WARNING: URL \"$href\" not under prefix \"$remoteprefixurl\": " .
+ "unable to derive local path -- skipping URL\n";
+ }
+}
+
+##
+## REMOVE OBSOLETE LOCAL FILES
+##
+
+# remove obsolete files
+print "++ removing obsolete files\n";
+my $lf = @local_files;
+my $lfk = keys %{$local_files_keep};
+if (not $opts->{"f"} and ($lf - $lfk) > ($lf * 0.3)) {
+ print "** WARNING: more than 30% of your existing local files would be removed (require option -f)\n";
+}
+else {
+ foreach my $local_file (@local_files) {
+ if (not exists $local_files_keep->{$local_file}) {
+ if (-f $local_file) {
+ print "-- removing: $local_file (file)\n";
+ system("rm \"$local_file\" >/dev/null 2>&1 || true");
+ }
+ elsif (-d $local_file) {
+ print "-- removing: $local_file (dir)\n";
+ system("rmdir \"$local_file\" >/dev/null 2>&1 || true");
+ }
+ }
+ }
+}
+
+##
+## CLEANUP
+##
+
+# optionally flush the cache
+cache_save($cache);
+
+# exit gracefully
+exit(0);
+