michael@428: ## michael@428: ## mirror.pl -- OpenPKG Package Mirroring michael@428: ## Copyright (c) 2011-2012 OpenPKG GmbH michael@428: ## michael@428: ## This software is property of the OpenPKG GmbH, DE MUC HRB 160208. michael@428: ## All rights reserved. Licenses which grant limited permission to use, michael@428: ## copy, modify and distribute this software are available from the michael@428: ## OpenPKG GmbH. michael@428: ## michael@428: ## THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED michael@428: ## WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF michael@428: ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. michael@428: ## IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR michael@428: ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, michael@428: ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT michael@428: ## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF michael@428: ## USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND michael@428: ## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, michael@428: ## OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT michael@428: ## OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF michael@428: ## SUCH DAMAGE. michael@428: ## michael@428: michael@428: require 5.003; michael@428: michael@428: # OpenPKG instance prefix and tools michael@428: my $my_prefix = $ENV{'OPENPKG_PREFIX'}; michael@428: my $my_rpm = "$my_prefix/bin/openpkg rpm"; michael@428: my $my_openssl = "$my_prefix/lib/openpkg/openssl"; michael@428: delete $ENV{'OPENPKG_PREFIX'}; michael@428: michael@428: # program identification michael@428: my $progname = "search"; michael@428: my $progvers = "0.1.0"; michael@428: michael@428: # determine reasonable temporary directory michael@428: my $tmpdir = ($ENV{"TMPDIR"} || "/tmp"); michael@428: michael@428: # use unbuffered stdout michael@428: $|++; michael@428: michael@428: ## michael@428: ## COMMAND-LINE INTERFACE HANDLING michael@428: ## michael@428: michael@428: # home-brewn getopt(3) style option parser michael@428: sub getopts ($$@) { michael@428: my ($opt_spec, $opts, @argv_orig) = @_; michael@428: my (%optf) = map { m/(\w)/; $1 => $_ } $opt_spec =~ m/(\w:|\w)/g; michael@428: my (@argv, $optarg); michael@428: michael@428: foreach (@argv_orig) { michael@428: if (@argv) { michael@428: push @argv, $_; michael@428: } elsif (defined $optarg) { michael@428: if (exists $opts->{$optarg}) { michael@428: $opts->{$optarg} .= " $_"; michael@428: } else { michael@428: $opts->{$optarg} = $_; michael@428: } michael@428: $optarg = undef; michael@428: } elsif (!/^[-]/) { michael@428: push @argv, $_; michael@428: } else { michael@428: while (/^\-(\w)(.*)/) { michael@428: if (exists $optf{$1}) { michael@428: if (length($optf{$1}) > 1) { michael@428: if ($2 ne '') { michael@428: if (exists $opts->{$1}) { michael@428: $opts->{$1} .= " $2"; michael@428: } else { michael@428: $opts->{$1} = $2; michael@428: } michael@428: } else { michael@428: $optarg = $1; michael@428: } michael@428: last; michael@428: } else { michael@428: $opts->{$1} = 1; michael@428: } michael@428: } else { michael@428: warn "openpkg:$prog_name:WARNING: unknown option $_\n"; michael@428: } michael@428: $_ = "-$2"; michael@428: } michael@428: } michael@428: } michael@428: if (defined $optarg) { michael@428: warn "openpkg:$prog_name:WARNING: option $optarg requires an argument\n"; michael@428: } michael@428: foreach my $opt (keys %optf) { michael@428: if (not exists $opts->{$opt}) { michael@428: $opts->{$opt} = (length($optf{$opt}) > 1 ? "" : 0); michael@428: } michael@428: } michael@428: return @argv; michael@428: } michael@428: michael@428: # parse command line options michael@428: my $opts = {}; michael@428: @ARGV = getopts("hfqr:p:C:", $opts, @ARGV); michael@428: michael@428: # usage sanity check and usage help michael@428: sub usage { michael@428: my ($rc) = @_; michael@428: my $usage = "openpkg:$prog_name:USAGE: openpkg mirror [-h] [-f] [-C ] [-r ] [-p ] \n"; michael@428: if ($rc == 0) { michael@428: print STDOUT $usage; michael@428: } michael@428: else { michael@428: print STDERR $usage; michael@428: } michael@428: exit($rc); michael@428: } michael@428: if ($opts->{"h"}) { michael@428: usage(0); michael@428: } michael@428: if (@ARGV != 1) { michael@428: usage(1); michael@428: } michael@428: michael@428: # determine arguments michael@428: my ($localprefixdir) = @ARGV; michael@428: if (not -d $localprefixdir) { michael@428: print STDERR "openpkg:$prog_name:ERROR: local directory already has to exist: $localprefixdir\n"; michael@428: exit(1); michael@428: } michael@428: michael@428: # determine start URL michael@428: my $url = $opts->{"r"} || ""; michael@428: if ($url eq "") { michael@428: $url = `$my_prefix/bin/openpkg release --fmt='%u' 2>/dev/null`; michael@428: $url =~ s/^\s+//s; michael@428: $url =~ s/\s+$//s; michael@428: } michael@428: if ($url eq "") { michael@428: print STDERR "openpkg:$prog_name:ERROR: no repository URL known\n"; michael@428: exit(1); michael@428: } michael@428: michael@428: # determine URL prefix michael@428: my $remoteprefixurl = $opts->{"p"}; michael@428: if (not defined $remoteprefixurl) { michael@428: $remoteprefixurl = $url; michael@428: $remoteprefixurl =~ s/\/([^\/]+)$//s; michael@428: } michael@428: michael@428: ## michael@428: ## HELPER FUNCTIONS michael@428: ## michael@428: michael@428: # determine size and mtime of a file michael@428: sub sizetime ($) { michael@428: my ($file) = @_; michael@428: my @stat = stat($file); michael@428: return ($stat[7], $stat[9]); michael@428: } michael@428: michael@428: # determine MD5 of a file michael@428: sub digest ($) { michael@428: my ($file) = @_; michael@428: my $digest = ""; michael@428: my $output = `($my_openssl md5 -hex "$file") 2>/dev/null || true`; michael@428: if ($output =~ m/MD5\(.+?\)=\s*([0-9a-fA-F]+)/s) { michael@428: $digest = uc($1); michael@428: } michael@428: return $digest; michael@428: } michael@428: michael@428: # print a byte size in human-readable form michael@428: sub humansize ($) { michael@428: my ($bytes) = @_; michael@428: my $size = ""; michael@428: if ($bytes > 1024*1024*1024) { $size = sprintf("%.1fGB", $bytes / (1024*1024*1024)); } michael@428: elsif ($bytes > 1024*1024) { $size = sprintf("%.1fMB", $bytes / (1024*1024) ); } michael@428: elsif ($bytes > 1024) { $size = sprintf("%.1fKB", $bytes / (1024) ); } michael@428: else { $size = sprintf("%dB", $bytes ); } michael@428: return $size; michael@428: } michael@428: michael@428: # caching support: load cache michael@428: sub cache_load () { michael@428: my $cache = {}; michael@428: if ($opts->{"C"}) { michael@428: if (-f $opts->{"C"}) { michael@428: # load cache file michael@428: open(FP, "<" . $opts->{"C"}) michael@428: or die "unable to load cache file: " . $opts->{"C"}; michael@428: my $txt = ""; $txt .= $_ while (); michael@428: close(FP); michael@428: michael@428: # parse cache file michael@428: $txt =~ s/^\s*(\d+)\s+(\d+)\s+([\da-fA-F]+)\s+(.+)$/ michael@428: $cache->{$4} = { size => $1, mtime => $2, md5 => $3 }, '' michael@428: /mge; michael@428: } michael@428: } michael@428: return $cache; michael@428: } michael@428: michael@428: # caching support: save cache michael@428: sub cache_save ($) { michael@428: my ($cache) = @_; michael@428: if ($opts->{"C"}) { michael@428: # generate cache file michael@428: my $txt = ""; michael@428: foreach my $url (sort { $a cmp $b } keys %{$cache}) { michael@428: $txt .= sprintf("%10d %10d %s %s\n", michael@428: $cache->{$url}->{"size"}, michael@428: $cache->{$url}->{"mtime"}, michael@428: $cache->{$url}->{"md5"}, michael@428: $url michael@428: ); michael@428: } michael@428: michael@428: # write cache file michael@428: open(FP, ">" . $opts->{"C"}) michael@428: or die "unable to save cache file: " . $opts->{"C"}; michael@428: print FP $txt; michael@428: close(FP); michael@428: } michael@428: return; michael@428: } michael@428: michael@428: ## michael@428: ## DETERMINE LOCAL AND REMOTE FILES michael@428: ## michael@428: michael@428: # optionally load the cache michael@428: my $cache = cache_load(); michael@428: michael@428: # determine local files michael@428: print "++ determining local files\n"; michael@428: my @local_files = map { michael@428: s/\r?\n$//s; michael@428: s/\/$//s; michael@428: $_; michael@428: } `find $localprefixdir -depth -print`; michael@428: print "-- local: " . @local_files . " files\n"; michael@428: michael@428: # determine remote files michael@428: print "++ determining remote files\n"; michael@428: sub relurl ($$) { michael@428: my ($url, $suburl) = @_; michael@428: if ($suburl =~ m/^\w+:\/\//) { michael@428: $url = $suburl; michael@428: } michael@428: elsif ($suburl =~ m/^\//) { michael@428: $url = "file://$suburl"; michael@428: } michael@428: else { michael@428: $url =~ s/(\/)?\/*[^\/]*$/$1$suburl/; michael@428: } michael@428: 1 while ($url =~ s/\/\.\//\//s); michael@428: 1 while ($url =~ s/\/[^\/]+\/\.\.\//\//s); michael@428: return $url; michael@428: } michael@428: sub parse_attr ($) { michael@428: my ($xml) = @_; michael@428: my $info = { href => "", size => -1, mtime => -1, md5 => "-" }; michael@428: $info->{"href"} = $1 if ($xml =~ m/\bhref="([^"]*)"/s); michael@428: $info->{"size"} = $1 if ($xml =~ m/\bsize="([^"]*)"/s); michael@428: $info->{"mtime"} = $1 if ($xml =~ m/\bmtime="([^"]*)"/s); michael@428: $info->{"md5"} = $1 if ($xml =~ m/\bmd5="([^"]*)"/s); michael@428: return $info; michael@428: } michael@428: sub get_urls ($) { michael@428: my ($info) = @_; michael@428: michael@428: # determine URL michael@428: my $url = $info->{"href"}; michael@428: if ($url =~ m/^\//) { michael@428: $url = "file://$url"; michael@428: } michael@428: if ($url =~ m/\/$/) { michael@428: $url .= "00INDEX.rdf"; michael@428: } michael@428: $info->{"href"} = $url; michael@428: michael@428: # fetch URL content michael@428: print "-- fetching XML/RDF index: $url\n"; michael@428: my $tmpfile = "$tmpdir/openpkg-mirror.tmp"; michael@428: unlink($tmpfile); michael@428: my $cmd = "$my_prefix/bin/openpkg curl -k -s -R -o \"$tmpfile\" \"$url\""; michael@428: system($cmd); michael@428: my $xml; michael@428: if ($url =~ m/\.bz2$/) { michael@428: $xml = `$my_prefix/lib/openpkg/bzip2 -d -c <"$tmpfile"`; michael@428: } michael@428: else { michael@428: $xml = `cat "$tmpfile"`; michael@428: } michael@428: michael@428: # provide information about this URL michael@428: my @urls = (); michael@428: if ($info->{"size"} == -1 or $info->{"mtime"} == -1) { michael@428: my ($S, $T) = sizetime($tmpfile); michael@428: $info->{"size"} = $S; michael@428: $info->{"mtime"} = $T; michael@428: } michael@428: if ($info->{"md5"} eq "-") { michael@428: $info->{"md5"} = digest($tmpfile); michael@428: } michael@428: push(@urls, $info); michael@428: unlink($tmpfile); michael@428: michael@428: # parse references michael@428: my @ref1 = (); michael@428: my @ref2 = (); michael@428: while ($xml =~ m/]*href=[^>]*)>/gs) { michael@428: push(@ref1, $1); michael@428: } michael@428: while ($xml =~ m/]*href=[^>]*)>/gs) { michael@428: push(@ref2, $1); michael@428: } michael@428: michael@428: # provide information about referenced leave URLs michael@428: foreach my $ref2 (@ref2) { michael@428: my $i = parse_attr($ref2); michael@428: $i->{"href"} = relurl($url, $i->{"href"}); michael@428: push(@urls, $i); michael@428: } michael@428: michael@428: # provide information about referenced index URLs michael@428: foreach my $ref1 (@ref1) { michael@428: my $i = parse_attr($ref1); michael@428: $i->{"href"} = relurl($url, $i->{"href"}); michael@428: push(@urls, get_urls($i)); # RECURSION michael@428: } michael@428: michael@428: return @urls; michael@428: } michael@428: my @remote_urls = get_urls({ href => $url, size => -1, mtime => -1, md5 => "-" }); michael@428: my $size = 0; michael@428: foreach my $url (@remote_urls) { michael@428: $size += int($url->{"size"}) if ($url->{"size"} > 0); michael@428: } michael@428: print "-- remote: " . @remote_urls . " files (" . humansize($size) . ")\n"; michael@428: michael@428: ## michael@428: ## DOWNLOAD NEW OR UPDATED REMOTE FILES michael@428: ## michael@428: michael@428: # fetching remote files michael@428: print "++ fetching new/updated files\n"; michael@428: my $local_files_keep = {}; michael@428: sub keep ($) { michael@428: my ($path) = @_; michael@428: # keep path and all its parent paths michael@428: while ($path ne "") { michael@428: $local_files_keep->{$path} = 1; michael@428: $path =~ s/(?:^\/$|\/?[^\/]+$)//s; michael@428: } michael@428: } michael@428: sub mkdir_for_file ($$) { michael@428: my ($prefix, $path) = @_; michael@428: $path =~ s/(?:^\/$|\/?[^\/]+$)//s; michael@428: my @dirs = split("/", $path); michael@428: $path = $prefix; michael@428: $path =~ s/\/$//s; michael@428: foreach my $dir (@dirs) { michael@428: $path .= "/$dir"; michael@428: if (not -d $path) { michael@428: print "-- creating: $path\n"; michael@428: system("mkdir \"$path\""); michael@428: } michael@428: } michael@428: } michael@428: foreach my $remote_url (@remote_urls) { michael@428: my $href = $remote_url->{"href"}; michael@428: if ($href =~ m/^\Q$remoteprefixurl\E(.+)$/) { michael@428: # derive local path michael@428: my $local_path = $1; michael@428: my $local_file = $localprefixdir . $local_path; michael@428: keep($local_file); michael@428: michael@428: # determine information about local path michael@428: my $download = 1; michael@428: my $cache_update = 0; michael@428: my ($size, $mtime, $md5); michael@428: if (-f $local_file) { michael@428: my $c = $cache->{$local_file}; michael@428: $size = $c->{"size"} if (defined $c and defined $c->{"size"}); michael@428: $mtime = $c->{"mtime"} if (defined $c and defined $c->{"mtime"}); michael@428: if (not defined $size or not defined $mtime) { michael@428: ($size, $mtime) = sizetime($local_file); michael@428: $cache_update = 1; michael@428: } michael@428: if ( $size == $remote_url->{"size"} michael@428: and $mtime == $remote_url->{"mtime"}) { michael@428: $md5 = $c->{"md5"} if (defined $c and defined $c->{"md5"}); michael@428: if (not defined $md5) { michael@428: $md5 = digest($local_file); michael@428: $cache_update = 1; michael@428: } michael@428: if ($md5 eq $remote_url->{"md5"}) { michael@428: $download = 0; michael@428: } michael@428: } michael@428: } michael@428: michael@428: # optionally download remote file to local path michael@428: if ($download) { michael@428: print "-- fetching: $local_path\n"; michael@428: mkdir_for_file($localprefixdir, $local_path); michael@428: my $cmd = "$my_prefix/bin/openpkg curl"; michael@428: $cmd .= " -s" if ( $opts->{"q"}); michael@428: $cmd .= " \"-#\"" if (not $opts->{"q"}); michael@428: $cmd .= " -k -R -o \"$local_file\" \"$href\""; michael@428: system($cmd); michael@428: ($size, $mtime) = sizetime($local_file); michael@428: $md5 = digest($local_file); michael@428: $cache_update = 1; michael@428: } michael@428: michael@428: # optionally update the size/mtime/md5 cache michael@428: if ($opts->{"C"} and $cache_update) { michael@428: $cache->{$local_file} = { size => $size, mtime => $mtime, md5 => $md5 }; michael@428: cache_save($cache); michael@428: } michael@428: } michael@428: else { michael@428: print "** WARNING: URL \"$href\" not under prefix \"$remoteprefixurl\": " . michael@428: "unable to derive local path -- skipping URL\n"; michael@428: } michael@428: } michael@428: michael@428: ## michael@428: ## REMOVE OBSOLETE LOCAL FILES michael@428: ## michael@428: michael@428: # remove obsolete files michael@428: print "++ removing obsolete files\n"; michael@428: my $lf = @local_files; michael@428: my $lfk = keys %{$local_files_keep}; michael@428: if (not $opts->{"f"} and ($lf - $lfk) > ($lf * 0.3)) { michael@428: print "** WARNING: more than 30% of your existing local files would be removed (require option -f)\n"; michael@428: } michael@428: else { michael@428: foreach my $local_file (@local_files) { michael@428: if (not exists $local_files_keep->{$local_file}) { michael@428: if (-f $local_file) { michael@428: print "-- removing: $local_file (file)\n"; michael@428: system("rm \"$local_file\" >/dev/null 2>&1 || true"); michael@428: } michael@428: elsif (-d $local_file) { michael@428: print "-- removing: $local_file (dir)\n"; michael@428: system("rmdir \"$local_file\" >/dev/null 2>&1 || true"); michael@428: } michael@428: } michael@428: } michael@428: } michael@428: michael@428: ## michael@428: ## CLEANUP michael@428: ## michael@428: michael@428: # optionally flush the cache michael@428: cache_save($cache); michael@428: michael@428: # exit gracefully michael@428: exit(0); michael@428: