Mon, 28 Jan 2013 17:37:18 +0100
Correct socket error reporting improvement with IPv6 portable code,
after helpful recommendation by Saúl Ibarra Corretgé on OSips devlist.
1 ##
2 ## mirror.pl -- OpenPKG Package Mirroring
3 ## Copyright (c) 2011-2012 OpenPKG GmbH <http://openpkg.com/>
4 ##
5 ## This software is property of the OpenPKG GmbH, DE MUC HRB 160208.
6 ## All rights reserved. Licenses which grant limited permission to use,
7 ## copy, modify and distribute this software are available from the
8 ## OpenPKG GmbH.
9 ##
10 ## THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
11 ## WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
12 ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
13 ## IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
14 ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
15 ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
16 ## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
17 ## USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
18 ## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
19 ## OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
20 ## OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
21 ## SUCH DAMAGE.
22 ##
24 require 5.003;
26 # OpenPKG instance prefix and tools
27 my $my_prefix = $ENV{'OPENPKG_PREFIX'};
28 my $my_rpm = "$my_prefix/bin/openpkg rpm";
29 my $my_openssl = "$my_prefix/lib/openpkg/openssl";
30 delete $ENV{'OPENPKG_PREFIX'};
32 # program identification
33 my $progname = "search";
34 my $progvers = "0.1.0";
36 # determine reasonable temporary directory
37 my $tmpdir = ($ENV{"TMPDIR"} || "/tmp");
39 # use unbuffered stdout
40 $|++;
42 ##
43 ## COMMAND-LINE INTERFACE HANDLING
44 ##
46 # home-brewn getopt(3) style option parser
47 sub getopts ($$@) {
48 my ($opt_spec, $opts, @argv_orig) = @_;
49 my (%optf) = map { m/(\w)/; $1 => $_ } $opt_spec =~ m/(\w:|\w)/g;
50 my (@argv, $optarg);
52 foreach (@argv_orig) {
53 if (@argv) {
54 push @argv, $_;
55 } elsif (defined $optarg) {
56 if (exists $opts->{$optarg}) {
57 $opts->{$optarg} .= " $_";
58 } else {
59 $opts->{$optarg} = $_;
60 }
61 $optarg = undef;
62 } elsif (!/^[-]/) {
63 push @argv, $_;
64 } else {
65 while (/^\-(\w)(.*)/) {
66 if (exists $optf{$1}) {
67 if (length($optf{$1}) > 1) {
68 if ($2 ne '') {
69 if (exists $opts->{$1}) {
70 $opts->{$1} .= " $2";
71 } else {
72 $opts->{$1} = $2;
73 }
74 } else {
75 $optarg = $1;
76 }
77 last;
78 } else {
79 $opts->{$1} = 1;
80 }
81 } else {
82 warn "openpkg:$prog_name:WARNING: unknown option $_\n";
83 }
84 $_ = "-$2";
85 }
86 }
87 }
88 if (defined $optarg) {
89 warn "openpkg:$prog_name:WARNING: option $optarg requires an argument\n";
90 }
91 foreach my $opt (keys %optf) {
92 if (not exists $opts->{$opt}) {
93 $opts->{$opt} = (length($optf{$opt}) > 1 ? "" : 0);
94 }
95 }
96 return @argv;
97 }
99 # parse command line options
100 my $opts = {};
101 @ARGV = getopts("hfqr:p:C:", $opts, @ARGV);
103 # usage sanity check and usage help
104 sub usage {
105 my ($rc) = @_;
106 my $usage = "openpkg:$prog_name:USAGE: openpkg mirror [-h] [-f] [-C <cache-file>] [-r <remote-url>] [-p <remote-url-prefix>] <local-prefix-dir>\n";
107 if ($rc == 0) {
108 print STDOUT $usage;
109 }
110 else {
111 print STDERR $usage;
112 }
113 exit($rc);
114 }
115 if ($opts->{"h"}) {
116 usage(0);
117 }
118 if (@ARGV != 1) {
119 usage(1);
120 }
122 # determine arguments
123 my ($localprefixdir) = @ARGV;
124 if (not -d $localprefixdir) {
125 print STDERR "openpkg:$prog_name:ERROR: local directory already has to exist: $localprefixdir\n";
126 exit(1);
127 }
129 # determine start URL
130 my $url = $opts->{"r"} || "";
131 if ($url eq "") {
132 $url = `$my_prefix/bin/openpkg release --fmt='%u' 2>/dev/null`;
133 $url =~ s/^\s+//s;
134 $url =~ s/\s+$//s;
135 }
136 if ($url eq "") {
137 print STDERR "openpkg:$prog_name:ERROR: no repository URL known\n";
138 exit(1);
139 }
141 # determine URL prefix
142 my $remoteprefixurl = $opts->{"p"};
143 if (not defined $remoteprefixurl) {
144 $remoteprefixurl = $url;
145 $remoteprefixurl =~ s/\/([^\/]+)$//s;
146 }
148 ##
149 ## HELPER FUNCTIONS
150 ##
152 # determine size and mtime of a file
153 sub sizetime ($) {
154 my ($file) = @_;
155 my @stat = stat($file);
156 return ($stat[7], $stat[9]);
157 }
159 # determine MD5 of a file
160 sub digest ($) {
161 my ($file) = @_;
162 my $digest = "";
163 my $output = `($my_openssl md5 -hex "$file") 2>/dev/null || true`;
164 if ($output =~ m/MD5\(.+?\)=\s*([0-9a-fA-F]+)/s) {
165 $digest = uc($1);
166 }
167 return $digest;
168 }
170 # print a byte size in human-readable form
171 sub humansize ($) {
172 my ($bytes) = @_;
173 my $size = "";
174 if ($bytes > 1024*1024*1024) { $size = sprintf("%.1fGB", $bytes / (1024*1024*1024)); }
175 elsif ($bytes > 1024*1024) { $size = sprintf("%.1fMB", $bytes / (1024*1024) ); }
176 elsif ($bytes > 1024) { $size = sprintf("%.1fKB", $bytes / (1024) ); }
177 else { $size = sprintf("%dB", $bytes ); }
178 return $size;
179 }
181 # caching support: load cache
182 sub cache_load () {
183 my $cache = {};
184 if ($opts->{"C"}) {
185 if (-f $opts->{"C"}) {
186 # load cache file
187 open(FP, "<" . $opts->{"C"})
188 or die "unable to load cache file: " . $opts->{"C"};
189 my $txt = ""; $txt .= $_ while (<FP>);
190 close(FP);
192 # parse cache file
193 $txt =~ s/^\s*(\d+)\s+(\d+)\s+([\da-fA-F]+)\s+(.+)$/
194 $cache->{$4} = { size => $1, mtime => $2, md5 => $3 }, ''
195 /mge;
196 }
197 }
198 return $cache;
199 }
201 # caching support: save cache
202 sub cache_save ($) {
203 my ($cache) = @_;
204 if ($opts->{"C"}) {
205 # generate cache file
206 my $txt = "";
207 foreach my $url (sort { $a cmp $b } keys %{$cache}) {
208 $txt .= sprintf("%10d %10d %s %s\n",
209 $cache->{$url}->{"size"},
210 $cache->{$url}->{"mtime"},
211 $cache->{$url}->{"md5"},
212 $url
213 );
214 }
216 # write cache file
217 open(FP, ">" . $opts->{"C"})
218 or die "unable to save cache file: " . $opts->{"C"};
219 print FP $txt;
220 close(FP);
221 }
222 return;
223 }
225 ##
226 ## DETERMINE LOCAL AND REMOTE FILES
227 ##
229 # optionally load the cache
230 my $cache = cache_load();
232 # determine local files
233 print "++ determining local files\n";
234 my @local_files = map {
235 s/\r?\n$//s;
236 s/\/$//s;
237 $_;
238 } `find $localprefixdir -depth -print`;
239 print "-- local: " . @local_files . " files\n";
241 # determine remote files
242 print "++ determining remote files\n";
243 sub relurl ($$) {
244 my ($url, $suburl) = @_;
245 if ($suburl =~ m/^\w+:\/\//) {
246 $url = $suburl;
247 }
248 elsif ($suburl =~ m/^\//) {
249 $url = "file://$suburl";
250 }
251 else {
252 $url =~ s/(\/)?\/*[^\/]*$/$1$suburl/;
253 }
254 1 while ($url =~ s/\/\.\//\//s);
255 1 while ($url =~ s/\/[^\/]+\/\.\.\//\//s);
256 return $url;
257 }
258 sub parse_attr ($) {
259 my ($xml) = @_;
260 my $info = { href => "", size => -1, mtime => -1, md5 => "-" };
261 $info->{"href"} = $1 if ($xml =~ m/\bhref="([^"]*)"/s);
262 $info->{"size"} = $1 if ($xml =~ m/\bsize="([^"]*)"/s);
263 $info->{"mtime"} = $1 if ($xml =~ m/\bmtime="([^"]*)"/s);
264 $info->{"md5"} = $1 if ($xml =~ m/\bmd5="([^"]*)"/s);
265 return $info;
266 }
267 sub get_urls ($) {
268 my ($info) = @_;
270 # determine URL
271 my $url = $info->{"href"};
272 if ($url =~ m/^\//) {
273 $url = "file://$url";
274 }
275 if ($url =~ m/\/$/) {
276 $url .= "00INDEX.rdf";
277 }
278 $info->{"href"} = $url;
280 # fetch URL content
281 print "-- fetching XML/RDF index: $url\n";
282 my $tmpfile = "$tmpdir/openpkg-mirror.tmp";
283 unlink($tmpfile);
284 my $cmd = "$my_prefix/bin/openpkg curl -k -s -R -o \"$tmpfile\" \"$url\"";
285 system($cmd);
286 my $xml;
287 if ($url =~ m/\.bz2$/) {
288 $xml = `$my_prefix/lib/openpkg/bzip2 -d -c <"$tmpfile"`;
289 }
290 else {
291 $xml = `cat "$tmpfile"`;
292 }
294 # provide information about this URL
295 my @urls = ();
296 if ($info->{"size"} == -1 or $info->{"mtime"} == -1) {
297 my ($S, $T) = sizetime($tmpfile);
298 $info->{"size"} = $S;
299 $info->{"mtime"} = $T;
300 }
301 if ($info->{"md5"} eq "-") {
302 $info->{"md5"} = digest($tmpfile);
303 }
304 push(@urls, $info);
305 unlink($tmpfile);
307 # parse references
308 my @ref1 = ();
309 my @ref2 = ();
310 while ($xml =~ m/<Repository\s+([^>]*href=[^>]*)>/gs) {
311 push(@ref1, $1);
312 }
313 while ($xml =~ m/<rdf:Description\s+([^>]*href=[^>]*)>/gs) {
314 push(@ref2, $1);
315 }
317 # provide information about referenced leave URLs
318 foreach my $ref2 (@ref2) {
319 my $i = parse_attr($ref2);
320 $i->{"href"} = relurl($url, $i->{"href"});
321 push(@urls, $i);
322 }
324 # provide information about referenced index URLs
325 foreach my $ref1 (@ref1) {
326 my $i = parse_attr($ref1);
327 $i->{"href"} = relurl($url, $i->{"href"});
328 push(@urls, get_urls($i)); # RECURSION
329 }
331 return @urls;
332 }
333 my @remote_urls = get_urls({ href => $url, size => -1, mtime => -1, md5 => "-" });
334 my $size = 0;
335 foreach my $url (@remote_urls) {
336 $size += int($url->{"size"}) if ($url->{"size"} > 0);
337 }
338 print "-- remote: " . @remote_urls . " files (" . humansize($size) . ")\n";
340 ##
341 ## DOWNLOAD NEW OR UPDATED REMOTE FILES
342 ##
344 # fetching remote files
345 print "++ fetching new/updated files\n";
346 my $local_files_keep = {};
347 sub keep ($) {
348 my ($path) = @_;
349 # keep path and all its parent paths
350 while ($path ne "") {
351 $local_files_keep->{$path} = 1;
352 $path =~ s/(?:^\/$|\/?[^\/]+$)//s;
353 }
354 }
355 sub mkdir_for_file ($$) {
356 my ($prefix, $path) = @_;
357 $path =~ s/(?:^\/$|\/?[^\/]+$)//s;
358 my @dirs = split("/", $path);
359 $path = $prefix;
360 $path =~ s/\/$//s;
361 foreach my $dir (@dirs) {
362 $path .= "/$dir";
363 if (not -d $path) {
364 print "-- creating: $path\n";
365 system("mkdir \"$path\"");
366 }
367 }
368 }
369 foreach my $remote_url (@remote_urls) {
370 my $href = $remote_url->{"href"};
371 if ($href =~ m/^\Q$remoteprefixurl\E(.+)$/) {
372 # derive local path
373 my $local_path = $1;
374 my $local_file = $localprefixdir . $local_path;
375 keep($local_file);
377 # determine information about local path
378 my $download = 1;
379 my $cache_update = 0;
380 my ($size, $mtime, $md5);
381 if (-f $local_file) {
382 my $c = $cache->{$local_file};
383 $size = $c->{"size"} if (defined $c and defined $c->{"size"});
384 $mtime = $c->{"mtime"} if (defined $c and defined $c->{"mtime"});
385 if (not defined $size or not defined $mtime) {
386 ($size, $mtime) = sizetime($local_file);
387 $cache_update = 1;
388 }
389 if ( $size == $remote_url->{"size"}
390 and $mtime == $remote_url->{"mtime"}) {
391 $md5 = $c->{"md5"} if (defined $c and defined $c->{"md5"});
392 if (not defined $md5) {
393 $md5 = digest($local_file);
394 $cache_update = 1;
395 }
396 if ($md5 eq $remote_url->{"md5"}) {
397 $download = 0;
398 }
399 }
400 }
402 # optionally download remote file to local path
403 if ($download) {
404 print "-- fetching: $local_path\n";
405 mkdir_for_file($localprefixdir, $local_path);
406 my $cmd = "$my_prefix/bin/openpkg curl";
407 $cmd .= " -s" if ( $opts->{"q"});
408 $cmd .= " \"-#\"" if (not $opts->{"q"});
409 $cmd .= " -k -R -o \"$local_file\" \"$href\"";
410 system($cmd);
411 ($size, $mtime) = sizetime($local_file);
412 $md5 = digest($local_file);
413 $cache_update = 1;
414 }
416 # optionally update the size/mtime/md5 cache
417 if ($opts->{"C"} and $cache_update) {
418 $cache->{$local_file} = { size => $size, mtime => $mtime, md5 => $md5 };
419 cache_save($cache);
420 }
421 }
422 else {
423 print "** WARNING: URL \"$href\" not under prefix \"$remoteprefixurl\": " .
424 "unable to derive local path -- skipping URL\n";
425 }
426 }
428 ##
429 ## REMOVE OBSOLETE LOCAL FILES
430 ##
432 # remove obsolete files
433 print "++ removing obsolete files\n";
434 my $lf = @local_files;
435 my $lfk = keys %{$local_files_keep};
436 if (not $opts->{"f"} and ($lf - $lfk) > ($lf * 0.3)) {
437 print "** WARNING: more than 30% of your existing local files would be removed (require option -f)\n";
438 }
439 else {
440 foreach my $local_file (@local_files) {
441 if (not exists $local_files_keep->{$local_file}) {
442 if (-f $local_file) {
443 print "-- removing: $local_file (file)\n";
444 system("rm \"$local_file\" >/dev/null 2>&1 || true");
445 }
446 elsif (-d $local_file) {
447 print "-- removing: $local_file (dir)\n";
448 system("rmdir \"$local_file\" >/dev/null 2>&1 || true");
449 }
450 }
451 }
452 }
454 ##
455 ## CLEANUP
456 ##
458 # optionally flush the cache
459 cache_save($cache);
461 # exit gracefully
462 exit(0);