| |
1 ## |
| |
2 ## mirror.pl -- OpenPKG Package Mirroring |
| |
3 ## Copyright (c) 2011-2012 OpenPKG GmbH <http://openpkg.com/> |
| |
4 ## |
| |
5 ## This software is property of the OpenPKG GmbH, DE MUC HRB 160208. |
| |
6 ## All rights reserved. Licenses which grant limited permission to use, |
| |
7 ## copy, modify and distribute this software are available from the |
| |
8 ## OpenPKG GmbH. |
| |
9 ## |
| |
10 ## THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED |
| |
11 ## WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
| |
12 ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| |
13 ## IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR |
| |
14 ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| |
15 ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| |
16 ## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| |
17 ## USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| |
18 ## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| |
19 ## OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| |
20 ## OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| |
21 ## SUCH DAMAGE. |
| |
22 ## |
| |
23 |
| |
24 require 5.003; |
| |
25 |
| |
26 # OpenPKG instance prefix and tools |
| |
27 my $my_prefix = $ENV{'OPENPKG_PREFIX'}; |
| |
28 my $my_rpm = "$my_prefix/bin/openpkg rpm"; |
| |
29 my $my_openssl = "$my_prefix/lib/openpkg/openssl"; |
| |
30 delete $ENV{'OPENPKG_PREFIX'}; |
| |
31 |
| |
32 # program identification |
| |
33 my $progname = "search"; |
| |
34 my $progvers = "0.1.0"; |
| |
35 |
| |
36 # determine reasonable temporary directory |
| |
37 my $tmpdir = ($ENV{"TMPDIR"} || "/tmp"); |
| |
38 |
| |
39 # use unbuffered stdout |
| |
40 $|++; |
| |
41 |
| |
42 ## |
| |
43 ## COMMAND-LINE INTERFACE HANDLING |
| |
44 ## |
| |
45 |
| |
46 # home-brewn getopt(3) style option parser |
| |
47 sub getopts ($$@) { |
| |
48 my ($opt_spec, $opts, @argv_orig) = @_; |
| |
49 my (%optf) = map { m/(\w)/; $1 => $_ } $opt_spec =~ m/(\w:|\w)/g; |
| |
50 my (@argv, $optarg); |
| |
51 |
| |
52 foreach (@argv_orig) { |
| |
53 if (@argv) { |
| |
54 push @argv, $_; |
| |
55 } elsif (defined $optarg) { |
| |
56 if (exists $opts->{$optarg}) { |
| |
57 $opts->{$optarg} .= " $_"; |
| |
58 } else { |
| |
59 $opts->{$optarg} = $_; |
| |
60 } |
| |
61 $optarg = undef; |
| |
62 } elsif (!/^[-]/) { |
| |
63 push @argv, $_; |
| |
64 } else { |
| |
65 while (/^\-(\w)(.*)/) { |
| |
66 if (exists $optf{$1}) { |
| |
67 if (length($optf{$1}) > 1) { |
| |
68 if ($2 ne '') { |
| |
69 if (exists $opts->{$1}) { |
| |
70 $opts->{$1} .= " $2"; |
| |
71 } else { |
| |
72 $opts->{$1} = $2; |
| |
73 } |
| |
74 } else { |
| |
75 $optarg = $1; |
| |
76 } |
| |
77 last; |
| |
78 } else { |
| |
79 $opts->{$1} = 1; |
| |
80 } |
| |
81 } else { |
| |
82 warn "openpkg:$prog_name:WARNING: unknown option $_\n"; |
| |
83 } |
| |
84 $_ = "-$2"; |
| |
85 } |
| |
86 } |
| |
87 } |
| |
88 if (defined $optarg) { |
| |
89 warn "openpkg:$prog_name:WARNING: option $optarg requires an argument\n"; |
| |
90 } |
| |
91 foreach my $opt (keys %optf) { |
| |
92 if (not exists $opts->{$opt}) { |
| |
93 $opts->{$opt} = (length($optf{$opt}) > 1 ? "" : 0); |
| |
94 } |
| |
95 } |
| |
96 return @argv; |
| |
97 } |
| |
98 |
| |
99 # parse command line options |
| |
100 my $opts = {}; |
| |
101 @ARGV = getopts("hfqr:p:C:", $opts, @ARGV); |
| |
102 |
| |
103 # usage sanity check and usage help |
| |
104 sub usage { |
| |
105 my ($rc) = @_; |
| |
106 my $usage = "openpkg:$prog_name:USAGE: openpkg mirror [-h] [-f] [-C <cache-file>] [-r <remote-url>] [-p <remote-url-prefix>] <local-prefix-dir>\n"; |
| |
107 if ($rc == 0) { |
| |
108 print STDOUT $usage; |
| |
109 } |
| |
110 else { |
| |
111 print STDERR $usage; |
| |
112 } |
| |
113 exit($rc); |
| |
114 } |
| |
115 if ($opts->{"h"}) { |
| |
116 usage(0); |
| |
117 } |
| |
118 if (@ARGV != 1) { |
| |
119 usage(1); |
| |
120 } |
| |
121 |
| |
122 # determine arguments |
| |
123 my ($localprefixdir) = @ARGV; |
| |
124 if (not -d $localprefixdir) { |
| |
125 print STDERR "openpkg:$prog_name:ERROR: local directory already has to exist: $localprefixdir\n"; |
| |
126 exit(1); |
| |
127 } |
| |
128 |
| |
129 # determine start URL |
| |
130 my $url = $opts->{"r"} || ""; |
| |
131 if ($url eq "") { |
| |
132 $url = `$my_prefix/bin/openpkg release --fmt='%u' 2>/dev/null`; |
| |
133 $url =~ s/^\s+//s; |
| |
134 $url =~ s/\s+$//s; |
| |
135 } |
| |
136 if ($url eq "") { |
| |
137 print STDERR "openpkg:$prog_name:ERROR: no repository URL known\n"; |
| |
138 exit(1); |
| |
139 } |
| |
140 |
| |
141 # determine URL prefix |
| |
142 my $remoteprefixurl = $opts->{"p"}; |
| |
143 if (not defined $remoteprefixurl) { |
| |
144 $remoteprefixurl = $url; |
| |
145 $remoteprefixurl =~ s/\/([^\/]+)$//s; |
| |
146 } |
| |
147 |
| |
148 ## |
| |
149 ## HELPER FUNCTIONS |
| |
150 ## |
| |
151 |
| |
152 # determine size and mtime of a file |
| |
153 sub sizetime ($) { |
| |
154 my ($file) = @_; |
| |
155 my @stat = stat($file); |
| |
156 return ($stat[7], $stat[9]); |
| |
157 } |
| |
158 |
| |
159 # determine MD5 of a file |
| |
160 sub digest ($) { |
| |
161 my ($file) = @_; |
| |
162 my $digest = ""; |
| |
163 my $output = `($my_openssl md5 -hex "$file") 2>/dev/null || true`; |
| |
164 if ($output =~ m/MD5\(.+?\)=\s*([0-9a-fA-F]+)/s) { |
| |
165 $digest = uc($1); |
| |
166 } |
| |
167 return $digest; |
| |
168 } |
| |
169 |
| |
170 # print a byte size in human-readable form |
| |
171 sub humansize ($) { |
| |
172 my ($bytes) = @_; |
| |
173 my $size = ""; |
| |
174 if ($bytes > 1024*1024*1024) { $size = sprintf("%.1fGB", $bytes / (1024*1024*1024)); } |
| |
175 elsif ($bytes > 1024*1024) { $size = sprintf("%.1fMB", $bytes / (1024*1024) ); } |
| |
176 elsif ($bytes > 1024) { $size = sprintf("%.1fKB", $bytes / (1024) ); } |
| |
177 else { $size = sprintf("%dB", $bytes ); } |
| |
178 return $size; |
| |
179 } |
| |
180 |
| |
181 # caching support: load cache |
| |
182 sub cache_load () { |
| |
183 my $cache = {}; |
| |
184 if ($opts->{"C"}) { |
| |
185 if (-f $opts->{"C"}) { |
| |
186 # load cache file |
| |
187 open(FP, "<" . $opts->{"C"}) |
| |
188 or die "unable to load cache file: " . $opts->{"C"}; |
| |
189 my $txt = ""; $txt .= $_ while (<FP>); |
| |
190 close(FP); |
| |
191 |
| |
192 # parse cache file |
| |
193 $txt =~ s/^\s*(\d+)\s+(\d+)\s+([\da-fA-F]+)\s+(.+)$/ |
| |
194 $cache->{$4} = { size => $1, mtime => $2, md5 => $3 }, '' |
| |
195 /mge; |
| |
196 } |
| |
197 } |
| |
198 return $cache; |
| |
199 } |
| |
200 |
| |
201 # caching support: save cache |
| |
202 sub cache_save ($) { |
| |
203 my ($cache) = @_; |
| |
204 if ($opts->{"C"}) { |
| |
205 # generate cache file |
| |
206 my $txt = ""; |
| |
207 foreach my $url (sort { $a cmp $b } keys %{$cache}) { |
| |
208 $txt .= sprintf("%10d %10d %s %s\n", |
| |
209 $cache->{$url}->{"size"}, |
| |
210 $cache->{$url}->{"mtime"}, |
| |
211 $cache->{$url}->{"md5"}, |
| |
212 $url |
| |
213 ); |
| |
214 } |
| |
215 |
| |
216 # write cache file |
| |
217 open(FP, ">" . $opts->{"C"}) |
| |
218 or die "unable to save cache file: " . $opts->{"C"}; |
| |
219 print FP $txt; |
| |
220 close(FP); |
| |
221 } |
| |
222 return; |
| |
223 } |
| |
224 |
| |
225 ## |
| |
226 ## DETERMINE LOCAL AND REMOTE FILES |
| |
227 ## |
| |
228 |
| |
229 # optionally load the cache |
| |
230 my $cache = cache_load(); |
| |
231 |
| |
232 # determine local files |
| |
233 print "++ determining local files\n"; |
| |
234 my @local_files = map { |
| |
235 s/\r?\n$//s; |
| |
236 s/\/$//s; |
| |
237 $_; |
| |
238 } `find $localprefixdir -depth -print`; |
| |
239 print "-- local: " . @local_files . " files\n"; |
| |
240 |
| |
241 # determine remote files |
| |
242 print "++ determining remote files\n"; |
| |
243 sub relurl ($$) { |
| |
244 my ($url, $suburl) = @_; |
| |
245 if ($suburl =~ m/^\w+:\/\//) { |
| |
246 $url = $suburl; |
| |
247 } |
| |
248 elsif ($suburl =~ m/^\//) { |
| |
249 $url = "file://$suburl"; |
| |
250 } |
| |
251 else { |
| |
252 $url =~ s/(\/)?\/*[^\/]*$/$1$suburl/; |
| |
253 } |
| |
254 1 while ($url =~ s/\/\.\//\//s); |
| |
255 1 while ($url =~ s/\/[^\/]+\/\.\.\//\//s); |
| |
256 return $url; |
| |
257 } |
| |
258 sub parse_attr ($) { |
| |
259 my ($xml) = @_; |
| |
260 my $info = { href => "", size => -1, mtime => -1, md5 => "-" }; |
| |
261 $info->{"href"} = $1 if ($xml =~ m/\bhref="([^"]*)"/s); |
| |
262 $info->{"size"} = $1 if ($xml =~ m/\bsize="([^"]*)"/s); |
| |
263 $info->{"mtime"} = $1 if ($xml =~ m/\bmtime="([^"]*)"/s); |
| |
264 $info->{"md5"} = $1 if ($xml =~ m/\bmd5="([^"]*)"/s); |
| |
265 return $info; |
| |
266 } |
| |
267 sub get_urls ($) { |
| |
268 my ($info) = @_; |
| |
269 |
| |
270 # determine URL |
| |
271 my $url = $info->{"href"}; |
| |
272 if ($url =~ m/^\//) { |
| |
273 $url = "file://$url"; |
| |
274 } |
| |
275 if ($url =~ m/\/$/) { |
| |
276 $url .= "00INDEX.rdf"; |
| |
277 } |
| |
278 $info->{"href"} = $url; |
| |
279 |
| |
280 # fetch URL content |
| |
281 print "-- fetching XML/RDF index: $url\n"; |
| |
282 my $tmpfile = "$tmpdir/openpkg-mirror.tmp"; |
| |
283 unlink($tmpfile); |
| |
284 my $cmd = "$my_prefix/bin/openpkg curl -k -s -R -o \"$tmpfile\" \"$url\""; |
| |
285 system($cmd); |
| |
286 my $xml; |
| |
287 if ($url =~ m/\.bz2$/) { |
| |
288 $xml = `$my_prefix/lib/openpkg/bzip2 -d -c <"$tmpfile"`; |
| |
289 } |
| |
290 else { |
| |
291 $xml = `cat "$tmpfile"`; |
| |
292 } |
| |
293 |
| |
294 # provide information about this URL |
| |
295 my @urls = (); |
| |
296 if ($info->{"size"} == -1 or $info->{"mtime"} == -1) { |
| |
297 my ($S, $T) = sizetime($tmpfile); |
| |
298 $info->{"size"} = $S; |
| |
299 $info->{"mtime"} = $T; |
| |
300 } |
| |
301 if ($info->{"md5"} eq "-") { |
| |
302 $info->{"md5"} = digest($tmpfile); |
| |
303 } |
| |
304 push(@urls, $info); |
| |
305 unlink($tmpfile); |
| |
306 |
| |
307 # parse references |
| |
308 my @ref1 = (); |
| |
309 my @ref2 = (); |
| |
310 while ($xml =~ m/<Repository\s+([^>]*href=[^>]*)>/gs) { |
| |
311 push(@ref1, $1); |
| |
312 } |
| |
313 while ($xml =~ m/<rdf:Description\s+([^>]*href=[^>]*)>/gs) { |
| |
314 push(@ref2, $1); |
| |
315 } |
| |
316 |
| |
317 # provide information about referenced leave URLs |
| |
318 foreach my $ref2 (@ref2) { |
| |
319 my $i = parse_attr($ref2); |
| |
320 $i->{"href"} = relurl($url, $i->{"href"}); |
| |
321 push(@urls, $i); |
| |
322 } |
| |
323 |
| |
324 # provide information about referenced index URLs |
| |
325 foreach my $ref1 (@ref1) { |
| |
326 my $i = parse_attr($ref1); |
| |
327 $i->{"href"} = relurl($url, $i->{"href"}); |
| |
328 push(@urls, get_urls($i)); # RECURSION |
| |
329 } |
| |
330 |
| |
331 return @urls; |
| |
332 } |
| |
333 my @remote_urls = get_urls({ href => $url, size => -1, mtime => -1, md5 => "-" }); |
| |
334 my $size = 0; |
| |
335 foreach my $url (@remote_urls) { |
| |
336 $size += int($url->{"size"}) if ($url->{"size"} > 0); |
| |
337 } |
| |
338 print "-- remote: " . @remote_urls . " files (" . humansize($size) . ")\n"; |
| |
339 |
| |
340 ## |
| |
341 ## DOWNLOAD NEW OR UPDATED REMOTE FILES |
| |
342 ## |
| |
343 |
| |
344 # fetching remote files |
| |
345 print "++ fetching new/updated files\n"; |
| |
346 my $local_files_keep = {}; |
| |
347 sub keep ($) { |
| |
348 my ($path) = @_; |
| |
349 # keep path and all its parent paths |
| |
350 while ($path ne "") { |
| |
351 $local_files_keep->{$path} = 1; |
| |
352 $path =~ s/(?:^\/$|\/?[^\/]+$)//s; |
| |
353 } |
| |
354 } |
| |
355 sub mkdir_for_file ($$) { |
| |
356 my ($prefix, $path) = @_; |
| |
357 $path =~ s/(?:^\/$|\/?[^\/]+$)//s; |
| |
358 my @dirs = split("/", $path); |
| |
359 $path = $prefix; |
| |
360 $path =~ s/\/$//s; |
| |
361 foreach my $dir (@dirs) { |
| |
362 $path .= "/$dir"; |
| |
363 if (not -d $path) { |
| |
364 print "-- creating: $path\n"; |
| |
365 system("mkdir \"$path\""); |
| |
366 } |
| |
367 } |
| |
368 } |
| |
369 foreach my $remote_url (@remote_urls) { |
| |
370 my $href = $remote_url->{"href"}; |
| |
371 if ($href =~ m/^\Q$remoteprefixurl\E(.+)$/) { |
| |
372 # derive local path |
| |
373 my $local_path = $1; |
| |
374 my $local_file = $localprefixdir . $local_path; |
| |
375 keep($local_file); |
| |
376 |
| |
377 # determine information about local path |
| |
378 my $download = 1; |
| |
379 my $cache_update = 0; |
| |
380 my ($size, $mtime, $md5); |
| |
381 if (-f $local_file) { |
| |
382 my $c = $cache->{$local_file}; |
| |
383 $size = $c->{"size"} if (defined $c and defined $c->{"size"}); |
| |
384 $mtime = $c->{"mtime"} if (defined $c and defined $c->{"mtime"}); |
| |
385 if (not defined $size or not defined $mtime) { |
| |
386 ($size, $mtime) = sizetime($local_file); |
| |
387 $cache_update = 1; |
| |
388 } |
| |
389 if ( $size == $remote_url->{"size"} |
| |
390 and $mtime == $remote_url->{"mtime"}) { |
| |
391 $md5 = $c->{"md5"} if (defined $c and defined $c->{"md5"}); |
| |
392 if (not defined $md5) { |
| |
393 $md5 = digest($local_file); |
| |
394 $cache_update = 1; |
| |
395 } |
| |
396 if ($md5 eq $remote_url->{"md5"}) { |
| |
397 $download = 0; |
| |
398 } |
| |
399 } |
| |
400 } |
| |
401 |
| |
402 # optionally download remote file to local path |
| |
403 if ($download) { |
| |
404 print "-- fetching: $local_path\n"; |
| |
405 mkdir_for_file($localprefixdir, $local_path); |
| |
406 my $cmd = "$my_prefix/bin/openpkg curl"; |
| |
407 $cmd .= " -s" if ( $opts->{"q"}); |
| |
408 $cmd .= " \"-#\"" if (not $opts->{"q"}); |
| |
409 $cmd .= " -k -R -o \"$local_file\" \"$href\""; |
| |
410 system($cmd); |
| |
411 ($size, $mtime) = sizetime($local_file); |
| |
412 $md5 = digest($local_file); |
| |
413 $cache_update = 1; |
| |
414 } |
| |
415 |
| |
416 # optionally update the size/mtime/md5 cache |
| |
417 if ($opts->{"C"} and $cache_update) { |
| |
418 $cache->{$local_file} = { size => $size, mtime => $mtime, md5 => $md5 }; |
| |
419 cache_save($cache); |
| |
420 } |
| |
421 } |
| |
422 else { |
| |
423 print "** WARNING: URL \"$href\" not under prefix \"$remoteprefixurl\": " . |
| |
424 "unable to derive local path -- skipping URL\n"; |
| |
425 } |
| |
426 } |
| |
427 |
| |
428 ## |
| |
429 ## REMOVE OBSOLETE LOCAL FILES |
| |
430 ## |
| |
431 |
| |
432 # remove obsolete files |
| |
433 print "++ removing obsolete files\n"; |
| |
434 my $lf = @local_files; |
| |
435 my $lfk = keys %{$local_files_keep}; |
| |
436 if (not $opts->{"f"} and ($lf - $lfk) > ($lf * 0.3)) { |
| |
437 print "** WARNING: more than 30% of your existing local files would be removed (require option -f)\n"; |
| |
438 } |
| |
439 else { |
| |
440 foreach my $local_file (@local_files) { |
| |
441 if (not exists $local_files_keep->{$local_file}) { |
| |
442 if (-f $local_file) { |
| |
443 print "-- removing: $local_file (file)\n"; |
| |
444 system("rm \"$local_file\" >/dev/null 2>&1 || true"); |
| |
445 } |
| |
446 elsif (-d $local_file) { |
| |
447 print "-- removing: $local_file (dir)\n"; |
| |
448 system("rmdir \"$local_file\" >/dev/null 2>&1 || true"); |
| |
449 } |
| |
450 } |
| |
451 } |
| |
452 } |
| |
453 |
| |
454 ## |
| |
455 ## CLEANUP |
| |
456 ## |
| |
457 |
| |
458 # optionally flush the cache |
| |
459 cache_save($cache); |
| |
460 |
| |
461 # exit gracefully |
| |
462 exit(0); |
| |
463 |