anduin revised this gist . Go to revision
1 file changed, 1247 insertions
apt-mirror(file created)
@@ -0,0 +1,1247 @@ | |||
1 | + | #!/usr/bin/perl | |
2 | + | ||
3 | + | =pod | |
4 | + | ||
5 | + | =head1 NAME | |
6 | + | ||
7 | + | apt-mirror - apt sources mirroring tool | |
8 | + | ||
9 | + | =head1 SYNOPSIS | |
10 | + | ||
11 | + | apt-mirror [configfile] | |
12 | + | ||
13 | + | =head1 DESCRIPTION | |
14 | + | ||
15 | + | A small and efficient tool that lets you mirror a part of or | |
16 | + | the whole Debian GNU/Linux distribution or any other apt sources. | |
17 | + | ||
18 | + | Main features: | |
19 | + | * It uses a config similar to APT's F<sources.list> | |
20 | + | * It's fully pool compliant | |
21 | + | * It supports multithreaded downloading | |
22 | + | * It supports multiple architectures at the same time | |
23 | + | * It can automatically remove unneeded files | |
24 | + | * It works well on an overloaded Internet connection | |
25 | + | * It never produces an inconsistent mirror, even while mirroring | |
26 | + | * It works on all POSIX compliant systems with Perl and wget | |
27 | + | ||
28 | + | =head1 COMMENTS | |
29 | + | ||
30 | + | apt-mirror uses F</etc/apt/mirror.list> as a configuration file. | |
31 | + | By default it is tuned to official Debian or Ubuntu mirrors. Change | |
32 | + | it for your needs. | |
33 | + | ||
34 | + | After you set up the configuration file you may run as root: | |
35 | + | ||
36 | + | # su - apt-mirror -c apt-mirror | |
37 | + | ||
38 | + | Or uncomment the line in F</etc/cron.d/apt-mirror> to enable daily mirror updates. | |
39 | + | ||
40 | + | =head1 FILES | |
41 | + | ||
42 | + | F</etc/apt/mirror.list> | |
43 | + | Main configuration file | |
44 | + | ||
45 | + | F</etc/cron.d/apt-mirror> | |
46 | + | Cron configuration template | |
47 | + | ||
48 | + | F</var/spool/apt-mirror/mirror> | |
49 | + | The mirror is placed here | |
50 | + | ||
51 | + | F</var/spool/apt-mirror/skel> | |
52 | + | Place for temporarily downloaded indexes | |
53 | + | ||
54 | + | F</var/spool/apt-mirror/var> | |
55 | + | Log files are placed here. URLs and MD5 checksums are also stored here. | |
56 | + | ||
57 | + | =head1 CONFIGURATION EXAMPLES | |
58 | + | ||
59 | + | The mirror.list configuration supports many options; the file is well commented and explains each option. | |
60 | + | Here are some sample mirror configuration lines showing the various supported ways: | |
61 | + | ||
62 | + | Normal: | |
63 | + | deb http://example.com/debian stable main contrib non-free | |
64 | + | ||
65 | + | Arch Specific: (many other architectures are supported) | |
66 | + | deb-powerpc http://example.com/debian stable main contrib non-free | |
67 | + | ||
68 | + | HTTP and FTP Auth or non-standard port: | |
69 | + | deb http://user:pass@example.com:8080/debian stable main contrib non-free | |
70 | + | ||
71 | + | HTTPS with sending Basic HTTP authentication information (plaintext username and password) for all requests: | |
72 | + | (this was default behaviour of Wget 1.10.2 and prior and is needed for some servers with new version of Wget) | |
73 | + | set auth_no_challenge 1 | |
74 | + | deb https://user:pass@example.com:443/debian stable main contrib non-free | |
75 | + | ||
76 | + | HTTPS without checking certificate: | |
77 | + | set no_check_certificate 1 | |
78 | + | deb https://example.com:443/debian stable main contrib non-free | |
79 | + | ||
80 | + | Source Mirroring: | |
81 | + | deb-src http://example.com/debian stable main contrib non-free | |
82 | + | ||
83 | + | =head1 AUTHORS | |
84 | + | ||
85 | + | Dmitry N. Hramtsov E<lt>hdn@nsu.ruE<gt> | |
86 | + | Brandon Holtsclaw E<lt>me@brandonholtsclaw.comE<gt> | |
87 | + | ||
88 | + | =cut | |
89 | + | ||
90 | + | use warnings; | |
91 | + | use strict; | |
92 | + | use File::Copy; | |
93 | + | use File::Compare; | |
94 | + | use File::Path qw(make_path); | |
95 | + | use File::Basename; | |
96 | + | use Fcntl qw(:flock); | |
97 | + | ||
# Path of the configuration file; assigned during start-up.
my $config_file;

# Built-in defaults for every configuration variable. Values may refer to
# other variables with a leading '$'; get_variable() expands them on read.
my %config_variables = (
    defaultarch          => `dpkg --print-architecture 2>/dev/null` || 'i386',
    nthreads             => 20,
    base_path            => '/var/spool/apt-mirror',
    mirror_path          => '$base_path/mirror',
    skel_path            => '$base_path/skel',
    var_path             => '$base_path/var',
    cleanscript          => '$var_path/clean.sh',
    _contents            => 1,
    _autoclean           => 0,
    _tilde               => 0,
    _plus                => 0,
    limit_rate           => '100m',
    run_postmirror       => 1,
    auth_no_challenge    => 0,
    no_check_certificate => 0,
    unlink               => 0,
    paranoid             => 0,
    postmirror_script    => '$var_path/postmirror.sh',
    use_proxy            => 'off',
    http_proxy           => '',
    https_proxy          => '',
    proxy_user           => '',
    proxy_password       => '',
);

# Parsed "deb" and "deb-src" lines of the configuration file.
my @config_binaries = ();
my @config_sources  = ();

my @release_urls;
my @index_urls;

# PIDs of the download children that are still running.
my @childrens = ();

my %skipclean       = ();
my %clean_directory = ();

# Supported checksum types, strongest first.
my @hash_strength = ( 'SHA512', 'SHA256', 'SHA1', 'MD5Sum' );

# Per checksum type: field name used in Packages indexes.
my %packages_hashes = (
    'SHA512' => 'SHA512',
    'SHA256' => 'SHA256',
    'SHA1'   => 'SHA1',
    'MD5Sum' => 'MD5sum',
);

# Per checksum type: field name used in Sources indexes.
my %sources_hashes = (
    'SHA512' => 'Checksums-Sha512',
    'SHA256' => 'Checksums-Sha256',
    'SHA1'   => 'Checksums-Sha1',
    'MD5Sum' => 'Files',
);

# Per checksum type: external command used to verify downloads.
my %verify_commands = (
    'SHA512' => 'sha512sum',
    'SHA256' => 'sha256sum',
    'SHA1'   => 'sha1sum',
    'MD5Sum' => 'md5sum',
);

# Per checksum type: name of the checksum list file written to var_path.
my %checksum_filenames = (
    'SHA512' => 'SHA512',
    'SHA256' => 'SHA256',
    'SHA1'   => 'SHA1',
    'MD5Sum' => 'MD5',
);

# Files fetched from a by-hash directory, mapped to the canonical
# locations they have to be copied to.
my %hashsum_to_files = ();

# All by-hash file names known for a given canonical file name.
my %file_to_hashsums;

# URL => [ hash type, hash value ] for downloads that can be verified.
my %urls_checksums = ();
166 | + | ||
######################################################################################
## Setting up $config_file variable

# An optional first command-line argument overrides the default location.
# The argument is kept in a lexical (instead of the global $_) and tested
# for length rather than truth, so a file literally named "0" is accepted.
$config_file = "/etc/apt/mirror.list";    # Default value
my $requested_config_file = shift @ARGV;
if ( defined $requested_config_file and length $requested_config_file )
{
    die("apt-mirror: invalid config file specified") unless -e $requested_config_file;
    $config_file = $requested_config_file;
}

# Strip the trailing newline the default architecture value may carry.
chomp $config_variables{"defaultarch"};
178 | + | ||
179 | + | ###################################################################################### | |
180 | + | ## Common subroutines | |
181 | + | ||
# Round a number to one decimal place by adding 0.05 to its magnitude and
# truncating, and return it as a string that always has a fractional part
# (for example "2.0"). The sign of a negative input is preserved.
sub round_number
{
    my ($value) = @_;

    my $sign = '';
    $sign = '-' if $value < 0;

    my $text = int( ( abs($value) + .05 ) * 10 ) / 10;

    # Whole numbers stringify without a decimal point; add one.
    if ( $text !~ /\./ )
    {
        $text .= '.0';
    }

    # Normalise a dangling "." or a superfluous third decimal digit.
    if ( substr( $text, -1 ) eq '.' )
    {
        $text .= '0';
    }
    if ( $text =~ /\.\d\d0$/ )
    {
        chop $text;
    }

    return $sign . $text;
}
193 | + | ||
# Render a byte count as a human readable string.
# Counts below 1 KiB are returned unchanged with the unit "bytes"; larger
# counts are scaled to the biggest fitting unit (KiB, MiB or GiB) and
# rounded with round_number().
sub format_bytes
{
    my ($bytes) = @_;

    # Units from largest to smallest so the first fitting one wins.
    my @units = (
        [ 'GiB', 1024 * 1024 * 1024 ],
        [ 'MiB', 1024 * 1024 ],
        [ 'KiB', 1024 ],
    );

    foreach my $unit (@units)
    {
        my ( $size_name, $factor ) = @{$unit};
        next unless $bytes >= $factor;
        return round_number( $bytes / $factor ) . " $size_name";
    }

    return "$bytes bytes";
}
227 | + | ||
# Return the value of a configuration variable with every "$name"
# reference replaced by the value of the variable it names. Replacement is
# repeated until nothing is left to expand; the sub dies when that takes
# too many passes (e.g. because of a self-referencing variable).
sub get_variable
{
    my ($name) = @_;

    my $expanded    = $config_variables{$name};
    my $passes_left = 16;

    while ( $expanded =~ s/\$(\w+)/$config_variables{$1}/xg )
    {
        if ( $passes_left < 0 )
        {
            die("apt-mirror: too many substitution while evaluating variable");
        }
        $passes_left--;
    }

    return $expanded;
}
238 | + | ||
# Wrap a path in single quotes for use in a shell command line. Each single
# quote inside the path is emitted as '\'' (close quote, escaped quote,
# reopen quote).
sub quoted_path
{
    my ($raw) = @_;

    # A limit of -1 keeps trailing empty fields, so a quote at the very end
    # of the path is escaped too.
    my @pieces = split( /'/, $raw, -1 );

    return "'" . join( "'\\''", @pieces ) . "'";
}
245 | + | ||
# Take an exclusive, non-blocking lock on var_path/apt-mirror.lock.
# Dies when the lock file cannot be opened, or when the lock is already held
# (which means another apt-mirror instance is running). The LOCK_FILE handle
# stays open until unlock_aptmirror() closes it.
sub lock_aptmirror
{
    my $lock_path = get_variable("var_path") . "/apt-mirror.lock";

    # Report an unwritable lock file as such instead of letting the failed
    # flock() below claim that another instance is running.
    open( LOCK_FILE, '>', $lock_path )
      or die("apt-mirror: can't open lock file ($lock_path): $!");

    flock( LOCK_FILE, LOCK_EX | LOCK_NB )
      or die("apt-mirror is already running, exiting");

    return;
}
255 | + | ||
# Release the lock taken by lock_aptmirror(): close the LOCK_FILE handle and
# remove var_path/apt-mirror.lock. Returns the result of unlink.
sub unlock_aptmirror
{
    close(LOCK_FILE);

    my $lock_path = get_variable("var_path") . "/apt-mirror.lock";
    return unlink($lock_path);
}
261 | + | ||
# Verify the files downloaded in a stage against the checksum lists written
# to var_path ("$stage-<HASH>") and delete every file reported as FAILED.
#
# Arguments:
#   $stage - stage label used in the checksum list file names
#
# Returns the number of files that were deleted. Dies when a checksum tool
# cannot be started or a corrupted file cannot be removed.
sub delete_corrupted_files
{
    my $stage = shift;
    my $found = 0;
    foreach my $hash (@hash_strength)
    {
        my $file = get_variable("var_path") . "/${stage}-${hash}";

        # Nothing to verify for hash types without a (non-empty) list.
        next unless -s $file;

        # The command goes through the shell (for the stderr redirect), so
        # the list path has to be quoted.
        my $command = "env LC_ALL=C $verify_commands{$hash} --check --quiet " . quoted_path($file) . " 2>/dev/null";
        open( my $pipe, "-|", $command ) or die "Cannot run $verify_commands{$hash}";
        while ( my $report = <$pipe> )
        {
            # Only "<file>: FAILED" lines are of interest; skip anything else.
            next unless $report =~ /^(.*): FAILED/;
            my $filename = $1;

            if ( -f $filename )
            {
                $found++;
                print "$filename is corrupted, deleting....\n";
                unlink $filename or die "Cannot delete $filename.";
            }
        }
        close $pipe;
    }
    return $found;
}
288 | + | ||
# Download a list of URLs with parallel wget processes.
#
# Arguments:
#   $stage - label used in progress output and in the names of the
#            intermediate files in var_path ("$stage-urls.N", "$stage-log.N"
#            and, in paranoid mode, "$stage-<HASH>")
#   @urls  - the URLs to fetch
#
# The URLs are spread over at most "nthreads" list files, one wget child is
# forked per list, and the sub waits until all children have exited. When
# the "paranoid" variable is set, checksum lists are written before the
# download and checked afterwards; corrupted files abort the run. Finally
# every entry of %hashsum_to_files is copied to its canonical locations.
sub download_urls
{
    my $stage    = shift;
    my @urls     = @_;
    my $nthreads = get_variable("nthreads");
    my @args     = ();
    local $| = 1;

    # A thread count below one would leave no list file to write to (and
    # make the modulus below illegal), so fall back to a single worker.
    $nthreads = 1     if $nthreads < 1;
    $nthreads = @urls if @urls < $nthreads;

    if ( get_variable("auth_no_challenge") == 1 )    { push( @args, "--auth-no-challenge" ); }
    if ( get_variable("no_check_certificate") == 1 ) { push( @args, "--no-check-certificate" ); }
    if ( get_variable("unlink") == 1 )               { push( @args, "--unlink" ); }

    my $use_proxy = get_variable("use_proxy");
    if ( length($use_proxy) && ( $use_proxy eq 'yes' || $use_proxy eq 'on' ) )
    {
        my $http_proxy     = get_variable("http_proxy");
        my $https_proxy    = get_variable("https_proxy");
        my $proxy_user     = get_variable("proxy_user");
        my $proxy_password = get_variable("proxy_password");

        if ( length($http_proxy) || length($https_proxy) ) { push( @args, "-e use_proxy=yes" ); }
        if ( length($http_proxy) )     { push( @args, "-e http_proxy=" . $http_proxy ); }
        if ( length($https_proxy) )    { push( @args, "-e https_proxy=" . $https_proxy ); }
        if ( length($proxy_user) )     { push( @args, "-e proxy_user=" . $proxy_user ); }
        if ( length($proxy_password) ) { push( @args, "-e proxy_password=" . $proxy_password ); }
    }
    print "Downloading " . scalar(@urls) . " $stage files using $nthreads threads...\n";

    if ( get_variable("paranoid") )
    {
        # Write one checksum list per hash type so the downloads can be
        # verified by delete_corrupted_files() afterwards.
        my %fh = ();
        foreach my $hash (@hash_strength)
        {
            open $fh{$hash}, ">", get_variable("var_path") . "/${stage}-${hash}" or die("apt-mirror: Cannot write to ${stage}-${hash}");
        }

        foreach my $checked_url (@urls)
        {
            if ( $urls_checksums{$checked_url} )
            {
                my ( $hash, $hashsum ) = @{ $urls_checksums{$checked_url} };
                my $fh = $fh{$hash};
                print $fh $hashsum . " " . sanitise_uri($checked_url) . "\n";
            }
        }
        foreach my $hash (@hash_strength)
        {
            close $fh{$hash};
        }
    }

    # One URL list file per worker.
    my @url_fds;
    foreach my $thread ( 0 .. $nthreads - 1 )
    {
        open( $url_fds[$thread], ">", get_variable("var_path") . "/$stage-urls.$thread" ) or die("apt-mirror: can't write to intermediate file ($stage-urls.$thread)");
    }

    # Hand out the URLs round-robin, walking the list from the end.
    for ( my $i = scalar(@urls) - 1 ; $i >= 0 ; $i-- )
    {
        my $thread = $i % $nthreads;
        print { $url_fds[$thread] } $urls[$i] . "\n";
    }

    foreach my $thread ( 0 .. $#url_fds )
    {
        # Name the list file that actually failed to close.
        close $url_fds[$thread] or die("apt-mirror: can't close intermediate file ($stage-urls.$thread)");
    }

    foreach my $thread ( 0 .. $nthreads - 1 )
    {
        my $pid = fork();

        die("apt-mirror: can't do fork in download_urls") if !defined($pid);

        if ( $pid == 0 )
        {
            exec 'wget', '--no-if-modified-since', '--no-cache', '--limit-rate=' . get_variable("limit_rate"), '-T', '60', '-t', '1', '-r', '-N', '-l', 'inf', '-o', get_variable("var_path") . "/$stage-log.$thread", '-i', get_variable("var_path") . "/$stage-urls.$thread", @args;

            # shouldn't reach this unless exec fails
            die("\n\nCould not run wget, please make sure its installed and in your path\n\n");
        }

        push @childrens, $pid;
    }

    print "Begin time: " . localtime() . "\n[" . scalar(@childrens) . "]... ";
    while ( scalar @childrens )
    {
        my $dead = wait();

        # wait() returns -1 when there is no child left to wait for; stop
        # instead of spinning forever on a list that can no longer shrink.
        if ( $dead == -1 )
        {
            @childrens = ();
            last;
        }

        @childrens = grep { $_ != $dead } @childrens;
        print "[" . scalar(@childrens) . "]... ";
    }
    print "\nEnd time: " . localtime() . "\n\n";

    if ( get_variable("paranoid") )
    {
        if ( delete_corrupted_files($stage) > 0 )
        {
            die "Some files were corrupted while downloading, aborting...";
        }
    }

    # Files fetched from by-hash locations still have to be put at their
    # canonical paths.
    foreach my $hashsum_filename ( keys %hashsum_to_files )
    {
        foreach my $filename ( @{ $hashsum_to_files{$hashsum_filename} } )
        {
            copy_file( $hashsum_filename, $filename );
        }
    }

}
401 | + | ||
402 | + | ## Parse config | |
403 | + | ||
# Parse one line of the mirror.list configuration.
#
# Arguments:
#   $line - (optional) the line to parse; defaults to $_ so the sub can be
#           called without arguments inside a "while (<FH>)" loop
#
# Returns a hash (as a list) describing the line:
#   deb / deb-src lines: type, arch (may be undef), options, uri and
#                        components (array ref of the words following the uri)
#   set lines:           type => 'set', key, value (surrounding quotes removed)
#   clean / skip-clean:  type, uri
# An empty list is returned for a line that matches none of these forms.
sub parse_config_line
{
    my $line = @_ ? shift : $_;
    my $pattern_deb_line = qr/^[\t ]*(?<type>deb-src|deb)(?:-(?<arch>[\w\-]+))?[\t ]+(?:\[(?<options>[^\]]+)\][\t ]+)?(?<uri>[^\s]+)[\t ]+(?<components>.+)$/;
    my %config;

    if ( $line =~ $pattern_deb_line )
    {
        # Copy every capture before the next match resets %+.
        $config{'type'}    = $+{type};
        $config{'arch'}    = $+{arch};
        $config{'options'} = $+{options} ? $+{options} : "";
        $config{'uri'}     = $+{uri};
        my $components = $+{components};

        # An "arch=" option overrides the architecture from "deb-<arch>".
        if ( $config{'options'} =~ /arch=(?<arch>[\w\-]+)/ )
        {
            $config{'arch'} = $+{arch};
        }
        $config{'components'} = [ split /\s+/, $components ];
    }

    # The keyword has to start the line (after optional blanks), like the
    # deb pattern above, so that e.g. "reset a b" is not taken for "set a b".
    elsif ( $line =~ /^[\t ]*set[\t ]+(?<key>[^\s]+)[\t ]+(?<value>"[^"]+"|'[^']+'|[^\s]+)/ )
    {
        $config{'type'}  = 'set';
        $config{'key'}   = $+{key};
        $config{'value'} = $+{value};
        $config{'value'} =~ s/^'(.*)'$/$1/;
        $config{'value'} =~ s/^"(.*)"$/$1/;
    }
    elsif ( $line =~ /^[\t ]*(?<type>clean|skip-clean)[\t ]+(?<uri>[^\s]+)/ )
    {
        $config{'type'} = $+{type};
        $config{'uri'}  = $+{uri};
    }

    return %config;
}
432 | + | ||
# Turn a URI into the relative path used for it below the mirror and skel
# directories: the scheme is removed, as are login details in the host
# part and one trailing slash. '~' and '+' are percent-encoded when the
# "_tilde" / "_plus" variables are set.
sub sanitise_uri
{
    my ($uri) = @_;

    # Drop "scheme://".
    $uri =~ s[^(\w+)://][];

    # Drop "user:pass@" when the part before the first slash contains one.
    my ($host_part) = split '/', $uri;
    if ( $host_part =~ /@/ )
    {
        $uri =~ s/^([^@]+)?@?//;
    }

    if ( get_variable("_tilde") )
    {
        $uri =~ s/~/\%7E/g;
    }
    if ( get_variable("_plus") )
    {
        $uri =~ s/\+/\%2B/g;
    }

    $uri =~ s[/$][];
    return $uri;
}
443 | + | ||
# Read the configuration file line by line. Comment and blank lines are
# skipped; every other line must be a "set", "deb", "deb-src", "clean" or
# "skip-clean" line, otherwise the run is aborted.
open( my $config_fh, '<', $config_file ) or die("apt-mirror: can't open config file ($config_file)");
while (<$config_fh>)
{
    next if /^\s*#/;
    next unless /\S/;
    my $line        = $_;
    my %config_line = parse_config_line();

    # An unrecognised line yields no type at all; use an empty string so the
    # comparisons below do not warn before the error is reported.
    my $type = defined $config_line{'type'} ? $config_line{'type'} : '';

    if ( $type eq "set" )
    {
        $config_variables{ $config_line{'key'} } = $config_line{'value'};
        next;
    }
    elsif ( $type eq "deb" )
    {
        # Lines without an explicit architecture use the default one.
        my $arch = $config_line{'arch'};
        $arch = get_variable("defaultarch") if !defined $config_line{'arch'};
        push @config_binaries, [ $arch, $config_line{'uri'}, @{ $config_line{'components'} } ];
        next;
    }
    elsif ( $type eq "deb-src" )
    {
        push @config_sources, [ $config_line{'uri'}, @{ $config_line{'components'} } ];
        next;
    }
    elsif ( $type eq "skip-clean" or $type eq "clean" )
    {
        my $link = sanitise_uri( $config_line{'uri'} );
        if ( $type eq "skip-clean" )
        {
            $skipclean{$link} = 1;
        }
        else
        {
            $clean_directory{$link} = 1;
        }
        next;
    }

    die("apt-mirror: invalid line in config file ($.: $line ...)");
}
close $config_fh;

die("Please explicitly specify 'defaultarch' in mirror.list") unless get_variable("defaultarch");
478 | + | ||
######################################################################################
## Create the 3 needed directories if they don't exist yet
my @needed_directories = map { get_variable($_) } qw(mirror_path skel_path var_path);
foreach my $needed_directory (@needed_directories)
{
    next if -d $needed_directory;
    make_path($needed_directory) or die("apt-mirror: can't create $needed_directory directory");
}
#
#######################################################################################

# Make sure only one apt-mirror instance works on these directories.
lock_aptmirror();

######################################################################################
## Skel download

# URL => size of everything queued for the next download_urls() call.
my %urls_to_download = ();

# Scratch variables shared by the top-level loops of the script.
my ( $url, $arch );
499 | + | ||
# Normalise the path part of a URL: "/./" segments, repeated slashes
# (except the pair following "scheme:") and "dir/../" pairs are collapsed.
# '~' and '+' are percent-encoded when "_tilde" / "_plus" are set.
sub remove_double_slashes
{
    my ($path) = @_;

    # Each substitution is repeated until it no longer changes anything.
    1 while $path =~ s[/\./][/]g;
    1 while $path =~ s[(?<!:)//][/]g;
    1 while $path =~ s[(?<!:/)/[^/]+/\.\./][/]g;

    if ( get_variable("_tilde") )
    {
        $path =~ s/~/\%7E/g;
    }
    if ( get_variable("_plus") )
    {
        $path =~ s/\+/\%2B/g;
    }

    return $path;
}
510 | + | ||
# Queue a URL for the next download and protect its local copy from cleaning.
#
# Arguments:
#   $url             - URL of the file at its canonical location
#   $size            - size of the file (stored as the queue value)
#   $strongest_hash  - strongest hash type available for the repository
#   $hash, $hashsum  - hash type and value of this particular entry
#   $acquire_by_hash - true to fetch the file from its by-hash location
#
# All arguments but the first are optional.
sub add_url_to_download
{
    my ( $requested_url, $size, $strongest_hash, $hash, $hashsum, $acquire_by_hash ) = @_;

    my $url                = remove_double_slashes($requested_url);
    my $canonical_filename = sanitise_uri($url);
    $skipclean{$canonical_filename} = 1;

    unless ($acquire_by_hash)
    {
        # Plain download from the canonical location; remember the checksum
        # only for the strongest hash type.
        $urls_to_download{$url} = $size;
        if ( $strongest_hash and ( $hash eq $strongest_hash ) )
        {
            $urls_checksums{$url} = [ $hash, $hashsum ];
        }
        return;
    }

    # By-hash download:
    # - only the entry with the strongest hash is actually fetched
    # - the fetched file is later copied to the canonical location
    # - the by-hash names of the weaker hashes are only recorded
    my $by_hash_suffix   = "/by-hash/${hash}/${hashsum}";
    my $hashsum_filename = dirname($canonical_filename) . $by_hash_suffix;
    $url = dirname($url) . $by_hash_suffix;
    $skipclean{$hashsum_filename} = 1;

    if ( $hash ne $strongest_hash )
    {
        # Not downloaded through this checksum, but its by-hash name still
        # has to be known for the canonical file.
        push @{ $file_to_hashsums{$canonical_filename} }, $hashsum_filename;
        return;
    }

    # Strongest hash: download it and remember where it has to be copied to.
    push @{ $hashsum_to_files{$hashsum_filename} }, $canonical_filename;
    $urls_to_download{$url} = $size;
    $urls_checksums{$url}   = [ $hash, $hashsum ];
    return;
}
559 | + | ||
# Queue InRelease, Release and Release.gpg of every configured source
# repository. Entries with components live below "dists/"; flat
# repositories keep the files directly in the distribution directory.
foreach my $source_entry (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source_entry};

    $url = $uri . ( @components ? "/dists/" : "/" ) . $distribution . "/";

    foreach my $release_file ( "InRelease", "Release", "Release.gpg" )
    {
        add_url_to_download( $url . $release_file );
    }
}

# Same for every configured binary repository.
foreach my $binary_entry (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary_entry};

    $url = $uri . ( @components ? "/dists/" : "/" ) . $distribution . "/";

    foreach my $release_file ( "InRelease", "Release", "Release.gpg" )
    {
        add_url_to_download( $url . $release_file );
    }
}

# Release files are downloaded into the skel directory.
chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel");
@release_urls = sort keys %urls_to_download;
download_urls( "release", @release_urls );

######################################################################################
## Download all relevant metadata

%urls_to_download = ();
606 | + | ||
# Decide whether a file listed in a Release file is an index file that has to
# be mirrored.
#
# Arguments:
#   $filename        - file name as listed in the Release file
#   $arch            - architecture, or "source" for source indexes
#   $component_regex - regex source matching the wanted components, or undef
#                      for a flat repository
#   $arch_regex      - regex source matching the wanted architectures
#
# Returns 1 when the file is wanted, 0 otherwise.
sub _release_entry_wanted
{
    my ( $filename, $arch, $component_regex, $arch_regex ) = @_;
    my $compressed_extension_regex = '(?:\.(?:gz|bz2|xz|lzma))$';
    my @patterns;

    if ( $arch eq "source" )
    {
        if ( defined $component_regex )
        {
            # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
            @patterns = (
                "^${component_regex}/source/Sources${compressed_extension_regex}",
                "^${component_regex}/Contents-source${compressed_extension_regex}",
            );
        }
        else
        {
            # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
            @patterns = ("^Sources${compressed_extension_regex}");
        }
    }
    elsif ( defined $component_regex )
    {
        # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
        @patterns = (
            "^${component_regex}/Contents-${arch_regex}${compressed_extension_regex}",
            "^Contents-${arch_regex}${compressed_extension_regex}",
            "^Packages${compressed_extension_regex}",
            "^${component_regex}/binary-${arch_regex}/Packages${compressed_extension_regex}",
            "^${component_regex}/binary-${arch_regex}/Release\$",
            "^${component_regex}/cnf/Commands-${arch_regex}${compressed_extension_regex}",
            "^${component_regex}/dep11/Components-${arch_regex}.*${compressed_extension_regex}",
            "^${component_regex}/dep11/icons-.*${compressed_extension_regex}",
            "^${component_regex}/i18n/Translation-.*${compressed_extension_regex}",
        );
    }
    else
    {
        # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
        @patterns = ("^Packages${compressed_extension_regex}");
    }

    foreach my $pattern (@patterns)
    {
        return 1 if $filename =~ m{$pattern};
    }
    return 0;
}

# Look in the downloaded InRelease (preferred) or Release file of a
# repository for the index files that need to be downloaded and queue them
# with add_url_to_download().
#
# Arguments:
#   $arch         - architecture, or "source" for a deb-src entry
#   $uri          - repository base URI
#   $distribution - distribution (directory for flat repositories)
#   @components   - components; empty for a flat repository
#
# Returns 1 when a release file was found and processed, 0 (after a warning)
# when neither InRelease nor Release exists in the skel directory.
sub find_metadata_in_release
{
    my ( $arch, $uri, $distribution, @components ) = @_;

    my $release_uri     = '';
    my $component_regex = undef;

    # Component and architecture names are matched literally, so regex
    # metacharacters in them (e.g. a '+') must be escaped.
    my $arch_regex      = "(?:" . quotemeta($arch) . "|all)";
    my $hash_type_regex = "(?:" . join( "|", @hash_strength ) . ")";
    my $dist_uri;

    if (@components)
    {
        $dist_uri        = remove_double_slashes( $uri . "/dists/" . $distribution . "/" );
        $component_regex = "(?:" . join( "|", map { quotemeta($_) } @components ) . ")";
    }
    else
    {
        $dist_uri = remove_double_slashes( $uri . "/" . $distribution . "/" );
    }

    # Use the first release file that can be opened.
    my $stream;
    foreach my $release_filename ( "InRelease", "Release" )
    {
        $release_uri = $dist_uri . $release_filename;
        my $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);

        last if ( open $stream, "<", $release_path );
        $stream = undef;
    }

    unless ($stream)
    {
        warn( "Failed to find InRelease or Release in " . get_variable("skel_path") . "/" . sanitise_uri($dist_uri) );
        return 0;
    }

    # $hash holds the type of the checksum section currently being read, or
    # undef while outside of any checksum section.
    my $hash             = undef;
    my %available_hashes = ();
    my $acquire_by_hash  = 0;
    my @parts_to_download = ();
    while ( my $line = <$stream> )
    {
        chomp $line;
        if ($hash)
        {
            if ( $line =~ /^ +(.*)$/ )
            {
                # Entry of the current section: "<hashsum> <size> <filename>".
                my $entry = $1;
                my @parts = split( / +/, $entry );
                if ( @parts == 3 )
                {
                    my ( $hashsum, $size, $filename ) = @parts;
                    push @parts, $hash;
                    if ( _release_entry_wanted( $filename, $arch, $component_regex, $arch_regex ) )
                    {
                        push @parts_to_download, \@parts;
                    }
                }
                else
                {
                    warn("Malformed checksum line \"$entry\" in $release_uri");
                }
                next;
            }

            # A line that is not indented ends the checksum section.
            $hash = undef;
        }

        if ( $line =~ /^(${hash_type_regex}):$/ )
        {
            $hash = $1;
            $available_hashes{$hash} = 1;
        }
        elsif ( $line eq "Acquire-By-Hash: yes" )
        {
            $acquire_by_hash = 1;
        }
    }
    close $stream;

    # By-hash downloads need to know the strongest hash type on offer.
    my $strongest_hash;
    if ($acquire_by_hash)
    {
        foreach my $candidate (@hash_strength)
        {
            if ( $available_hashes{$candidate} )
            {
                $strongest_hash = $candidate;
                last;
            }
        }
        unless ($strongest_hash)
        {
            warn("Cannot find a supported hash in $release_uri, will download from canonical locations.");
            $acquire_by_hash = 0;
        }
    }

    foreach my $part (@parts_to_download)
    {
        my ( $hashsum, $size, $filename, $entry_hash ) = @{$part};
        add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $entry_hash, $hashsum, $acquire_by_hash ? 1 : 0 );
    }
    return 1;
}
777 | + | ||
# Queue the index files of every configured repository. Repositories with a
# release file get exactly the files listed there; for repositories without
# one the well known index locations are tried with every known compression
# extension.
print "Processing metadata files from releases [";
foreach my $binary_entry (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary_entry};
    print "M";
    unless ( find_metadata_in_release( $arch, $uri, $distribution, @components ) )
    {
        # Insecure repo with no release file - try to get the well known indices
        foreach my $file_extension ( ".gz", ".bz2", ".xz", ".lzma", "" )
        {
            if (@components)
            {
                # Debian repo
                foreach my $component (@components)
                {
                    foreach my $path (
                        "/dists/${distribution}/${component}/binary-${arch}/Packages",
                        "/dists/${distribution}/${component}/binary-all/Packages",
                        "/dists/${distribution}/${component}/Contents-${arch}",
                        "/dists/${distribution}/${component}/Contents-all",
                        "/dists/${distribution}/Contents-${arch}",
                        "/dists/${distribution}/Contents-all",
                      )
                    {
                        add_url_to_download("${uri}/${path}${file_extension}");
                    }
                }
            }
            else
            {
                # Flat repo
                foreach my $path (
                    "${distribution}/Packages",
                    "${distribution}/Contents-${arch}",
                    "${distribution}/Contents-all",
                  )
                {
                    add_url_to_download("${uri}/${path}${file_extension}");
                }
            }
        }
    }
}

foreach my $source_entry (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source_entry};
    print "M";
    unless ( find_metadata_in_release( "source", $uri, $distribution, @components ) )
    {
        # Insecure repo with no release file - try to get the well known indices
        foreach my $file_extension ( ".gz", ".bz2", ".xz", ".lzma", "" )
        {
            if (@components)
            {
                # Debian repo: source indexes live below
                # dists/<distribution>/<component>/, the same layout that
                # find_metadata_in_release() matches in a Release file.
                foreach my $component (@components)
                {
                    foreach my $path (
                        "/dists/${distribution}/${component}/source/Sources",
                        "/dists/${distribution}/${component}/Contents-source",
                      )
                    {
                        add_url_to_download("${uri}/${path}${file_extension}");
                    }
                }
            }
            else
            {
                # Flat repo
                add_url_to_download("${uri}/${distribution}/Sources${file_extension}");
            }
        }
    }
}
print "]\n\n";

@index_urls = sort keys %urls_to_download;
download_urls( "index", @index_urls );
850 | + | ||
851 | + | ###################################################################################### | |
852 | + | ## Main download preparations | |
853 | + | ||
# Start the archive phase with an empty download queue.
%urls_to_download = ();

# Write handles for the intermediate list files, keyed by list name
# ("ALL", "NEW") or by checksum type.
my %files_fh;

# Three-argument open keeps the mode separate from the path, so unusual
# characters in var_path cannot be mistaken for part of the open mode.
foreach my $list_name ( "ALL", "NEW" )
{
    open $files_fh{$list_name}, ">", get_variable("var_path") . "/" . $list_name
      or die("apt-mirror: can't write to intermediate file (${list_name}): $!");
}
foreach my $hash (@hash_strength)
{
    open $files_fh{$hash}, ">", get_variable("var_path") . "/" . $checksum_filenames{$hash}
      or die("apt-mirror: can't write to intermediate file (${hash}): $!");
}
864 | + | ||
# Memo of stat() results, keyed by the file name that was queried.
my %stat_cache = ();

# _stat( $filename )
# Returns the list stat($filename) produced the first time this name was
# queried; later calls reuse the remembered list (an empty list when the
# first stat() failed) until clear_stat_cache() is called.
sub _stat
{
    my ($filename) = @_;

    unless ( exists $stat_cache{$filename} )
    {
        $stat_cache{$filename} = [ stat($filename) ];
    }
    return @{ $stat_cache{$filename} };
}

# clear_stat_cache()
# Forgets every remembered stat() result.
sub clear_stat_cache
{
    %stat_cache = ();
}
880 | + | ||
# need_update( $filename, $size_on_server )
# Returns 1 when the local file is missing or empty, or when its size differs
# from $size_on_server; returns 0 when the sizes match.  When the sizes
# differ and the "unlink" variable equals 1, the local file is removed first.
sub need_update
{
    my ( $filename, $size_on_server ) = @_;

    # Element 7 of the stat list is the file size in bytes.
    my $size = ( _stat($filename) )[7];

    return 1 unless ($size);
    return 0 if $size_on_server == $size;

    unlink $filename if get_variable("unlink") == 1;
    return 1;
}
897 | + | ||
# process_index( $uri, $index [, $optional] )
#
# Reads one Packages or Sources index and, for every file it lists:
#   * marks the file as needed (%skipclean) and records it in the ALL list,
#   * records each available checksum in the per-checksum list files,
#   * queues the file for download (and records it in the NEW list) when
#     need_update() reports that the mirror copy is missing or has a
#     different size.
#
# $uri      - repository base URI
# $index    - index path below the sanitised URI, without compression suffix
# $optional - when true, a missing index is skipped silently instead of
#             producing a warning
#
# The index is looked up relative to the current directory.
# NOTE(review): presumably the skel directory - confirm against the caller.
sub process_index
{
    my $uri      = shift;
    my $index    = shift;
    my $optional = shift;

    my $path = sanitise_uri($uri);

    # Stanzas in an index are separated by blank lines.
    local $/ = "\n\n";
    my $mirror = get_variable("mirror_path") . "/" . $path;

    # Unpack the first compressed variant found next to the plain index.
    if ( -e "$path/$index.gz" )
    {
        system("gunzip < $path/$index.gz > $path/$index");
    }
    elsif ( -e "$path/$index.xz" )
    {
        system("xz -d < $path/$index.xz > $path/$index");
    }
    elsif ( -e "$path/$index.lzma" )
    {
        # xz detects the legacy .lzma container on its own when decompressing.
        system("xz -d < $path/$index.lzma > $path/$index");
    }
    elsif ( -e "$path/$index.bz2" )
    {
        system("bzip2 -d < $path/$index.bz2 > $path/$index");
    }

    my $stream;
    unless ( open $stream, "<", "$path/$index" )
    {
        if ($optional)
        {
            return;
        }
        warn("apt-mirror: can't open index $path/$index in process_index");
        return;
    }

    while ( my $package = <$stream> )
    {
        # Inside a stanza, work line by line again.
        local $/ = "\n";
        chomp $package;

        # Split the stanza into "Field" => "value" pairs; the leading element
        # is whatever precedes the first field and is discarded.
        my ( undef, %lines ) = split( /^([\w\-]+): */m, $package );

        chomp(%lines);

        if ( exists $lines{"Filename"} )
        {    # Packages index
            my $filename = remove_double_slashes( $path . "/" . $lines{"Filename"} );
            $skipclean{$filename} = 1;
            print { $files_fh{ALL} } $filename . "\n";
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $packages_hashes{$hash};
                print { $files_fh{$hash} } $lines{$index_hash} . " " . $filename . "\n" if $lines{$index_hash};
            }
            if ( need_update( $mirror . "/" . $lines{"Filename"}, $lines{"Size"} ) )
            {
                # Pick the first checksum type from @hash_strength that this
                # stanza provides.  A separate loop variable is required: a
                # foreach loop restores its iteration variable on exit, so
                # reusing $hash as the iterator would leave it undefined here.
                my $hash    = undef;
                my $hashsum = undef;
                foreach my $candidate (@hash_strength)
                {
                    my $index_hash = $packages_hashes{$candidate};
                    if ( $lines{$index_hash} )
                    {
                        $hash    = $candidate;
                        $hashsum = $lines{$index_hash};
                        last;
                    }
                }
                print { $files_fh{NEW} } $filename . "\n";
                add_url_to_download( $uri . "/" . $lines{"Filename"}, $lines{"Size"}, $hash, $hash, $hashsum, 0 );
            }
        }
        else
        {    # Sources index
            $lines{"Directory"} = "" unless defined $lines{"Directory"};
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $sources_hashes{$hash};
                if ( $lines{$index_hash} )
                {
                    # One "<checksum> <size> <name>" entry per line.
                    foreach ( split( /\n/, $lines{$index_hash} ) )
                    {
                        next if $_ eq '';
                        my @file = split;
                        die("apt-mirror: invalid Sources format") if @file != 3;
                        my $download_url = $uri . "/" . $lines{"Directory"} . "/" . $file[2];
                        my $filename = remove_double_slashes( $path . "/" . $lines{"Directory"} . "/" . $file[2] );
                        print { $files_fh{$hash} } $file[0] . " " . ${filename} . "\n";

                        # Each file is listed once per checksum type; queue it
                        # only the first time it is seen.
                        unless ( $skipclean{$filename} )
                        {
                            $skipclean{$filename} = 1;
                            print { $files_fh{ALL} } ${filename} . "\n";
                            if ( need_update( $mirror . "/" . $lines{"Directory"} . "/" . $file[2], $file[1] ) )
                            {
                                # NOTE(review): the Packages branch writes the
                                # mirror-relative file name to NEW, this branch
                                # writes the download URL - confirm which form
                                # consumers of NEW expect.
                                print { $files_fh{NEW} } ${download_url} . "\n";
                                add_url_to_download( $uri . "/" . $lines{"Directory"} . "/" . $file[2], $file[1], $hash, $hash, $file[0], 0 );
                            }
                        }
                    }
                }
            }
        }
    }

    close $stream;
}
1006 | + | ||
# Walk every downloaded index: "S" marks a source repository, "P" a binary one.
print "Processing indexes: [";

foreach my $source_entry (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source_entry};
    print "S";

    # A repository without components is a flat one.
    unless (@components)
    {
        process_index( $uri, "/$distribution/Sources" );
        next;
    }

    foreach my $component (@components)
    {
        process_index( $uri, "/dists/$distribution/$component/source/Sources" );
    }
}

foreach my $binary_entry (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary_entry};
    print "P";

    # A repository without components is a flat one.
    unless (@components)
    {
        process_index( $uri, "/$distribution/Packages" );
        next;
    }

    foreach my $component (@components)
    {
        process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" );

        # The binary-all index is optional: no warning when it is missing.
        process_index( $uri, "/dists/$distribution/$component/binary-all/Packages", 1 );
    }
}

clear_stat_cache();

print "]\n\n";

# The intermediate list files are complete now.
close $_ foreach values %files_fh;
1054 | + | ||
1055 | + | ###################################################################################### | |
1056 | + | ## Main download | |
1057 | + | ||
# Download every queued archive file into the mirror directory.
chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

# The values of %urls_to_download are the expected sizes of the queued files.
my $need_bytes = 0;
$need_bytes += $_ foreach values %urls_to_download;

my $size_output = format_bytes($need_bytes);

print $size_output . " will be downloaded into archive.\n";

download_urls( "archive", sort keys %urls_to_download );
1071 | + | ||
1072 | + | ###################################################################################### | |
1073 | + | ## Copy skel to main archive | |
1074 | + | ||
# copy_file( $from, $to )
# Makes $to a copy of the regular file $from: a hard link when possible,
# a real copy otherwise, with the access and modification times of $from.
# Does nothing when $from is not a regular file, or when $to already exists
# with identical stat() data.  Missing parent directories are created.
# Warns and returns when neither linking nor copying works; dies when the
# timestamps cannot be set.
sub copy_file
{
    my ( $from, $to ) = @_;
    my $target_dir = dirname($to);

    return unless -f $from;
    make_path($target_dir) unless -d $target_dir;

    # In "unlink" mode a differing target is removed first rather than
    # being overwritten in place.
    if ( get_variable("unlink") == 1 && compare( $from, $to ) != 0 )
    {
        unlink($to);
    }

    my @source_stat = stat($from);
    if ( -f $to )
    {
        my @target_stat = stat($to);
        return if "@target_stat" eq "@source_stat";
    }

    if ( !link( $from, $to ) && !copy( $from, $to ) )
    {
        warn("apt-mirror: can't copy $from to $to");
        return;
    }

    # Elements 8 and 9 of the stat list are atime and mtime.
    utime( @source_stat[ 8, 9 ], $to ) or die("apt-mirror: can't utime $to");
}
1100 | + | ||
# Publish every release and index file from the skel tree into the mirror.
foreach my $url ( @release_urls, @index_urls )
{
    # Strip the scheme in place; an entry without one is not a valid URL.
    die("apt-mirror: invalid url in index_urls") unless $url =~ s[^(\w+)://][];

    copy_file(
        get_variable("skel_path") . "/" . sanitise_uri("$url"),
        get_variable("mirror_path") . "/" . sanitise_uri("$url")
    );

    my $sanitized_uri = sanitise_uri($url);

    # If we downloaded any files from a checksum location, now is the time to
    # populate the canonical filename.
    my $canonical_names = $hashsum_to_files{$sanitized_uri};
    next unless $canonical_names;

    foreach my $filename ( @{$canonical_names} )
    {
        copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $filename );

        # Other checksum locations registered for this file get a copy as well.
        next unless $file_to_hashsums{$filename};
        foreach my $hashsum_filename ( @{ $file_to_hashsums{$filename} } )
        {
            copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $hashsum_filename );
        }
    }
}
1125 | + | ||
1126 | + | ###################################################################################### | |
1127 | + | ## Make cleaning script | |
1128 | + | ||
# Directories and files found to be removable, and the disk space the
# removable files occupy (in bytes).
my @rm_dirs           = ();
my @rm_files          = ();
my $unnecessary_bytes = 0;
1131 | + | ||
# process_symlink( $item )
# Symlinks are always needed, so the answer is 1 whatever is passed in.
sub process_symlink
{
    return 1;
}
1136 | + | ||
# process_file( $file )
# Returns 1 when $file is listed in %skipclean.  Otherwise the file is
# recorded in @rm_files, its allocated size is added to $unnecessary_bytes
# and 0 is returned.
sub process_file
{
    my ($file) = @_;

    # Apply the same escaping the "_tilde" / "_plus" options ask for before
    # looking the name up.
    $file =~ s[~][%7E]g  if get_variable("_tilde");
    $file =~ s[\+][%2B]g if get_variable("_plus");
    return 1 if $skipclean{$file};

    push @rm_files, sanitise_uri($file);

    # Element 12 of the stat list is the number of allocated blocks; the
    # code counts 512 bytes per block.
    my $blocks = ( stat($file) )[12];
    $unnecessary_bytes += $blocks * 512;
    return 0;
}
1148 | + | ||
# process_directory( $dir )
# Recursively inspects $dir and returns a true value when anything inside it
# is still needed (or when $dir itself is listed in %skipclean).  A directory
# with nothing needed inside is recorded in @rm_dirs.  Dies when the
# directory cannot be opened.
sub process_directory
{
    my ($dir) = @_;
    return 1 if $skipclean{$dir};

    my $is_needed = 0;
    opendir( my $dir_h, $dir ) or die "apt-mirror: can't opendir $dir: $!";
    my @entries = grep { !/^\.$/ && !/^\.\.$/ } readdir($dir_h);
    foreach my $entry (@entries)
    {
        my $item = $dir . "/" . $entry;

        # The three checks are independent: a symlink to a regular file
        # passes both the -f and the -l test.
        if ( -d $item && !-l $item )
        {
            $is_needed |= process_directory($item);
        }
        if ( -f $item )
        {
            $is_needed |= process_file($item);
        }
        if ( -l $item )
        {
            $is_needed |= process_symlink($item);
        }
    }
    closedir $dir_h;

    push @rm_dirs, $dir unless $is_needed;
    return $is_needed;
}
1166 | + | ||
# The keys of %clean_directory are examined from inside the mirror directory.
chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

foreach my $candidate ( keys %clean_directory )
{
    # Only real directories are scanned; symlinks are left alone.
    next unless -d $candidate;
    next if -l $candidate;
    process_directory($candidate);
}
1173 | + | ||
# Either delete the unneeded files right away ("_autoclean") or write a shell
# script that does so on request.  The script file is (re)created in both
# cases and made executable afterwards.
open my $clean_fh, ">", get_variable("cleanscript") or die("apt-mirror: can't open clean script file");

my ( $i, $total ) = ( 0, scalar @rm_files );

if ( get_variable("_autoclean") )
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories will be freed...";

    chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

    # Best effort: failures to remove individual entries are ignored.
    foreach (@rm_files) { unlink $_; }
    foreach (@rm_dirs)  { rmdir $_; }

}
else
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories can be freed.\n";
    print "Run " . get_variable("cleanscript") . " for this purpose.\n\n";

    print {$clean_fh} "#!/bin/sh\n";
    print {$clean_fh} "set -e\n\n";
    print {$clean_fh} "cd " . quoted_path( get_variable("mirror_path") ) . "\n\n";
    print {$clean_fh} "echo 'Removing $total unnecessary files [$size_output]...'\n";
    foreach (@rm_files)
    {
        # quoted_path is used as on the "cd" line above, so that a name
        # containing a single quote cannot break the generated command.
        print {$clean_fh} "rm -f " . quoted_path($_) . "\n";

        # Progress output: a percentage every 500 files, a dot every 10.
        print {$clean_fh} "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 500;
        print {$clean_fh} "echo -n .\n" unless $i % 10;
        $i++;
    }
    print {$clean_fh} "echo 'done.'\n";
    print {$clean_fh} "echo\n\n";

    $i     = 0;
    $total = scalar @rm_dirs;
    print {$clean_fh} "echo 'Removing $total unnecessary directories...'\n";
    foreach (@rm_dirs)
    {
        my $quoted_dir = quoted_path($_);
        print {$clean_fh} "if test -d " . $quoted_dir . "; then rm -fr " . $quoted_dir . "; fi\n";

        # Progress output: a percentage every 50 directories, a dot for each.
        print {$clean_fh} "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 50;
        print {$clean_fh} "echo -n .\n";
        $i++;
    }
    print {$clean_fh} "echo 'done.'\n";
    print {$clean_fh} "echo\n";

}

# Close on both paths so the script is flushed before its mode is changed.
# A failure only warns: dying here would skip the remaining end-of-run steps.
close $clean_fh or warn("apt-mirror: can't finish writing clean script file: $!");

# Make clean script executable
my $perm = ( stat get_variable("cleanscript") )[2] & 07777;
chmod( $perm | 0111, get_variable("cleanscript") );
1231 | + | ||
# Optionally run the post-mirror script, then release the apt-mirror lock.
if ( get_variable("run_postmirror") )
{
    print "Running the Post Mirror script ...\n";
    my $postmirror_script = get_variable("postmirror_script");
    print "(" . $postmirror_script . ")\n\n";

    # An executable script is run directly (with one empty argument);
    # anything else is handed to /bin/sh.
    my @command =
      -x $postmirror_script
      ? ( $postmirror_script, '' )
      : ( '/bin/sh', $postmirror_script );
    system(@command);

    print "\nPost Mirror script has completed. See above output for any possible errors.\n\n";
}

unlock_aptmirror();
Newer
Older