Last active 1727105354

anduin's Avatar anduin revised this gist 1727105354. Go to revision

1 file changed, 1247 insertions

apt-mirror(file created)

@@ -0,0 +1,1247 @@
1 + #!/usr/bin/perl
2 +
3 + =pod
4 +
5 + =head1 NAME
6 +
7 + apt-mirror - apt sources mirroring tool
8 +
9 + =head1 SYNOPSIS
10 +
11 + apt-mirror [configfile]
12 +
13 + =head1 DESCRIPTION
14 +
15 + A small and efficient tool that lets you mirror a part of or
16 + the whole Debian GNU/Linux distribution or any other apt sources.
17 +
18 + Main features:
19 + * It uses a config similar to APT's F<sources.list>
20 + * It's fully pool compliant
21 + * It supports multithreaded downloading
22 + * It supports multiple architectures at the same time
23 + * It can automatically remove unneeded files
24 + * It works well on an overloaded Internet connection
25 + * It never produces an inconsistent mirror including while mirroring
26 + * It works on all POSIX compliant systems with Perl and wget
27 +
28 + =head1 COMMENTS
29 +
30 + apt-mirror uses F</etc/apt/mirror.list> as a configuration file.
31 + By default it is tuned to official Debian or Ubuntu mirrors. Change
32 + it for your needs.
33 +
34 + After you set up the configuration file, you may run as root:
35 +
36 + # su - apt-mirror -c apt-mirror
37 +
38 + Or uncomment the line in F</etc/cron.d/apt-mirror> to enable daily mirror updates.
39 +
40 + =head1 FILES
41 +
42 + F</etc/apt/mirror.list>
43 + Main configuration file
44 +
45 + F</etc/cron.d/apt-mirror>
46 + Cron configuration template
47 +
48 + F</var/spool/apt-mirror/mirror>
49 + The mirror is placed here
50 +
51 + F</var/spool/apt-mirror/skel>
52 + Place for temporarily downloaded indexes
53 +
54 + F</var/spool/apt-mirror/var>
55 + Log files placed here. URLs and MD5 checksums also here.
56 +
57 + =head1 CONFIGURATION EXAMPLES
58 +
59 + The mirror.list configuration supports many options; the file is well commented and explains each option.
60 + Here are some sample mirror configuration lines showing the various supported ways:
61 +
62 + Normal:
63 + deb http://example.com/debian stable main contrib non-free
64 +
65 + Arch Specific: (many other architectures are supported)
66 + deb-powerpc http://example.com/debian stable main contrib non-free
67 +
68 + HTTP and FTP Auth or non-standard port:
69 + deb http://user:pass@example.com:8080/debian stable main contrib non-free
70 +
71 + HTTPS with sending Basic HTTP authentication information (plaintext username and password) for all requests:
72 + (this was default behaviour of Wget 1.10.2 and prior and is needed for some servers with new version of Wget)
73 + set auth_no_challenge 1
74 + deb https://user:pass@example.com:443/debian stable main contrib non-free
75 +
76 + HTTPS without checking certificate:
77 + set no_check_certificate 1
78 + deb https://example.com:443/debian stable main contrib non-free
79 +
80 + Source Mirroring:
81 + deb-src http://example.com/debian stable main contrib non-free
82 +
83 + =head1 AUTHORS
84 +
85 + Dmitry N. Hramtsov E<lt>hdn@nsu.ruE<gt>
86 + Brandon Holtsclaw E<lt>me@brandonholtsclaw.comE<gt>
87 +
88 + =cut
89 +
90 + use warnings;
91 + use strict;
92 + use File::Copy;
93 + use File::Compare;
94 + use File::Path qw(make_path);
95 + use File::Basename;
96 + use Fcntl qw(:flock);
97 +
# Path of the configuration file to read; assigned in the setup section below.
my $config_file;

# Built-in defaults for every configuration variable. Lines of the form
# "set <key> <value>" in the configuration file overwrite these entries.
# A value may refer to another variable as $name; get_variable() expands
# such references when the value is read.
my %config_variables = (
    "defaultarch" => `dpkg --print-architecture 2>/dev/null` || 'i386',
    "nthreads" => 20,
    "base_path" => '/var/spool/apt-mirror',
    "mirror_path" => '$base_path/mirror',
    "skel_path" => '$base_path/skel',
    "var_path" => '$base_path/var',
    "cleanscript" => '$var_path/clean.sh',
    "_contents" => 1,
    "_autoclean" => 0,
    "_tilde" => 0,
    "_plus" => 0,
    "limit_rate" => '100m',
    "run_postmirror" => 1,
    "auth_no_challenge" => 0,
    "no_check_certificate" => 0,
    "unlink" => 0,
    "paranoid" => 0,
    "postmirror_script" => '$var_path/postmirror.sh',
    "use_proxy" => 'off',
    "http_proxy" => '',
    "https_proxy" => '',
    "proxy_user" => '',
    "proxy_password" => ''
);

# Parsed "deb" lines: each entry is [ arch, uri, distribution/components... ].
my @config_binaries = ();
# Parsed "deb-src" lines: each entry is [ uri, distribution/components... ].
my @config_sources = ();

# URLs fetched in the "release" and "index" stages, kept for later processing.
my @release_urls;
my @index_urls;
# PIDs of the wget child processes that are currently running.
my @childrens = ();
# Sanitised paths recorded while queueing downloads and reading indexes, plus
# the URIs named on "skip-clean" configuration lines.
my %skipclean = ();
# URIs named on "clean" configuration lines.
my %clean_directory = ();
# Supported hash algorithms, ordered from strongest to weakest.
my @hash_strength = qw(SHA512 SHA256 SHA1 MD5Sum);
# Field name that carries each hash in a Packages index.
my %packages_hashes = (
    SHA512 => "SHA512",
    SHA256 => "SHA256",
    SHA1 => "SHA1",
    MD5Sum => "MD5sum",
);
# Field name that carries each hash in a Sources index.
my %sources_hashes = (
    SHA512 => "Checksums-Sha512",
    SHA256 => "Checksums-Sha256",
    SHA1 => "Checksums-Sha1",
    MD5Sum => "Files",
);
# External command used to verify files for each hash.
my %verify_commands = (
    SHA512 => "sha512sum",
    SHA256 => "sha256sum",
    SHA1 => "sha1sum",
    MD5Sum => "md5sum",
);
# Name of the per-hash checksum list written under var_path.
my %checksum_filenames = (
    SHA512 => "SHA512",
    SHA256 => "SHA256",
    SHA1 => "SHA1",
    MD5Sum => "MD5",
);

# Mapping of files downloaded from a by-hash directory to their canonical locations.
my %hashsum_to_files = ();

# Mapping of all the checksums for a given canonical filename.
my %file_to_hashsums;
# Maps a URL to [ hash name, checksum ]; used to verify downloads in paranoid mode.
my %urls_checksums = ();
166 +
167 + ######################################################################################
168 + ## Setting up $config_file variable
169 +
# The first command-line argument, when present, names the configuration
# file and must exist; otherwise the system-wide default is used.
my $cli_config_file = shift @ARGV;
if ($cli_config_file)
{
    -e $cli_config_file or die("apt-mirror: invalid config file specified");
    $config_file = $cli_config_file;
}
else
{
    $config_file = "/etc/apt/mirror.list";
}

# The dpkg output captured for the default still carries its newline.
chomp( $config_variables{"defaultarch"} );
178 +
179 + ######################################################################################
180 + ## Common subroutines
181 +
# Format a number for display with one decimal place.
#
# The magnitude is rounded to tenths, a fractional part is always shown
# ("2" becomes "2.0"), and the sign of a negative input is re-attached.
sub round_number
{
    my ($value) = @_;

    my $sign = '';
    $sign = '-' if $value < 0;

    my $rounded = int( ( abs($value) + .05 ) * 10 ) / 10;
    my $text    = "$rounded";

    $text .= '.0' if $text !~ /\./;
    $text .= '0'  if substr( $text, -1 ) eq '.';
    $text =~ s/(\.\d\d)0$/$1/;

    return $sign . $text;
}
193 +
# Render a byte count as a human-readable string.
#
# Counts below 1 KiB are printed verbatim with the unit "bytes"; larger
# counts are scaled to the largest fitting unit (KiB, MiB or GiB) and
# formatted with round_number().
sub format_bytes
{
    my ($bytes) = @_;

    # Largest unit first, so the first one that fits is the one to use.
    my @units = (
        [ 'GiB', 1024 * 1024 * 1024 ],
        [ 'MiB', 1024 * 1024 ],
        [ 'KiB', 1024 ],
    );

    foreach my $unit (@units)
    {
        my ( $label, $factor ) = @{$unit};
        next if $bytes < $factor;
        return round_number( $bytes / $factor ) . " $label";
    }

    return "$bytes bytes";
}
227 +
# Return the value of a configuration variable with every $name reference
# inside it replaced by the value of that variable.
#
# Substitution is repeated until nothing is left to replace; the pass
# counter makes a self-referencing definition die instead of looping forever.
sub get_variable
{
    my ($name) = @_;

    my $expanded    = $config_variables{$name};
    my $passes_left = 16;

    while (1)
    {
        last unless $expanded =~ s/\$(\w+)/$config_variables{$1}/xg;
        die("apt-mirror: too many substitution while evaluating variable") if $passes_left-- < 0;
    }

    return $expanded;
}
238 +
# Wrap a path in single quotes for use in a shell command line.
#
# Each embedded single quote becomes '\'' (close the quote, emit an escaped
# quote, reopen the quote).
sub quoted_path
{
    my ($path) = @_;

    my $escaped_quote = q{'\''};

    # A limit of -1 keeps trailing empty fields, so quotes at the end survive.
    my $escaped = join( $escaped_quote, split( /'/, $path, -1 ) );

    return "'" . $escaped . "'";
}
245 +
# Take the exclusive run lock for this mirror.
#
# Opens (creating if necessary) apt-mirror.lock under var_path on the
# LOCK_FILE handle, which unlock_aptmirror() later closes, and requests a
# non-blocking exclusive flock on it.
#
# Dies when the lock file cannot be opened, or when another process already
# holds the lock.
sub lock_aptmirror
{
    my $lock_path = get_variable("var_path") . "/apt-mirror.lock";

    # Report an unwritable lock file as such; without this check the failed
    # flock below would misreport it as "already running".
    open( LOCK_FILE, '>', $lock_path )
      or die("apt-mirror: can't open lock file ($lock_path): $!");

    my $lock = flock( LOCK_FILE, LOCK_EX | LOCK_NB );
    if ( !$lock )
    {
        die("apt-mirror is already running, exiting");
    }
}
255 +
# Release the run lock: close the LOCK_FILE handle and remove the lock file
# from var_path.
sub unlock_aptmirror
{
    close(LOCK_FILE);
    my $lock_path = get_variable("var_path") . "/apt-mirror.lock";
    unlink($lock_path);
}
261 +
# Verify the files downloaded in a stage and delete those that fail.
#
# For each supported hash, the checksum list "<stage>-<hash>" under var_path
# (written by download_urls in paranoid mode) is handed to the matching
# *sum command with --check. Every file reported as FAILED that exists is
# deleted.
#
# Argument: $stage - stage label used in the checksum list names.
# Returns:  the number of files deleted.
# Dies when the verify command cannot be started or a file cannot be deleted.
sub delete_corrupted_files
{
    my $stage = shift;
    my $found = 0;
    foreach my $hash (@hash_strength)
    {
        my $file = get_variable("var_path") . "/${stage}-${hash}";
        next unless -s $file;

        # The list path is quoted because the command runs through a shell.
        my $command = "env LC_ALL=C $verify_commands{$hash} --check --quiet " . quoted_path($file) . " 2>/dev/null";
        open( my $pipe, "-|", $command ) or die "Cannot run ${verify_commands{$hash}}";
        while ( my $line = <$pipe> )
        {
            # Only "<file>: FAILED ..." lines name a corrupted file; skip
            # anything else rather than testing an undefined filename.
            my ($filename) = $line =~ /^(.*): FAILED/;
            next unless defined $filename;
            next unless -f $filename;

            $found++;
            print "$filename is corrupted, deleting....\n";
            unlink $filename or die "Cannot delete $filename.";
        }
        close $pipe;
    }
    return $found;
}
288 +
# Download a list of URLs with parallel wget processes.
#
# Arguments:
#   $stage - label of the download phase; used in progress output and in the
#            names of the intermediate files written under var_path
#   @urls  - the URLs to fetch
#
# Steps:
#   1. Build the extra wget options from the configuration.
#   2. In paranoid mode, write one checksum list per hash ("<stage>-<hash>")
#      from %urls_checksums.
#   3. Spread the URLs over at most "nthreads" list files
#      ("<stage>-urls.<n>") and start one wget per list file.
#   4. Wait for every wget process to finish.
#   5. In paranoid mode, delete corrupted files and die if any were found.
#   6. Copy each by-hash download recorded in %hashsum_to_files to its
#      canonical location(s).
#
# Files are downloaded relative to the current working directory.
sub download_urls
{
    my $stage    = shift;
    my @urls     = @_;
    my $nthreads = get_variable("nthreads");
    my @args     = ();
    local $| = 1;

    # Never start more processes than there are URLs.
    $nthreads = @urls if @urls < $nthreads;

    if ( get_variable("auth_no_challenge") == 1 )    { push( @args, "--auth-no-challenge" ); }
    if ( get_variable("no_check_certificate") == 1 ) { push( @args, "--no-check-certificate" ); }
    if ( get_variable("unlink") == 1 )               { push( @args, "--unlink" ); }
    if ( length( get_variable("use_proxy") ) && ( get_variable("use_proxy") eq 'yes' || get_variable("use_proxy") eq 'on' ) )
    {
        if ( length( get_variable("http_proxy") ) || length( get_variable("https_proxy") ) ) { push( @args, "-e use_proxy=yes" ); }
        if ( length( get_variable("http_proxy") ) )     { push( @args, "-e http_proxy=" . get_variable("http_proxy") ); }
        if ( length( get_variable("https_proxy") ) )    { push( @args, "-e https_proxy=" . get_variable("https_proxy") ); }
        if ( length( get_variable("proxy_user") ) )     { push( @args, "-e proxy_user=" . get_variable("proxy_user") ); }
        if ( length( get_variable("proxy_password") ) ) { push( @args, "-e proxy_password=" . get_variable("proxy_password") ); }
    }
    print "Downloading " . scalar(@urls) . " $stage files using $nthreads threads...\n";

    if ( get_variable("paranoid") )
    {
        # One checksum list per hash; delete_corrupted_files() reads them back.
        my %fh = ();
        foreach my $hash (@hash_strength)
        {
            open $fh{$hash}, ">", get_variable("var_path") . "/${stage}-${hash}" or die("apt-mirror: Cannot write to ${stage}-${hash}");
        }

        foreach my $checked_url (@urls)
        {
            next unless $urls_checksums{$checked_url};
            my ( $hash, $hashsum ) = @{ $urls_checksums{$checked_url} };
            my $fh = $fh{$hash};
            print $fh $hashsum . " " . sanitise_uri($checked_url) . "\n";
        }
        foreach my $hash (@hash_strength)
        {
            close $fh{$hash};
        }
    }

    my @url_fds;
    for my $thread ( 0 .. $nthreads - 1 )
    {
        open( $url_fds[$thread], ">", get_variable("var_path") . "/$stage-urls.$thread" ) or die("apt-mirror: can't write to intermediate file ($stage-urls.$thread)");
    }

    # Deal the URLs out round-robin, last URL first.
    for ( my $i = scalar(@urls) - 1 ; $i >= 0 ; $i-- )
    {
        my $thread = $i % $nthreads;
        print { $url_fds[$thread] } $urls[$i] . "\n";
    }

    # Name the list file that actually failed to close.
    for my $thread ( 0 .. $#url_fds )
    {
        close $url_fds[$thread] or die("apt-mirror: can't close intermediate file ($stage-urls.$thread)");
    }

    for my $thread ( 0 .. $nthreads - 1 )
    {
        my $pid = fork();

        die("apt-mirror: can't do fork in download_urls") if !defined($pid);

        if ( $pid == 0 )
        {
            exec 'wget', '--no-if-modified-since', '--no-cache', '--limit-rate=' . get_variable("limit_rate"), '-T', '60', '-t', '1', '-r', '-N', '-l', 'inf', '-o', get_variable("var_path") . "/$stage-log.$thread", '-i', get_variable("var_path") . "/$stage-urls.$thread", @args;

            # shouldn't reach this unless exec fails
            die("\n\nCould not run wget, please make sure its installed and in your path\n\n");
        }

        push @childrens, $pid;
    }

    print "Begin time: " . localtime() . "\n[" . scalar(@childrens) . "]... ";
    while ( scalar @childrens )
    {
        my $dead = wait();
        if ( $dead == -1 )
        {
            # No child processes are left to wait for; stop instead of
            # spinning on a list that can never drain.
            @childrens = ();
            last;
        }
        @childrens = grep { $_ != $dead } @childrens;
        print "[" . scalar(@childrens) . "]... ";
    }
    print "\nEnd time: " . localtime() . "\n\n";

    if ( get_variable("paranoid") )
    {
        if ( delete_corrupted_files($stage) > 0 )
        {
            die "Some files were corrupted while downloading, aborting...";
        }
    }

    # Populate canonical locations from the by-hash downloads.
    foreach my $hashsum_filename ( keys %hashsum_to_files )
    {
        foreach my $filename ( @{ $hashsum_to_files{$hashsum_filename} } )
        {
            copy_file( $hashsum_filename, $filename );
        }
    }

}
401 +
402 + ## Parse config
403 +
# Parse one line of the mirror.list configuration.
#
# Argument: the line to parse. When called without arguments the current
# value of $_ is parsed, which is how the configuration reader calls it.
#
# Returns a hash describing the line, or an empty hash when the line matches
# none of the known forms:
#   deb / deb-src lines: type, arch (undef when not given), options (""
#       when absent), uri, components (array reference holding the
#       whitespace-separated words after the URI)
#   set lines:           type => 'set', key, value (surrounding quotes removed)
#   clean / skip-clean:  type, uri
sub parse_config_line
{
    my $line = @_ ? shift : $_;

    my $pattern_deb_line = qr/^[\t ]*(?<type>deb-src|deb)(?:-(?<arch>[\w\-]+))?[\t ]+(?:\[(?<options>[^\]]+)\][\t ]+)?(?<uri>[^\s]+)[\t ]+(?<components>.+)$/;
    my %config;
    if ( $line =~ $pattern_deb_line ) {
        $config{'type'} = $+{type};
        $config{'arch'} = $+{arch};
        $config{'options'} = $+{options} ? $+{options} : "";
        $config{'uri'} = $+{uri};
        $config{'components'} = $+{components};

        # An arch=... entry in the [options] block overrides the arch taken
        # from the "deb-<arch>" prefix; only the first listed arch is used.
        if ( $config{'options'} =~ /arch=(?<arch>[\w\-]+)/ ) {
            $config{'arch'} = $+{arch};
        }
        $config{'components'} = [ split /\s+/, $config{'components'} ];
    } elsif ( $line =~ /set[\t ]+(?<key>[^\s]+)[\t ]+(?<value>"[^"]+"|'[^']+'|[^\s]+)/ ) {
        $config{'type'} = 'set';
        $config{'key'} = $+{key};
        $config{'value'} = $+{value};
        $config{'value'} =~ s/^'(.*)'$/$1/;
        $config{'value'} =~ s/^"(.*)"$/$1/;
    } elsif ( $line =~ /(?<type>clean|skip-clean)[\t ]+(?<uri>[^\s]+)/ ) {
        $config{'type'} = $+{type};
        $config{'uri'} = $+{uri};
    }

    return %config;
}
432 +
# Turn a URI into the relative path used for it on disk.
#
# Removes the scheme ("http://"), any user:password@ prefix in the host part
# and one trailing slash. "~" and "+" are percent-encoded when the _tilde /
# _plus configuration variables are set.
sub sanitise_uri
{
    my ($uri) = @_;

    $uri =~ s{^(\w+)://}{};

    # Credentials can only appear before the first "/".
    my ($host_part) = split '/', $uri;
    $uri =~ s/^([^@]+)?@?// if $host_part =~ /@/;

    if ( get_variable("_tilde") ) { $uri =~ s/~/\%7E/g; }
    if ( get_variable("_plus") )  { $uri =~ s/\+/\%2B/g; }

    $uri =~ s{/$}{};
    return $uri;
}
443 +
# Read the configuration file line by line. Comment lines and blank lines
# are skipped; every other line must be a "set", "deb", "deb-src", "clean"
# or "skip-clean" line, otherwise the program dies naming the offending line.
open( my $config_fh, '<', $config_file ) or die("apt-mirror: can't open config file ($config_file)");
while (<$config_fh>)
{
    next if /^\s*#/;
    next unless /\S/;
    my $line        = $_;
    my %config_line = parse_config_line();

    # parse_config_line returns an empty hash for an unrecognised line; use
    # an empty type so the comparisons below stay warning-free and control
    # reaches the die at the bottom of the loop.
    my $type = defined $config_line{'type'} ? $config_line{'type'} : '';

    if ( $type eq "set" ) {
        $config_variables{ $config_line{'key'} } = $config_line{'value'};
        next;
    } elsif ( $type eq "deb" ) {
        my $arch = $config_line{'arch'};
        $arch = get_variable("defaultarch") if ! defined $config_line{'arch'};
        push @config_binaries, [ $arch, $config_line{'uri'}, @{$config_line{'components'}} ];
        next;
    } elsif ( $type eq "deb-src" ) {
        push @config_sources, [ $config_line{'uri'}, @{$config_line{'components'}} ];
        next;
    } elsif ( $type eq "skip-clean" or $type eq "clean" ) {
        my $link = sanitise_uri($config_line{'uri'});
        if ( $type eq "skip-clean" ) {
            $skipclean{ $link } = 1;
        } else {
            $clean_directory{ $link } = 1;
        }
        next;
    }

    die("apt-mirror: invalid line in config file ($.: $line ...)");
}
close $config_fh;

die("Please explicitly specify 'defaultarch' in mirror.list") unless get_variable("defaultarch");
478 +
479 + ######################################################################################
480 + ## Create the 3 needed directories if they don't exist yet
my @needed_directories = map { get_variable($_) } qw(mirror_path skel_path var_path);
for my $needed_directory (@needed_directories)
{
    next if -d $needed_directory;
    make_path($needed_directory) or die("apt-mirror: can't create $needed_directory directory");
}
#
#######################################################################################

# Only one apt-mirror run may work on these directories at a time.
lock_aptmirror();

######################################################################################
## Skel download

# URL => expected size for the download stage currently being prepared.
my %urls_to_download = ();
my ( $url, $arch );
499 +
# Normalise a URL or path:
#   - drop "/./" segments,
#   - collapse "//" into "/" except directly after a ":" (the scheme),
#   - resolve "dir/../" pairs except directly after "://",
#   - percent-encode "~" and "+" when _tilde / _plus are set.
sub remove_double_slashes
{
    my ($path) = @_;

    # Each substitution is repeated until it no longer changes anything,
    # because one pass can expose a new match.
    1 while $path =~ s{/\./}{/}g;
    1 while $path =~ s{(?<!:)//}{/}g;
    1 while $path =~ s{(?<!:/)/[^/]+/\.\./}{/}g;

    if ( get_variable("_tilde") ) { $path =~ s/~/\%7E/g; }
    if ( get_variable("_plus") )  { $path =~ s/\+/\%2B/g; }

    return $path;
}
510 +
# Queue a URL for the current download stage.
#
# Arguments (all but the first are optional):
#   url             - URL of the file
#   size            - expected size, stored as the value in %urls_to_download
#   strongest_hash  - name of the strongest hash available for the file
#   hash, hashsum   - hash name and checksum this call refers to
#   acquire_by_hash - true to fetch the file from its by-hash location
#
# The sanitised canonical path is always recorded in %skipclean.
sub add_url_to_download
{
    my ( $raw_url, $size, $strongest_hash, $hash, $hashsum, $acquire_by_hash ) = @_;

    my $url                = remove_double_slashes($raw_url);
    my $canonical_filename = sanitise_uri($url);
    $skipclean{$canonical_filename} = 1;

    unless ($acquire_by_hash)
    {
        # Plain download from the canonical location; remember the checksum
        # only when this call carries the strongest hash.
        $urls_to_download{$url} = $size;
        $urls_checksums{$url} = [ $hash, $hashsum ] if $strongest_hash and ( $hash eq $strongest_hash );
        return;
    }

    # by-hash: the file lives in <dir>/by-hash/<hash>/<hashsum>.
    my $by_hash_suffix = "/by-hash/${hash}/${hashsum}";
    $url = dirname($url) . $by_hash_suffix;
    my $hashsum_filename = dirname($canonical_filename) . $by_hash_suffix;
    $skipclean{$hashsum_filename} = 1;

    if ( $hash ne $strongest_hash )
    {
        # Not downloaded through this checksum, but its by-hash location
        # still has to be known for the canonical file.
        push @{ $file_to_hashsums{$canonical_filename} }, $hashsum_filename;
        return;
    }

    # Strongest hash: download it and remember which canonical location it
    # has to be copied to afterwards.
    push @{ $hashsum_to_files{$hashsum_filename} }, $canonical_filename;
    $urls_to_download{$url} = $size;
    $urls_checksums{$url} = [ $hash, $hashsum ];
    return;
}
559 +
# Queue InRelease, Release and Release.gpg for every configured source and
# binary repository. A repository with components keeps them under
# <uri>/dists/<distribution>/, a flat one under <uri>/<distribution>/.
foreach my $source_entry (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source_entry};

    $url = $uri . ( @components ? "/dists/" : "/" ) . $distribution . "/";

    foreach my $release_file (qw(InRelease Release Release.gpg))
    {
        add_url_to_download( $url . $release_file );
    }
}

foreach my $binary_entry (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary_entry};

    $url = $uri . ( @components ? "/dists/" : "/" ) . $distribution . "/";

    foreach my $release_file (qw(InRelease Release Release.gpg))
    {
        add_url_to_download( $url . $release_file );
    }
}

# Release files are downloaded into the skel directory.
chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel");
@release_urls = sort keys %urls_to_download;
download_urls( "release", @release_urls );

######################################################################################
## Download all relevant metadata

%urls_to_download = ();
606 +
# Look in a repository's downloaded InRelease/Release file for the index
# files that need to be mirrored, and queue them with add_url_to_download().
#
# Arguments:
#   $arch         - architecture to select, or "source" for a deb-src entry
#   $uri          - repository base URI
#   $distribution - distribution name (or directory, for a flat repository)
#   @components   - components; empty for a flat repository
#
# Returns 1 when a release file was read, 0 (after a warning) when neither
# InRelease nor Release exists in the skel directory.
#
# When the release file says "Acquire-By-Hash: yes", files are queued from
# their by-hash location using the strongest hash the release file lists.
sub find_metadata_in_release
{
    my ( $arch, $uri, $distribution, @components ) = @_;

    my ( $release_uri, $release_path ) = ( '', '' );
    my $component_regex = undef;

    # Architecture and component names are literal text, so they are escaped
    # before being used inside a regular expression.
    my $arch_regex                 = "(?:" . quotemeta($arch) . "|all)";
    my $compressed_extension_regex = '(?:\.(?:gz|bz2|xz|lzma))$';
    my $dist_uri;
    my $hash_type_regex = "(?:" . join( "|", @hash_strength ) . ")";

    if (@components)
    {
        $dist_uri = remove_double_slashes( $uri . "/dists/" . $distribution . "/" );
        $component_regex = "(?:" . join( "|", map { quotemeta($_) } @components ) . ")";
    }
    else
    {
        $dist_uri = remove_double_slashes( $uri . "/" . $distribution . "/" );
    }

    # Filename patterns (relative to the release file) worth downloading.
    my @wanted_patterns;
    if ( $arch eq "source" )
    {
        if ($component_regex)
        {
            # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
            @wanted_patterns = (
                qr{^${component_regex}/source/Sources${compressed_extension_regex}},
                qr{^${component_regex}/Contents-source${compressed_extension_regex}},
            );
        }
        else
        {
            # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
            @wanted_patterns = ( qr{^Sources${compressed_extension_regex}} );
        }
    }
    elsif ($component_regex)
    {
        # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
        @wanted_patterns = (
            qr{^${component_regex}/Contents-${arch_regex}${compressed_extension_regex}},
            qr{^Contents-${arch_regex}${compressed_extension_regex}},
            qr{^Packages${compressed_extension_regex}},
            qr{^${component_regex}/binary-${arch_regex}/Packages${compressed_extension_regex}},
            qr{^${component_regex}/binary-${arch_regex}/Release$},
            qr{^${component_regex}/cnf/Commands-${arch_regex}${compressed_extension_regex}},
            qr{^${component_regex}/dep11/Components-${arch_regex}.*${compressed_extension_regex}},
            qr{^${component_regex}/dep11/icons-.*${compressed_extension_regex}},
            qr{^${component_regex}/i18n/Translation-.*${compressed_extension_regex}},
        );
    }
    else
    {
        # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
        @wanted_patterns = ( qr{^Packages${compressed_extension_regex}} );
    }

    # Prefer InRelease, fall back to Release.
    my $stream;
    foreach my $release_filename ( "InRelease", "Release" )
    {
        $release_uri  = $dist_uri . $release_filename;
        $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);

        last if ( open $stream, "<", $release_path );
        $stream = undef;
    }

    unless ($stream)
    {
        warn( "Failed to find InRelease or Release in " . get_variable("skel_path") . "/" . sanitise_uri($dist_uri) );
        return 0;
    }

    my $hash            = undef;    # hash section currently being read, if any
    my %avaiable_hashes = ();
    my $acquire_by_hash = 0;
    my @parts_to_download = ();
    while ( my $line = <$stream> )
    {
        chomp $line;
        if ($hash)
        {
            if ( $line =~ /^ +(.*)$/ )
            {
                # Entry of a hash section: "<hashsum> <size> <filename>".
                my $entry = $1;
                my @parts = split( / +/, $entry );
                if ( @parts == 3 )
                {
                    my $filename = $parts[2];
                    if ( grep { $filename =~ $_ } @wanted_patterns )
                    {
                        push @parts_to_download, [ @parts, $hash ];
                    }
                }
                else
                {
                    warn("Malformed checksum line \"$entry\" in $release_uri");
                }
            }
            else
            {
                # An unindented line ends the section; it is examined again
                # below because it may start the next one.
                $hash = undef;
            }
        }
        if ( not $hash )
        {
            if ( $line =~ /^(${hash_type_regex}):$/ )
            {
                $hash = $1;
                $avaiable_hashes{$hash} = 1;
            }
            elsif ( $line eq "Acquire-By-Hash: yes" )
            {
                $acquire_by_hash = 1;
            }
        }
    }
    close $stream;

    my $strongest_hash;
    if ($acquire_by_hash)
    {
        foreach my $candidate (@hash_strength)
        {
            if ( $avaiable_hashes{$candidate} )
            {
                $strongest_hash = $candidate;
                last;
            }
        }
        unless ($strongest_hash)
        {
            warn("Cannot find a supported hash in $release_uri, will download from canonical locations.");
            $acquire_by_hash = 0;
        }
    }

    foreach my $part (@parts_to_download)
    {
        my ( $hashsum, $size, $filename, $part_hash ) = @{$part};
        add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $part_hash, $hashsum, $acquire_by_hash );
    }
    return 1;
}
777 +
# Queue the index files of every configured repository. The release file is
# consulted first; when a repository has none, the well-known index
# locations are queued with every known compression extension.
print "Processing metadata files from releases [";
foreach my $binary_entry (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary_entry};
    print "M";
    unless (find_metadata_in_release( $arch, $uri, $distribution, @components))
    {
        # Insecure repo with no release file - try to get the well known indices
        foreach my $file_extension (".gz", ".bz2", ".xz", ".lzma", "")
        {
            if (@components)
            {
                # Debian repo
                foreach my $component (@components)
                {
                    foreach my $path (
                        "/dists/${distribution}/${component}/binary-${arch}/Packages",
                        "/dists/${distribution}/${component}/binary-all/Packages",
                        "/dists/${distribution}/${component}/Contents-${arch}",
                        "/dists/${distribution}/${component}/Contents-all",
                        "/dists/${distribution}/Contents-${arch}",
                        "/dists/${distribution}/Contents-all",
                    )
                    {
                        add_url_to_download( "${uri}/${path}${file_extension}" );
                    }
                }
            } else {
                # Flat repo
                foreach my $path (
                    "${distribution}/Packages",
                    "${distribution}/Contents-${arch}",
                    "${distribution}/Contents-all",
                )
                {
                    add_url_to_download( "${uri}/${path}${file_extension}" );
                }
            }
        }
    }
}

foreach my $source_entry (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source_entry};
    print "M";
    unless (find_metadata_in_release( "source", $uri, $distribution, @components))
    {
        # Insecure repo with no release file - try to get the well known indices
        foreach my $file_extension (".gz", ".bz2", ".xz", ".lzma", "")
        {
            if (@components)
            {
                # Debian repo: source indexes live below
                # dists/<distribution>/<component>/, the same location
                # process_index() later reads them from.
                foreach my $component (@components)
                {
                    foreach my $path (
                        "/dists/${distribution}/${component}/source/Sources",
                        "/dists/${distribution}/${component}/Contents-source",
                    )
                    {
                        add_url_to_download( "${uri}/${path}${file_extension}" );
                    }
                }
            } else {
                # Flat repo
                add_url_to_download( "${uri}/${distribution}/Sources${file_extension}" );
            }
        }
    }
}
print "]\n\n";

@index_urls = sort keys %urls_to_download;
download_urls( "index", @index_urls );
850 +
851 + ######################################################################################
852 + ## Main download preparations
853 +
%urls_to_download = ();

# Handles of the intermediate list files written under var_path while the
# indexes are processed: ALL, NEW, and one checksum list per hash.
my %files_fh;

# Three-argument open keeps the mode separate from the configured path.
open( $files_fh{ALL}, ">", get_variable("var_path") . "/ALL" ) or die("apt-mirror: can't write to intermediate file (ALL)");
open( $files_fh{NEW}, ">", get_variable("var_path") . "/NEW" ) or die("apt-mirror: can't write to intermediate file (NEW)");
foreach my $hash (@hash_strength)
{
    open( $files_fh{$hash}, ">", get_variable("var_path") . "/" . $checksum_filenames{$hash} ) or die("apt-mirror: can't write to intermediate file (${hash})");
}
864 +
# Cache of stat() results, keyed by filename.
my %stat_cache = ();

# stat() with memoisation: the first result for a filename (including the
# empty list for a missing file) is stored and returned on later calls.
sub _stat
{
    my ($filename) = @_;

    unless ( exists $stat_cache{$filename} )
    {
        $stat_cache{$filename} = [ stat($filename) ];
    }

    return @{ $stat_cache{$filename} };
}

# Forget every cached stat() result.
sub clear_stat_cache
{
    %stat_cache = ();
}
880 +
# Decide whether a mirrored file has to be downloaded (again).
#
# Returns 1 when the local file is missing or empty, or when its size
# differs from $size_on_server; returns 0 when the sizes match. With the
# "unlink" option set to 1, a file of the wrong size is deleted first.
sub need_update
{
    my ( $filename, $size_on_server ) = @_;

    # Element 7 of a stat() result is the size in bytes.
    my $size = ( _stat($filename) )[7];

    return 1 unless $size;
    return 0 if $size_on_server == $size;

    unlink $filename if get_variable("unlink") == 1;
    return 1;
}
897 +
# Read one Packages or Sources index and queue the files it lists.
#
# Arguments:
#   $uri      - repository base URI
#   $index    - path of the index below the repository, without a
#               compression extension
#   $optional - when true, a missing index is skipped silently
#
# The index is looked up relative to the current directory. A compressed
# copy (.gz, .xz, .lzma or .bz2, in that order) is decompressed first.
#
# Every file named in the index is
#   - marked in %skipclean and written to the ALL list,
#   - written with its checksum to the per-hash lists,
#   - written to the NEW list and queued for download when need_update()
#     reports that the mirrored copy is missing or has a different size.
sub process_index
{
    my $uri      = shift;
    my $index    = shift;
    my $optional = shift;

    my $path = sanitise_uri($uri);
    local $/ = "\n\n";    # stanzas are separated by blank lines
    my $mirror = get_variable("mirror_path") . "/" . $path;

    my $index_path = "$path/$index";

    # Decompress the first compressed copy that exists. Each one is read
    # from its own file name, and paths are quoted because the command
    # runs through a shell.
    my @decompressors = (
        [ ".gz",   "gunzip" ],
        [ ".xz",   "xz -d" ],
        [ ".lzma", "xz -d" ],
        [ ".bz2",  "bzip2 -d" ],
    );
    foreach my $decompressor (@decompressors)
    {
        my ( $extension, $command ) = @{$decompressor};
        my $compressed = $index_path . $extension;
        next unless -e $compressed;

        my $status = system( $command . " < " . quoted_path($compressed) . " > " . quoted_path($index_path) );
        warn("apt-mirror: can't decompress $compressed") if $status != 0;
        last;
    }

    my $stream;
    unless ( open $stream, "<", $index_path )
    {
        if ($optional)
        {
            return;
        }
        warn("apt-mirror: can't open index $path/$index in process_index");
        return;
    }

    while ( my $package = <$stream> )
    {
        local $/ = "\n";
        chomp $package;

        # Split the stanza into field name => value pairs.
        my ( undef, %lines ) = split( /^([\w\-]+): */m, $package );

        chomp(%lines);

        if ( exists $lines{"Filename"} )
        {    # Packages index
            my $filename = remove_double_slashes( $path . "/" . $lines{"Filename"} );
            $skipclean{$filename} = 1;
            print { $files_fh{ALL} } $filename . "\n";
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $packages_hashes{$hash};
                print { $files_fh{$hash} } $lines{$index_hash} . " " . $filename . "\n" if $lines{$index_hash};
            }
            if ( need_update( $mirror . "/" . $lines{"Filename"}, $lines{"Size"} ) )
            {
                # Find the strongest hash present in the stanza. A separate
                # loop variable is used: a foreach loop restores its own
                # variable on exit, which would discard the result.
                my $hashsum = undef;
                my $hash    = undef;
                foreach my $candidate (@hash_strength)
                {
                    my $index_hash = $packages_hashes{$candidate};
                    if ( $lines{$index_hash} )
                    {
                        $hash    = $candidate;
                        $hashsum = $lines{$index_hash};
                        last;
                    }
                }
                print { $files_fh{NEW} } $filename . "\n";
                add_url_to_download( $uri . "/" . $lines{"Filename"}, $lines{"Size"}, $hash, $hash, $hashsum, 0 );
            }
        }
        else
        {    # Sources index
            $lines{"Directory"} = "" unless defined $lines{"Directory"};
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $sources_hashes{$hash};
                next unless $lines{$index_hash};

                foreach my $entry ( split( /\n/, $lines{$index_hash} ) )
                {
                    next if $entry eq '';

                    # Each entry is "<checksum> <size> <name>".
                    my @file = split ' ', $entry;
                    die("apt-mirror: invalid Sources format") if @file != 3;
                    my $download_url = $uri . "/" . $lines{"Directory"} . "/" . $file[2];
                    my $filename = remove_double_slashes( $path . "/" . $lines{"Directory"} . "/" . $file[2] );
                    print { $files_fh{$hash} } $file[0] . " " . ${filename} . "\n";

                    # Hashes are visited strongest first, so a file is queued
                    # once, with its strongest checksum.
                    unless ( $skipclean{$filename} )
                    {
                        $skipclean{$filename} = 1;
                        print { $files_fh{ALL} } ${filename} . "\n";
                        if ( need_update( $mirror . "/" . $lines{"Directory"} . "/" . $file[2], $file[1] ) )
                        {
                            print { $files_fh{NEW} } ${download_url} . "\n";
                            add_url_to_download( $uri . "/" . $lines{"Directory"} . "/" . $file[2], $file[1], $hash, $hash, $file[0], 0 );
                        }
                    }
                }
            }
        }
    }

    close $stream;
}
1006 +
# Walk every configured repository and process its Sources / Packages
# indexes; one progress letter is printed per repository.
print "Processing indexes: [";

foreach my $source_entry (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source_entry};
    print "S";

    unless (@components)
    {
        # Flat repository: one Sources index next to the release file.
        process_index( $uri, "/$distribution/Sources" );
        next;
    }

    foreach my $component (@components)
    {
        process_index( $uri, "/dists/$distribution/$component/source/Sources" );
    }
}

foreach my $binary_entry (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary_entry};
    print "P";

    unless (@components)
    {
        # Flat repository: one Packages index next to the release file.
        process_index( $uri, "/$distribution/Packages" );
        next;
    }

    foreach my $component (@components)
    {
        process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" );

        # The binary-all index is optional.
        process_index( $uri, "/dists/$distribution/$component/binary-all/Packages", 1 );
    }
}

clear_stat_cache();

print "]\n\n";

# The intermediate list files are complete now.
close $_ foreach values %files_fh;
1054 +
1055 + ######################################################################################
1056 + ## Main download
1057 +
# Downloads are performed relative to the mirror directory.
chdir( get_variable("mirror_path") ) or die("apt-mirror: can't chdir to mirror");

# Each value of %urls_to_download is added up to obtain the total that is
# announced before the download starts.
my $need_bytes = 0;
$need_bytes += $_ for values %urls_to_download;

my $size_output = format_bytes($need_bytes);

print $size_output . " will be downloaded into archive.\n";

download_urls( "archive", sort keys %urls_to_download );
1071 +
1072 + ######################################################################################
1073 + ## Copy skel to main archive
1074 +
# Place a copy of $from at $to, preferring a hard link and falling back to a
# real copy, then give $to the access/modification times of $from.
#
# Arguments:
#   $from - path of the existing file
#   $to   - destination path; its directory is created when missing
#
# Returns early (doing nothing) when $from is not a regular file or when $to
# already has exactly the same stat() data as $from.  Warns and returns when
# neither link() nor copy() succeeds; dies when the timestamps can't be set.
sub copy_file
{
    my ( $from, $to ) = @_;
    return unless -f $from;

    my $dir = dirname($to);
    make_path($dir) unless -d $dir;

    # With the "unlink" setting enabled, a destination that does not compare
    # equal to the source is removed before it is recreated below.
    if ( get_variable("unlink") == 1 )
    {
        unlink($to) if compare( $from, $to ) != 0;
    }

    my @stat_from = stat($from);
    if ( -f $to )
    {
        # Identical stat() data means there is nothing left to do.
        my @stat_to = stat($to);
        return if "@stat_to" eq "@stat_from";
    }

    unless ( link( $from, $to ) or copy( $from, $to ) )
    {
        # Include the system error so the failure can actually be diagnosed.
        warn("apt-mirror: can't copy $from to $to: $!");
        return;
    }

    my ( $atime, $mtime ) = @stat_from[ 8, 9 ];
    utime( $atime, $mtime, $to ) or die("apt-mirror: can't utime $to: $!");
}
1100 +
# Copy every downloaded release/index file from the skel tree into the
# mirror tree, then populate the alternative names recorded for it in
# %hashsum_to_files and %file_to_hashsums.
foreach my $url ( @release_urls, @index_urls )
{
    # Strip the scheme from a private copy: substituting on the loop alias
    # would rewrite the entries of @release_urls and @index_urls themselves.
    my $location = $url;
    die("apt-mirror: invalid url in index_urls: $url") unless $location =~ s[^\w+://][];

    my $mirror_path   = get_variable("mirror_path");
    my $sanitized_uri = sanitise_uri($location);
    my $mirrored_file = $mirror_path . "/" . $sanitized_uri;

    copy_file( get_variable("skel_path") . "/" . $sanitized_uri, $mirrored_file );

    # If we downloaded any files from a checksum location, now is the time to
    # populate the canonical filename.
    next unless $hashsum_to_files{$sanitized_uri};

    foreach my $filename ( @{ $hashsum_to_files{$sanitized_uri} } )
    {
        copy_file( $mirrored_file, $mirror_path . "/" . $filename );

        next unless $file_to_hashsums{$filename};

        # Every other checksum name recorded for that file gets a copy too.
        foreach my $hashsum_filename ( @{ $file_to_hashsums{$filename} } )
        {
            copy_file( $mirrored_file, $mirror_path . "/" . $hashsum_filename );
        }
    }
}
1125 +
1126 + ######################################################################################
1127 + ## Make cleaning script
1128 +
# State collected while scanning the mirror for files it no longer needs.
my @rm_dirs;                  # directories in which nothing was found to be needed
my @rm_files;                 # files that are not listed in %skipclean
my $unnecessary_bytes = 0;    # running total of the space those files occupy
1131 +
# Cleaning callback for symbolic links.
#
# Whatever path is passed in is ignored: a symlink is always reported as
# needed (true), so it is never queued for removal.
sub process_symlink
{
    return 1;
}
1136 +
# Cleaning callback for regular files.
#
# Argument: $file - path of the file, relative to the current directory.
#
# Returns 1 when the file is listed in %skipclean (after the optional
# "~" -> %7E and "+" -> %2B rewrites) and must be kept.  Otherwise the file
# is queued in @rm_files, its disk usage is added to $unnecessary_bytes and
# 0 is returned.
sub process_file
{
    my $file = shift;
    $file =~ s[~][%7E]g  if get_variable("_tilde");
    $file =~ s[\+][%2B]g if get_variable("_plus");
    return 1 if $skipclean{$file};

    push @rm_files, sanitise_uri($file);

    # Field 12 of stat() is the number of allocated blocks.  It is undef when
    # stat() fails and may be empty on systems that don't report it, so only
    # count it when there is a usable value.
    my $blocks = ( stat($file) )[12];
    $unnecessary_bytes += $blocks * 512 if $blocks;

    return 0;
}
1148 +
# Recursively decide whether a directory still contains anything needed.
#
# Argument: $dir - directory path, relative to the current directory.
#
# Returns a true value when the directory is listed in %skipclean or when any
# entry below it is needed; otherwise the directory is queued in @rm_dirs and
# 0 is returned.  Dies when the directory can't be opened.
sub process_directory
{
    my $dir = shift;
    return 1 if $skipclean{$dir};

    # Read the whole listing first and release the handle, so that deep
    # recursion doesn't keep one directory handle open per level.
    opendir( my $dir_h, $dir ) or die "apt-mirror: can't opendir $dir: $!";
    my @entries = grep { !/^\.\.?$/ } readdir($dir_h);
    closedir $dir_h;

    my $is_needed = 0;
    foreach my $entry (@entries)
    {
        my $item = $dir . "/" . $entry;

        # -d and -f follow symbolic links, so the link test has to come
        # first: a symlink pointing at a regular file must be treated as a
        # symlink only and never be queued for deletion by process_file().
        if ( -l $item )
        {
            $is_needed |= process_symlink($item);
        }
        elsif ( -d $item )
        {
            $is_needed |= process_directory($item);
        }
        elsif ( -f $item )
        {
            $is_needed |= process_file($item);
        }
    }

    push @rm_dirs, $dir unless $is_needed;
    return $is_needed;
}
1166 +
# The paths in %clean_directory are examined relative to the mirror directory.
chdir( get_variable("mirror_path") ) or die("apt-mirror: can't chdir to mirror");

for my $candidate ( keys %clean_directory )
{
    # Only real directories are scanned; symlinks are left alone.
    next if -l $candidate;
    process_directory($candidate) if -d $candidate;
}
1173 +
# The clean script is (re)created on every run.  With "_autoclean" enabled
# the unneeded files are removed right away and the script stays empty;
# otherwise the script is filled with the shell commands that remove them.
open( my $clean_fh, '>', get_variable("cleanscript") )
  or die("apt-mirror: can't open clean script file: $!");

my ( $i, $total ) = ( 0, scalar @rm_files );

if ( get_variable("_autoclean") )
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories will be freed...";

    chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

    # Report what could not be removed instead of silently carrying on.
    foreach my $file (@rm_files)
    {
        unlink($file) or warn("apt-mirror: can't remove file $file: $!");
    }
    foreach my $dir (@rm_dirs)
    {
        rmdir($dir) or warn("apt-mirror: can't remove directory $dir: $!");
    }

    # Terminate the progress line started above.
    print "done.\n\n";

}
else
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories can be freed.\n";
    print "Run " . get_variable("cleanscript") . " for this purpose.\n\n";

    print {$clean_fh} "#!/bin/sh\n";
    print {$clean_fh} "set -e\n\n";
    print {$clean_fh} "cd " . quoted_path( get_variable("mirror_path") ) . "\n\n";
    print {$clean_fh} "echo 'Removing $total unnecessary files [$size_output]...'\n";
    foreach my $file (@rm_files)
    {
        # Quote through quoted_path(), like the "cd" line above, so that a
        # file name containing a single quote can't break out of the quoting.
        print {$clean_fh} "rm -f " . quoted_path($file) . "\n";
        print {$clean_fh} "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 500;
        print {$clean_fh} "echo -n .\n" unless $i % 10;
        $i++;
    }
    print {$clean_fh} "echo 'done.'\n";
    print {$clean_fh} "echo\n\n";

    $i     = 0;
    $total = scalar @rm_dirs;
    print {$clean_fh} "echo 'Removing $total unnecessary directories...'\n";
    foreach my $dir (@rm_dirs)
    {
        my $quoted_dir = quoted_path($dir);
        print {$clean_fh} "if test -d $quoted_dir; then rm -fr $quoted_dir; fi\n";
        print {$clean_fh} "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 50;
        print {$clean_fh} "echo -n .\n";
        $i++;
    }
    print {$clean_fh} "echo 'done.'\n";
    print {$clean_fh} "echo\n";

}

# Close in both modes; write errors on a buffered handle only show up here.
close($clean_fh) or die("apt-mirror: can't write clean script file: $!");

# Make clean script executable
my $perm = ( stat( get_variable("cleanscript") ) )[2] & 07777;
chmod( $perm | 0111, get_variable("cleanscript") )
  or warn("apt-mirror: can't make clean script executable: $!");
1231 +
# Optionally run the post-mirror hook, then release the apt-mirror lock.
# An executable hook is run directly; anything else is handed to /bin/sh.
if ( get_variable("run_postmirror") )
{
    my $postmirror_script = get_variable("postmirror_script");

    print "Running the Post Mirror script ...\n";
    print "(" . $postmirror_script . ")\n\n";

    my @command;
    if ( -x $postmirror_script )
    {
        @command = ($postmirror_script);
    }
    else
    {
        @command = ( '/bin/sh', $postmirror_script );
    }

    # The indirect-object form keeps the shell out of the picture even for a
    # single-element list, without handing the script a bogus empty argument.
    my $status = system { $command[0] } @command;
    if ( $status == -1 )
    {
        warn("apt-mirror: can't run postmirror script: $!\n");
    }
    elsif ( $status & 127 )
    {
        warn( "apt-mirror: postmirror script died with signal " . ( $status & 127 ) . "\n" );
    }
    elsif ( $status != 0 )
    {
        warn( "apt-mirror: postmirror script exited with status " . ( $status >> 8 ) . "\n" );
    }

    print "\nPost Mirror script has completed. See above output for any possible errors.\n\n";
}

unlock_aptmirror();
Newer Older