Last active 1727105354

apt-mirror Raw
1#!/usr/bin/perl
2
3=pod
4
5=head1 NAME
6
7apt-mirror - apt sources mirroring tool
8
9=head1 SYNOPSIS
10
11apt-mirror [configfile]
12
13=head1 DESCRIPTION
14
15A small and efficient tool that lets you mirror a part of or
16the whole Debian GNU/Linux distribution or any other apt sources.
17
18Main features:
19 * It uses a config similar to APT's F<sources.list>
20 * It's fully pool compliant
21 * It supports multithreaded downloading
22 * It supports multiple architectures at the same time
23 * It can automatically remove unneeded files
24 * It works well on an overloaded Internet connection
 * It never produces an inconsistent mirror, even while mirroring is in progress
26 * It works on all POSIX compliant systems with Perl and wget
27
28=head1 COMMENTS
29
30apt-mirror uses F</etc/apt/mirror.list> as a configuration file.
31By default it is tuned to official Debian or Ubuntu mirrors. Change
32it for your needs.
33
After you set up the configuration file you may run as root:
35
36 # su - apt-mirror -c apt-mirror
37
38Or uncomment the line in F</etc/cron.d/apt-mirror> to enable daily mirror updates.
39
40=head1 FILES
41
42F</etc/apt/mirror.list>
43 Main configuration file
44
45F</etc/cron.d/apt-mirror>
46 Cron configuration template
47
48F</var/spool/apt-mirror/mirror>
 The mirror is placed here
50
51F</var/spool/apt-mirror/skel>
52 Place for temporarily downloaded indexes
53
54F</var/spool/apt-mirror/var>
 Log files are placed here, along with the URL lists and checksum files.
56
57=head1 CONFIGURATION EXAMPLES
58
The mirror.list configuration supports many options; the file is well commented and explains each option.
60Here are some sample mirror configuration lines showing the various supported ways:
61
62Normal:
63deb http://example.com/debian stable main contrib non-free
64
65Arch Specific: (many other architectures are supported)
66deb-powerpc http://example.com/debian stable main contrib non-free
67
68HTTP and FTP Auth or non-standard port:
69deb http://user:pass@example.com:8080/debian stable main contrib non-free
70
71HTTPS with sending Basic HTTP authentication information (plaintext username and password) for all requests:
72(this was default behaviour of Wget 1.10.2 and prior and is needed for some servers with new version of Wget)
73set auth_no_challenge 1
74deb https://user:pass@example.com:443/debian stable main contrib non-free
75
76HTTPS without checking certificate:
77set no_check_certificate 1
78deb https://example.com:443/debian stable main contrib non-free
79
80Source Mirroring:
81deb-src http://example.com/debian stable main contrib non-free
82
83=head1 AUTHORS
84
85Dmitry N. Hramtsov E<lt>hdn@nsu.ruE<gt>
86Brandon Holtsclaw E<lt>me@brandonholtsclaw.comE<gt>
87
88=cut
89
90use warnings;
91use strict;
92use File::Copy;
93use File::Compare;
94use File::Path qw(make_path);
95use File::Basename;
96use Fcntl qw(:flock);
97
# Path of the configuration file; it is assigned once the command line has
# been examined.
my $config_file;

# Built-in defaults for every configuration variable.  A value may refer to
# another variable as $name; get_variable() expands such references on lookup.
my %config_variables = (
    defaultarch          => `dpkg --print-architecture 2>/dev/null` || 'i386',
    nthreads             => 20,
    base_path            => '/var/spool/apt-mirror',
    mirror_path          => '$base_path/mirror',
    skel_path            => '$base_path/skel',
    var_path             => '$base_path/var',
    cleanscript          => '$var_path/clean.sh',
    postmirror_script    => '$var_path/postmirror.sh',
    run_postmirror       => 1,
    limit_rate           => '100m',
    auth_no_challenge    => 0,
    no_check_certificate => 0,
    unlink               => 0,
    paranoid             => 0,
    use_proxy            => 'off',
    http_proxy           => '',
    https_proxy          => '',
    proxy_user           => '',
    proxy_password       => '',
    _contents            => 1,
    _autoclean           => 0,
    _tilde               => 0,
    _plus                => 0,
);

# Repositories collected from the "deb" and "deb-src" configuration lines.
my @config_binaries;
my @config_sources;

# URL lists of the release and index stages, plus bookkeeping for child
# processes and for the cleaning stage.
my @release_urls;
my @index_urls;
my @childrens;
my %skipclean;
my %clean_directory;

# Supported hash types, strongest first.
my @hash_strength = qw(SHA512 SHA256 SHA1 MD5Sum);

# Field carrying each hash type in a Packages index.
my %packages_hashes = (
    MD5Sum => "MD5sum",
    SHA1   => "SHA1",
    SHA256 => "SHA256",
    SHA512 => "SHA512",
);

# Field carrying each hash type in a Sources index.
my %sources_hashes = (
    MD5Sum => "Files",
    SHA1   => "Checksums-Sha1",
    SHA256 => "Checksums-Sha256",
    SHA512 => "Checksums-Sha512",
);

# External program used to verify each hash type.
my %verify_commands = (
    MD5Sum => "md5sum",
    SHA1   => "sha1sum",
    SHA256 => "sha256sum",
    SHA512 => "sha512sum",
);

# Name of the per-hash checksum list written below var_path.
my %checksum_filenames = (
    MD5Sum => "MD5",
    SHA1   => "SHA1",
    SHA256 => "SHA256",
    SHA512 => "SHA512",
);

# Mapping of files downloaded from a by-hash directory to their canonical locations.
my %hashsum_to_files;

# Mapping of all the checksums for a given canonical filename.
my %file_to_hashsums;

# Mapping of a download URL to its [ hash type, hash sum ] pair.
my %urls_checksums;
166
167######################################################################################
168## Setting up $config_file variable
169
# Start from the default location; a first command-line argument overrides it.
$config_file = "/etc/apt/mirror.list";
if ( $_ = shift @ARGV )
{
    -e $_ or die("apt-mirror: invalid config file specified");
    $config_file = $_;
}

# The default architecture may come from command output, which ends with a newline.
chomp $config_variables{"defaultarch"};
178
179######################################################################################
180## Common subroutines
181
# Format a number with one decimal place.
#   $value - the number to format (may be negative)
# Returns the formatted string, e.g. "2.0" or "-3.5".
sub round_number
{
    my ($value) = @_;

    my $sign = '';
    $sign = '-' if $value < 0;

    # Work on the magnitude: add 0.05, then cut off after the first decimal.
    my $rounded = int( ( abs($value) + .05 ) * 10 ) / 10;

    # Always show a fractional part.
    $rounded .= '.0' if $rounded !~ /\./;
    $rounded .= '0' if substr( $rounded, -1 ) eq '.';
    chop $rounded if $rounded =~ /\.\d\d0$/;

    return $sign . $rounded;
}
193
# Render a byte count for display.
#   $bytes - number of bytes
# Counts below 1024 are returned verbatim followed by "bytes"; larger counts
# are scaled to KiB, MiB or GiB and formatted by round_number().
sub format_bytes
{
    my ($bytes) = @_;

    my $kib = 1024;
    my $mib = 1024 * 1024;
    my $gib = 1024 * 1024 * 1024;

    return "$bytes bytes" unless $bytes >= $kib;

    # Pick the largest unit that does not exceed the value.
    my ( $divisor, $unit ) =
        $bytes >= $gib ? ( $gib, 'GiB' )
      : $bytes >= $mib ? ( $mib, 'MiB' )
      :                  ( $kib, 'KiB' );

    return round_number( $bytes / $divisor ) . " $unit";
}
227
# Look up a configuration variable and expand $name references in its value.
#   first argument - name of the variable in %config_variables
# Returns the expanded value.  Dies when references are still being replaced
# after too many passes (e.g. variables that refer to each other).
sub get_variable
{
    my ($name) = @_;
    my $expanded = $config_variables{$name};

    my $passes_left = 16;
    while ( $expanded =~ s/\$(\w+)/$config_variables{$1}/xg )
    {
        my $remaining = $passes_left--;
        die("apt-mirror: too many substitution while evaluating variable") if $remaining < 0;
    }

    return $expanded;
}
238
# Wrap a string in single quotes for use in a shell command.
#   $path - the string to quote
# Every embedded single quote is written as '\'' so the result stays one
# shell word.
sub quoted_path
{
    my ($path) = @_;

    # Split on the quotes (keeping empty pieces) and glue the pieces back
    # together with the escaped form.
    my @pieces = split( /'/, $path, -1 );
    return "'" . join( q{'\''}, @pieces ) . "'";
}
245
# Take an exclusive, non-blocking lock on var_path/apt-mirror.lock.
# Dies when the lock file cannot be opened or when the lock is already held.
# The LOCK_FILE handle stays open so the lock is kept until
# unlock_aptmirror() closes it.
sub lock_aptmirror
{
    my $lock_path = get_variable("var_path") . "/apt-mirror.lock";

    # Report an unusable lock file as such instead of letting flock() fail
    # on a closed handle and blame a running instance.
    open( LOCK_FILE, '>', $lock_path )
      or die("apt-mirror: can't open lock file $lock_path: $!");

    flock( LOCK_FILE, LOCK_EX | LOCK_NB )
      or die("apt-mirror is already running, exiting");

    return;
}
255
# Close the LOCK_FILE handle and remove var_path/apt-mirror.lock.
sub unlock_aptmirror
{
    my $lock_path = get_variable("var_path") . "/apt-mirror.lock";
    close(LOCK_FILE);
    unlink($lock_path);
}
261
# Verify the files of a download stage against the checksum lists written by
# download_urls() and delete every file that fails verification.
#   $stage - stage name; the lists are var_path/<stage>-<hash type>
# Returns the number of files deleted.  Dies when a verification command
# cannot be started or a corrupted file cannot be removed.
sub delete_corrupted_files
{
    my $stage = shift;
    my $found = 0;

    foreach my $hash (@hash_strength)
    {
        my $file = get_variable("var_path") . "/${stage}-${hash}";
        next unless -s $file;    # nothing was recorded for this hash type

        my $command = $verify_commands{$hash};

        # The command runs through the shell (for the stderr redirect), so
        # the list path is quoted.
        open my $pipe, "-|", "env LC_ALL=C $command --check --quiet " . quoted_path($file) . " 2>/dev/null"
          or die "Cannot run $command";

        while ( my $line = <$pipe> )
        {
            # Only lines of the form "<name>: FAILED..." name a bad file.
            my ($filename) = $line =~ /^(.*): FAILED/;
            next unless defined $filename && -f $filename;

            $found++;
            print "$filename is corrupted, deleting....\n";
            unlink $filename or die "Cannot delete $filename.";
        }
        close $pipe;
    }

    return $found;
}
288
# Download a list of URLs with several parallel wget processes.
#   $stage - stage name, used in the names of the intermediate files below
#            var_path (<stage>-urls.N, <stage>-log.N, <stage>-<hash type>)
#   @urls  - the URLs to fetch
# With "paranoid" set, the recorded checksums are written out first and the
# downloaded files are verified afterwards; the run dies when any file had to
# be deleted.  Finally, files fetched from by-hash locations are copied to
# their canonical names.
sub download_urls
{
    my $stage    = shift;
    my @urls     = @_;
    my $var_path = get_variable("var_path");
    my $nthreads = get_variable("nthreads");
    my @args     = ();
    local $| = 1;

    # Never start more workers than there are URLs.
    $nthreads = @urls if @urls < $nthreads;

    push( @args, "--auth-no-challenge" )    if get_variable("auth_no_challenge") == 1;
    push( @args, "--no-check-certificate" ) if get_variable("no_check_certificate") == 1;
    push( @args, "--unlink" )               if get_variable("unlink") == 1;

    my $use_proxy = get_variable("use_proxy");
    if ( length($use_proxy) && ( $use_proxy eq 'yes' || $use_proxy eq 'on' ) )
    {
        if ( length( get_variable("http_proxy") ) || length( get_variable("https_proxy") ) ) { push( @args, "-e use_proxy=yes" ); }
        if ( length( get_variable("http_proxy") ) )     { push( @args, "-e http_proxy=" . get_variable("http_proxy") ); }
        if ( length( get_variable("https_proxy") ) )    { push( @args, "-e https_proxy=" . get_variable("https_proxy") ); }
        if ( length( get_variable("proxy_user") ) )     { push( @args, "-e proxy_user=" . get_variable("proxy_user") ); }
        if ( length( get_variable("proxy_password") ) ) { push( @args, "-e proxy_password=" . get_variable("proxy_password") ); }
    }
    print "Downloading " . scalar(@urls) . " $stage files using $nthreads threads...\n";

    if ( get_variable("paranoid") )
    {
        # One checksum list per hash type, consumed by delete_corrupted_files().
        my %fh = ();
        foreach my $hash (@hash_strength)
        {
            open $fh{$hash}, ">", "$var_path/${stage}-${hash}" or die("apt-mirror: Cannot write to ${stage}-${hash}");
        }

        foreach my $url (@urls)
        {
            next unless $urls_checksums{$url};
            my ( $hash, $hashsum ) = @{ $urls_checksums{$url} };
            print { $fh{$hash} } $hashsum . " " . sanitise_uri($url) . "\n";
        }

        foreach my $hash (@hash_strength)
        {
            close $fh{$hash};
        }
    }

    # Spread the URLs round-robin over one list file per worker.
    my @url_fds;
    foreach my $thread ( 0 .. $nthreads - 1 )
    {
        open( $url_fds[$thread], ">", "$var_path/$stage-urls.$thread" ) or die("apt-mirror: can't write to intermediate file ($stage-urls.$thread)");
    }

    for ( my $i = scalar(@urls) - 1 ; $i >= 0 ; $i-- )
    {
        print { $url_fds[ $i % $nthreads ] } $urls[$i] . "\n";
    }

    foreach my $thread ( 0 .. $#url_fds )
    {
        # Name the file that actually failed in the message.
        close $url_fds[$thread] or die("apt-mirror: can't close intermediate file ($stage-urls.$thread)");
    }

    foreach my $thread ( 0 .. $nthreads - 1 )
    {
        my $pid = fork();

        die("apt-mirror: can't do fork in download_urls") if !defined($pid);

        if ( $pid == 0 )
        {
            exec 'wget', '--no-if-modified-since', '--no-cache', '--limit-rate=' . get_variable("limit_rate"), '-T', '60', '-t', '1', '-r', '-N', '-l', 'inf', '-o', "$var_path/$stage-log.$thread", '-i', "$var_path/$stage-urls.$thread", @args;

            # shouldn't reach this unless exec fails
            die("\n\nCould not run wget, please make sure its installed and in your path\n\n");
        }

        push @childrens, $pid;
    }

    print "Begin time: " . localtime() . "\n[" . scalar(@childrens) . "]... ";
    while ( scalar @childrens )
    {
        my $dead = wait();
        if ( $dead == -1 )
        {
            # No child is left to reap; stop instead of looping forever.
            @childrens = ();
            last;
        }
        @childrens = grep { $_ != $dead } @childrens;
        print "[" . scalar(@childrens) . "]... ";
    }
    print "\nEnd time: " . localtime() . "\n\n";

    if ( get_variable("paranoid") )
    {
        if ( delete_corrupted_files($stage) > 0 )
        {
            die "Some files were corrupted while downloading, aborting...";
        }
    }

    # Populate the canonical names of everything fetched via by-hash.
    foreach my $hashsum_filename ( keys %hashsum_to_files )
    {
        foreach my $filename ( @{ $hashsum_to_files{$hashsum_filename} } )
        {
            copy_file( $hashsum_filename, $filename );
        }
    }

    return;
}
401
402## Parse config
403
# Parse one line of the configuration file.
#   optional argument - the line to parse; defaults to $_
# Returns a hash describing the line, or an empty hash when the line is not
# recognised:
#   deb / deb-src     : type, arch, options, uri, components (array ref)
#   set               : type ('set'), key, value (surrounding quotes removed)
#   clean / skip-clean: type, uri
# All three forms must start at the beginning of the line; leading blanks
# are allowed.
sub parse_config_line
{
    my $line = @_ ? shift : $_;
    my $pattern_deb_line = qr/^[\t ]*(?<type>deb-src|deb)(?:-(?<arch>[\w\-]+))?[\t ]+(?:\[(?<options>[^\]]+)\][\t ]+)?(?<uri>[^\s]+)[\t ]+(?<components>.+)$/;
    my %config;

    if ( $line =~ $pattern_deb_line )
    {
        $config{'type'}       = $+{type};
        $config{'arch'}       = $+{arch};
        $config{'options'}    = $+{options} ? $+{options} : "";
        $config{'uri'}        = $+{uri};
        $config{'components'} = [ split /\s+/, $+{components} ];

        # An arch= option overrides the architecture from the "deb-<arch>" form.
        if ( $config{'options'} =~ /arch=(?<arch>[\w\-]+)/ )
        {
            $config{'arch'} = $+{arch};
        }
    }
    elsif ( $line =~ /^[\t ]*set[\t ]+(?<key>[^\s]+)[\t ]+(?<value>"[^"]+"|'[^']+'|[^\s]+)/ )
    {
        $config{'type'}  = 'set';
        $config{'key'}   = $+{key};
        $config{'value'} = $+{value};
        $config{'value'} =~ s/^'(.*)'$/$1/;
        $config{'value'} =~ s/^"(.*)"$/$1/;
    }
    elsif ( $line =~ /^[\t ]*(?<type>skip-clean|clean)[\t ]+(?<uri>[^\s]+)/ )
    {
        $config{'type'} = $+{type};
        $config{'uri'}  = $+{uri};
    }

    return %config;
}
432
# Turn a URI into the relative path used for it on disk.
#   $uri - the URI to convert
# Removes the scheme, removes a leading "user:pass@" part when the first
# path segment contains an '@', escapes '~' and '+' when the _tilde / _plus
# variables are set, and drops one trailing slash.
sub sanitise_uri
{
    my ($uri) = @_;

    $uri =~ s{^(\w+)://}{};

    # Only the segment before the first slash decides whether credentials
    # are present.
    my ($authority) = split '/', $uri;
    if ( $authority =~ /@/ )
    {
        $uri =~ s/^([^@]+)?@?//;
    }

    if ( get_variable("_tilde") )
    {
        $uri =~ s/~/\%7E/g;
    }
    if ( get_variable("_plus") )
    {
        $uri =~ s/\+/\%2B/g;
    }

    $uri =~ s{/$}{};
    return $uri;
}
443
# Read the configuration file line by line.  "set" lines update
# %config_variables, "deb" / "deb-src" lines are collected in
# @config_binaries / @config_sources, and "clean" / "skip-clean" lines fill
# %clean_directory / %skipclean.  Any other non-comment line is fatal.
open my $config_fh, '<', $config_file or die("apt-mirror: can't open config file ($config_file)");
while (<$config_fh>)
{
    next if /^\s*#/;
    next unless /\S/;
    my $line        = $_;
    my %config_line = parse_config_line();

    # An unrecognised line yields no type at all; treat that as an empty
    # string so it reaches the error below without warnings.
    my $type = defined $config_line{'type'} ? $config_line{'type'} : '';

    if ( $type eq "set" )
    {
        $config_variables{ $config_line{'key'} } = $config_line{'value'};
        next;
    }
    elsif ( $type eq "deb" )
    {
        my $arch = $config_line{'arch'};
        $arch = get_variable("defaultarch") if !defined $arch;
        push @config_binaries, [ $arch, $config_line{'uri'}, @{ $config_line{'components'} } ];
        next;
    }
    elsif ( $type eq "deb-src" )
    {
        push @config_sources, [ $config_line{'uri'}, @{ $config_line{'components'} } ];
        next;
    }
    elsif ( $type eq "skip-clean" )
    {
        $skipclean{ sanitise_uri( $config_line{'uri'} ) } = 1;
        next;
    }
    elsif ( $type eq "clean" )
    {
        $clean_directory{ sanitise_uri( $config_line{'uri'} ) } = 1;
        next;
    }

    die("apt-mirror: invalid line in config file ($.: $line ...)");
}
close $config_fh;

die("Please explicitly specify 'defaultarch' in mirror.list") unless get_variable("defaultarch");
478
479######################################################################################
480## Create the 3 needed directories if they don't exist yet
# The mirror, skel and var directories must exist before anything is written.
my @needed_directories = map { get_variable($_) } qw(mirror_path skel_path var_path);
foreach my $needed_directory (@needed_directories)
{
    next if -d $needed_directory;
    make_path($needed_directory) or die("apt-mirror: can't create $needed_directory directory");
}
#
#######################################################################################

# Refuse to run alongside another instance.
lock_aptmirror();

######################################################################################
## Skel download

# URL => size of everything queued for the current download stage.
my %urls_to_download = ();
my ( $url, $arch );
499
# Normalise the path part of a URL.
#   first argument - the URL or path to clean up
# Collapses "/./", repeated slashes (except directly after a ':') and
# "dir/../" sequences, then escapes '~' and '+' when the _tilde / _plus
# variables are set.  Returns the cleaned string.
sub remove_double_slashes
{
    my ($path) = @_;

    # Each substitution is repeated until it no longer changes anything.
    1 while $path =~ s[/\./][/]g;
    1 while $path =~ s[(?<!:)//][/]g;
    1 while $path =~ s[(?<!:/)/[^/]+/\.\./][/]g;

    $path =~ s/~/\%7E/g if get_variable("_tilde");
    $path =~ s/\+/\%2B/g if get_variable("_plus");
    return $path;
}
510
# Queue a URL for the current download stage.
#   url             - location of the file
#   size            - expected size (stored as the value in %urls_to_download)
#   strongest_hash  - strongest hash type available for the file, if any
#   hash, hashsum   - hash type and value this call is about
#   acquire_by_hash - true to fetch from the by-hash location instead
# The canonical on-disk name is always protected from cleaning.
sub add_url_to_download
{
    my ( $raw_url, $size, $strongest_hash, $hash, $hashsum, $acquire_by_hash ) = @_;

    my $url                = remove_double_slashes($raw_url);
    my $canonical_filename = sanitise_uri($url);
    $skipclean{$canonical_filename} = 1;

    unless ($acquire_by_hash)
    {
        # Not using by-hash, so download the file only; remember its
        # checksum when this call carries the strongest hash.
        $urls_to_download{$url} = $size;
        $urls_checksums{$url} = [ $hash, $hashsum ] if $strongest_hash and ( $hash eq $strongest_hash );
        return;
    }

    # By-hash: the file lives next to the canonical one, below
    # by-hash/<hash type>/<hash sum>.
    my $by_hash_suffix = "/by-hash/${hash}/${hashsum}";
    $url = dirname($url) . $by_hash_suffix;
    my $hashsum_filename = dirname($canonical_filename) . $by_hash_suffix;
    $skipclean{$hashsum_filename} = 1;

    if ( $hash eq $strongest_hash )
    {
        # Only the strongest hash is downloaded; note which canonical
        # location has to be populated from it afterwards.
        push @{ $hashsum_to_files{$hashsum_filename} }, $canonical_filename;
        $urls_to_download{$url} = $size;
        $urls_checksums{$url}   = [ $hash, $hashsum ];
    }
    else
    {
        # Weaker hashes are not downloaded, but their by-hash names still
        # need a copy of the file later on.
        push @{ $file_to_hashsums{$canonical_filename} }, $hashsum_filename;
    }
    return;
}
559
# Queue InRelease, Release and Release.gpg for every source repository.  A
# repository with components uses the dists/ layout; one without is flat.
foreach my $source (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source};

    $url = @components
      ? $uri . "/dists/" . $distribution . "/"
      : $uri . "/" . $distribution . "/";

    foreach my $release_file (qw(InRelease Release Release.gpg))
    {
        add_url_to_download( $url . $release_file );
    }
}

# Same for every binary repository.
foreach my $binary (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary};

    $url = @components
      ? $uri . "/dists/" . $distribution . "/"
      : $uri . "/" . $distribution . "/";

    foreach my $release_file (qw(InRelease Release Release.gpg))
    {
        add_url_to_download( $url . $release_file );
    }
}

# Release files are fetched into the skel tree.
chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel");
@release_urls = sort keys %urls_to_download;
download_urls( "release", @release_urls );

######################################################################################
## Download all relevant metadata

%urls_to_download = ();
606
# Look in the downloaded InRelease / Release file of a repository for the
# metadata files that have to be mirrored, and queue them for download.
#   $arch         - architecture, or "source" for a deb-src repository
#   $uri          - base URI of the repository
#   $distribution - distribution (or directory, for a flat repository)
#   @components   - components; empty for a flat repository
# Returns 1 when a release file was found and processed, 0 otherwise.
sub find_metadata_in_release
{
    my ( $arch, $uri, $distribution, @components ) = @_;

    my $compressed_extension_regex = '(?:\.(?:gz|bz2|xz|lzma))$';
    my $hash_type_regex            = "(?:" . join( "|", @hash_strength ) . ")";

    # Names taken from the configuration are matched literally.
    my $arch_regex      = "(?:" . quotemeta($arch) . "|all)";
    my $component_regex = undef;
    my $dist_uri;

    if (@components)
    {
        $dist_uri        = remove_double_slashes( $uri . "/dists/" . $distribution . "/" );
        $component_regex = "(?:" . join( "|", map { quotemeta } @components ) . ")";
    }
    else
    {
        $dist_uri = remove_double_slashes( $uri . "/" . $distribution . "/" );
    }

    # Patterns of the file names worth downloading, built once per call.
    my @wanted;
    if ( $arch eq "source" )
    {
        if ($component_regex)
        {
            # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
            @wanted = (
                qr{^${component_regex}/source/Sources${compressed_extension_regex}},
                qr{^${component_regex}/Contents-source${compressed_extension_regex}},
            );
        }
        else
        {
            # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
            @wanted = ( qr{^Sources${compressed_extension_regex}} );
        }
    }
    elsif ($component_regex)
    {
        # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
        @wanted = (
            qr{^${component_regex}/Contents-${arch_regex}${compressed_extension_regex}},
            qr{^Contents-${arch_regex}${compressed_extension_regex}},
            qr{^Packages${compressed_extension_regex}},
            qr{^${component_regex}/binary-${arch_regex}/Packages${compressed_extension_regex}},
            qr{^${component_regex}/binary-${arch_regex}/Release$},
            qr{^${component_regex}/cnf/Commands-${arch_regex}${compressed_extension_regex}},
            qr{^${component_regex}/dep11/Components-${arch_regex}.*${compressed_extension_regex}},
            qr{^${component_regex}/dep11/icons-.*${compressed_extension_regex}},
            qr{^${component_regex}/i18n/Translation-.*${compressed_extension_regex}},
        );
    }
    else
    {
        # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
        @wanted = ( qr{^Packages${compressed_extension_regex}} );
    }

    # Prefer InRelease, fall back to Release.
    my $stream;
    my $release_uri = '';
    foreach my $release_filename ( "InRelease", "Release" )
    {
        $release_uri = $dist_uri . $release_filename;
        my $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);

        last if ( open $stream, "<", $release_path );
        $stream = undef;
    }

    unless ($stream)
    {
        warn( "Failed to find InRelease or Release in " . get_variable("skel_path") . "/" . sanitise_uri($dist_uri) );
        return 0;
    }

    my $hash             = undef;    # hash section currently being read
    my %available_hashes = ();
    my $acquire_by_hash  = 0;
    my @parts_to_download = ();

    while ( my $line = <$stream> )
    {
        chomp $line;

        if ($hash)
        {
            if ( $line =~ /^ +(.*)$/ )
            {
                # Indented lines inside a hash section: "<sum> <size> <name>".
                my $entry = $1;
                my @parts = split( / +/, $entry );
                if ( @parts == 3 )
                {
                    my $filename = $parts[2];
                    if ( grep { $filename =~ $_ } @wanted )
                    {
                        push @parts_to_download, [ @parts, $hash ];
                    }
                }
                else
                {
                    warn("Malformed checksum line \"$entry\" in $release_uri");
                }
                next;
            }

            # A non-indented line ends the section; it is examined below.
            $hash = undef;
        }

        if ( $line =~ /^(${hash_type_regex}):$/ )
        {
            $hash = $1;
            $available_hashes{$hash} = 1;
        }
        elsif ( $line eq "Acquire-By-Hash: yes" )
        {
            $acquire_by_hash = 1;
        }
    }
    close $stream;

    # By-hash downloads use the strongest hash the release file offers.
    my $strongest_hash;
    if ($acquire_by_hash)
    {
        foreach my $candidate (@hash_strength)
        {
            if ( $available_hashes{$candidate} )
            {
                $strongest_hash = $candidate;
                last;
            }
        }
        unless ($strongest_hash)
        {
            warn("Cannot find a supported hash in $release_uri, will download from canonical locations.");
            $acquire_by_hash = 0;
        }
    }

    foreach my $part (@parts_to_download)
    {
        my ( $hashsum, $size, $filename, $part_hash ) = @{$part};
        add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $part_hash, $hashsum, $acquire_by_hash );
    }
    return 1;
}
777
# Work out which index files to fetch for every repository.  The release
# file is used when there is one; otherwise the well known index locations
# are queued with every supported compression extension.
print "Processing metadata files from releases [";
foreach my $binary (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary};
    print "M";
    next if find_metadata_in_release( $arch, $uri, $distribution, @components );

    # Insecure repo with no release file - try to get the well known indices
    foreach my $file_extension ( ".gz", ".bz2", ".xz", ".lzma", "" )
    {
        if (@components)
        {
            # Debian repo
            foreach my $component (@components)
            {
                foreach my $path (
                    "/dists/${distribution}/${component}/binary-${arch}/Packages",
                    "/dists/${distribution}/${component}/binary-all/Packages",
                    "/dists/${distribution}/${component}/Contents-${arch}",
                    "/dists/${distribution}/${component}/Contents-all",
                    "/dists/${distribution}/Contents-${arch}",
                    "/dists/${distribution}/Contents-all",
                  )
                {
                    add_url_to_download("${uri}/${path}${file_extension}");
                }
            }
        }
        else
        {
            # Flat repo
            foreach my $path (
                "${distribution}/Packages",
                "${distribution}/Contents-${arch}",
                "${distribution}/Contents-all",
              )
            {
                add_url_to_download("${uri}/${path}${file_extension}");
            }
        }
    }
}

foreach my $source (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source};
    print "M";
    next if find_metadata_in_release( "source", $uri, $distribution, @components );

    # Insecure repo with no release file - try to get the well known indices
    foreach my $file_extension ( ".gz", ".bz2", ".xz", ".lzma", "" )
    {
        if (@components)
        {
            # Debian repo: the source indices live below
            # dists/<distribution>/<component>/, which is also where
            # process_index() looks for them.
            foreach my $component (@components)
            {
                foreach my $path (
                    "/dists/${distribution}/${component}/source/Sources",
                    "/dists/${distribution}/${component}/Contents-source",
                  )
                {
                    add_url_to_download("${uri}/${path}${file_extension}");
                }
            }
        }
        else
        {
            # Flat repo
            add_url_to_download("${uri}/${distribution}/Sources${file_extension}");
        }
    }
}
print "]\n\n";

@index_urls = sort keys %urls_to_download;
download_urls( "index", @index_urls );
850
851######################################################################################
852## Main download preparations
853
%urls_to_download = ();

# Output handles for the intermediate lists below var_path: ALL (every file
# referenced by the indexes), NEW (files that need downloading) and one
# checksum list per hash type.
my %files_fh;

{
    # Three-argument open keeps the configured path from being read as
    # part of the open mode.
    my $var_path = get_variable("var_path");

    open $files_fh{ALL}, ">", "$var_path/ALL" or die("apt-mirror: can't write to intermediate file (ALL)");
    open $files_fh{NEW}, ">", "$var_path/NEW" or die("apt-mirror: can't write to intermediate file (NEW)");
    foreach my $hash (@hash_strength)
    {
        open $files_fh{$hash}, ">", "$var_path/" . $checksum_filenames{$hash} or die("apt-mirror: can't write to intermediate file (${hash})");
    }
}

# Results of stat() keyed by file name, see _stat().
my %stat_cache = ();
866
# stat() with a per-run cache.
#   first argument - name of the file
# Returns the list stat() produced the first time the name was asked for
# (an empty list when that call failed).
sub _stat
{
    my ($filename) = @_;

    unless ( exists $stat_cache{$filename} )
    {
        $stat_cache{$filename} = [ stat($filename) ];
    }
    return @{ $stat_cache{$filename} };
}
875
# Forget every result remembered by _stat().
sub clear_stat_cache
{
    %stat_cache = ();
    return;
}
880
# Decide whether a file has to be downloaded.
#   $filename       - path of the local copy
#   $size_on_server - size announced by the index
# Returns 1 when the local copy is missing, empty, or of a different size,
# and 0 when the sizes match.  With "unlink" set to 1 a copy of the wrong
# size is deleted first.
sub need_update
{
    my ( $filename, $size_on_server ) = @_;

    # Element 7 of the stat list is the size in bytes.
    my $size = ( _stat($filename) )[7];

    return 1 unless ($size);
    return 0 if $size_on_server == $size;

    unlink $filename if get_variable("unlink") == 1;
    return 1;
}
897
# Read a Packages or Sources index from the current directory and queue the
# files it lists.
#   $uri      - base URI of the repository
#   $index    - path of the index below the repository, without extension
#   $optional - true to stay silent when the index does not exist
# Every listed file is protected from cleaning and written to the ALL list,
# its checksums go to the per-hash lists, and files that need_update()
# reports as missing or changed are written to NEW and queued for download.
sub process_index
{
    my ( $uri, $index, $optional ) = @_;

    my $path   = sanitise_uri($uri);
    my $mirror = get_variable("mirror_path") . "/" . $path;
    my $plain  = "$path/$index";

    # Index entries are separated by blank lines.
    local $/ = "\n\n";

    # Unpack the first compressed variant that exists.  Each variant is
    # read from its own file; paths are quoted because the command runs
    # through the shell.
    foreach my $variant ( [ "gz", "gunzip" ], [ "xz", "xz -d" ], [ "lzma", "xz -d" ], [ "bz2", "bzip2 -d" ] )
    {
        my ( $extension, $command ) = @{$variant};
        my $compressed = "$plain.$extension";
        next unless -e $compressed;

        system( "$command < " . quoted_path($compressed) . " > " . quoted_path($plain) );
        last;
    }

    my $stream;
    unless ( open $stream, "<", $plain )
    {
        return if $optional;
        warn("apt-mirror: can't open index $path/$index in process_index");
        return;
    }

    while ( my $package = <$stream> )
    {
        local $/ = "\n";
        chomp $package;
        my ( undef, %lines ) = split( /^([\w\-]+): */m, $package );

        chomp(%lines);

        if ( exists $lines{"Filename"} )
        {    # Packages index
            my $filename = remove_double_slashes( $path . "/" . $lines{"Filename"} );
            $skipclean{$filename} = 1;
            print { $files_fh{ALL} } $filename . "\n";
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $packages_hashes{$hash};
                print { $files_fh{$hash} } $lines{$index_hash} . " " . $filename . "\n" if $lines{$index_hash};
            }
            if ( need_update( $mirror . "/" . $lines{"Filename"}, $lines{"Size"} ) )
            {
                # Find the strongest hash present in this entry.  The
                # result is copied out of the loop explicitly, because a
                # foreach loop variable does not keep its value afterwards.
                my ( $strongest_hash, $hashsum );
                foreach my $candidate (@hash_strength)
                {
                    my $index_hash = $packages_hashes{$candidate};
                    next unless $lines{$index_hash};
                    ( $strongest_hash, $hashsum ) = ( $candidate, $lines{$index_hash} );
                    last;
                }
                print { $files_fh{NEW} } $filename . "\n";
                add_url_to_download( $uri . "/" . $lines{"Filename"}, $lines{"Size"}, $strongest_hash, $strongest_hash, $hashsum, 0 );
            }
        }
        else
        {    # Sources index
            $lines{"Directory"} = "" unless defined $lines{"Directory"};
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $sources_hashes{$hash};
                next unless $lines{$index_hash};

                foreach my $entry ( split( /\n/, $lines{$index_hash} ) )
                {
                    next if $entry eq '';

                    # Each entry is "<sum> <size> <name>".
                    my @file = split ' ', $entry;
                    die("apt-mirror: invalid Sources format") if @file != 3;
                    my $download_url = $uri . "/" . $lines{"Directory"} . "/" . $file[2];
                    my $filename     = remove_double_slashes( $path . "/" . $lines{"Directory"} . "/" . $file[2] );
                    print { $files_fh{$hash} } $file[0] . " " . ${filename} . "\n";

                    # Hash types are visited strongest first, so a file is
                    # queued with the strongest hash that lists it.
                    next if $skipclean{$filename};
                    $skipclean{$filename} = 1;
                    print { $files_fh{ALL} } ${filename} . "\n";
                    if ( need_update( $mirror . "/" . $lines{"Directory"} . "/" . $file[2], $file[1] ) )
                    {
                        print { $files_fh{NEW} } ${download_url} . "\n";
                        add_url_to_download( $download_url, $file[1], $hash, $hash, $file[0], 0 );
                    }
                }
            }
        }
    }

    close $stream;
    return;
}
1006
# Walk the downloaded indexes of every repository; one progress letter is
# printed per repository (S for sources, P for binaries).
print "Processing indexes: [";

foreach my $source (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source};
    print "S";

    unless (@components)
    {
        process_index( $uri, "/$distribution/Sources" );
        next;
    }
    foreach my $component (@components)
    {
        process_index( $uri, "/dists/$distribution/$component/source/Sources" );
    }
}

foreach my $binary (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary};
    print "P";

    unless (@components)
    {
        process_index( $uri, "/$distribution/Packages" );
        next;
    }
    foreach my $component (@components)
    {
        process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" );

        # The binary-all index is optional.
        process_index( $uri, "/dists/$distribution/$component/binary-all/Packages", 1 );
    }
}

clear_stat_cache();

print "]\n\n";

# The intermediate lists are complete now.
foreach my $list_handle ( values %files_fh )
{
    close $list_handle;
}
1054
1055######################################################################################
1056## Main download
1057
# Archive files are downloaded straight into the mirror tree.
chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

# Total of the sizes recorded for all queued files.
my $need_bytes = 0;
$need_bytes += $_ foreach values %urls_to_download;

my $size_output = format_bytes($need_bytes);

print "$size_output will be downloaded into archive.\n";

download_urls( "archive", sort keys %urls_to_download );
1071
1072######################################################################################
1073## Copy skel to main archive
1074
# Put a copy of a file at another location, preferring a hard link.
#   $from - existing file; nothing happens when it is not a plain file
#   $to   - destination; missing parent directories are created
# With "unlink" set to 1 a destination whose content differs is removed
# first.  The copy is skipped when both names already report identical
# stat() data.  The destination receives the access and modification times
# of the original.  Warns when neither linking nor copying works; dies when
# the times cannot be set.
sub copy_file
{
    my ( $from, $to ) = @_;
    my $dir = dirname($to);
    return unless -f $from;
    make_path($dir) unless -d $dir;

    unlink($to) if get_variable("unlink") == 1 && compare( $from, $to ) != 0;

    my @stat_from = stat($from);
    if ( -f $to )
    {
        my @stat_to = stat($to);
        return if ( "@stat_to" eq "@stat_from" );
    }

    if ( !link( $from, $to ) && !copy( $from, $to ) )
    {
        warn("apt-mirror: can't copy $from to $to");
        return;
    }

    # Elements 8 and 9 of the stat list are atime and mtime.
    utime( @stat_from[ 8, 9 ], $to ) or die("apt-mirror: can't utime $to");
}
1100
# Copy every release and index file from the skel tree into the mirror.
# The scheme is stripped from each list entry in place.
foreach my $entry ( @release_urls, @index_urls )
{
    die("apt-mirror: invalid url in index_urls") unless $entry =~ s[^(\w+)://][];

    my $sanitized_uri = sanitise_uri($entry);
    my $mirror_copy   = get_variable("mirror_path") . "/" . $sanitized_uri;

    copy_file( get_variable("skel_path") . "/" . $sanitized_uri, $mirror_copy );

    # If we downloaded any files from a checksum location, now is the time to
    # populate the canonical filename.
    next unless $hashsum_to_files{$sanitized_uri};

    foreach my $filename ( @{ $hashsum_to_files{$sanitized_uri} } )
    {
        copy_file( $mirror_copy, get_variable("mirror_path") . "/" . $filename );

        # The by-hash names of the weaker hashes get a copy as well.
        next unless $file_to_hashsums{$filename};
        foreach my $hashsum_filename ( @{ $file_to_hashsums{$filename} } )
        {
            copy_file( $mirror_copy, get_variable("mirror_path") . "/" . $hashsum_filename );
        }
    }
}
1125
1126######################################################################################
1127## Make cleaning script
1128
# Directories and files found to be unneeded, and the disk space they occupy.
my @rm_dirs;
my @rm_files;
my $unnecessary_bytes = 0;
1131
# Cleaning decision for a symbolic link.
# Always returns 1: symlinks are kept, and so is the directory holding them.
sub process_symlink
{
    return 1;
}
1136
# Cleaning decision for a plain file.
#   first argument - path of the file, relative to the mirror directory
# Returns 1 when the file is listed in %skipclean and must stay.  Otherwise
# the file is added to @rm_files, its allocated size is added to
# $unnecessary_bytes, and 0 is returned.
sub process_file
{
    my $file = shift;
    $file =~ s[~][%7E]g if get_variable("_tilde");
    $file =~ s[\+][%2B]g if get_variable("_plus");
    return 1 if $skipclean{$file};
    push @rm_files, sanitise_uri($file);

    # Element 12 of the stat list is the number of allocated blocks.  It is
    # missing when stat() fails, in which case nothing is added.
    my $blocks = ( stat($file) )[12];
    $unnecessary_bytes += ( $blocks || 0 ) * 512;
    return 0;
}
1148
# Cleaning decision for a directory, made recursively.
#   first argument - path of the directory, relative to the mirror directory
# Returns a true value when the directory is listed in %skipclean or holds
# anything that must stay; otherwise the directory is added to @rm_dirs and
# a false value is returned.  Dies when the directory cannot be read.
sub process_directory
{
    my ($dir) = @_;
    my $is_needed = 0;
    return 1 if $skipclean{$dir};

    opendir( my $dir_h, $dir ) or die "apt-mirror: can't opendir $dir: $!";
    my @entries = grep { !/^\.$/ && !/^\.\.$/ } readdir($dir_h);

    foreach my $entry (@entries)
    {
        my $item = $dir . "/" . $entry;

        # The three tests are independent: a symlink to a file is seen by
        # both the file and the symlink handler.
        if ( -d $item && !-l $item )
        {
            $is_needed |= process_directory($item);
        }
        if ( -f $item )
        {
            $is_needed |= process_file($item);
        }
        if ( -l $item )
        {
            $is_needed |= process_symlink($item);
        }
    }
    closedir $dir_h;

    push @rm_dirs, $dir unless $is_needed;
    return $is_needed;
}
1166
# Collect everything below the "clean" directories that is no longer
# referenced, then either delete it right away (_autoclean) or write a shell
# script that does so.
chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

foreach my $directory ( keys %clean_directory )
{
    process_directory($directory) if -d $directory && !-l $directory;
}

open my $clean_fh, ">", get_variable("cleanscript") or die("apt-mirror: can't open clean script file");

my ( $i, $total ) = ( 0, scalar @rm_files );

if ( get_variable("_autoclean") )
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories will be freed...";

    chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

    foreach (@rm_files) { unlink $_; }
    foreach (@rm_dirs)  { rmdir $_; }

}
else
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories can be freed.\n";
    print "Run " . get_variable("cleanscript") . " for this purpose.\n\n";

    print {$clean_fh} "#!/bin/sh\n";
    print {$clean_fh} "set -e\n\n";
    print {$clean_fh} "cd " . quoted_path( get_variable("mirror_path") ) . "\n\n";
    print {$clean_fh} "echo 'Removing $total unnecessary files [$size_output]...'\n";
    foreach (@rm_files)
    {
        # Names come from the mirrored tree, so they are quoted properly
        # before being written into the script.
        print {$clean_fh} "rm -f " . quoted_path($_) . "\n";
        print {$clean_fh} "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 500;
        print {$clean_fh} "echo -n .\n" unless $i % 10;
        $i++;
    }
    print {$clean_fh} "echo 'done.'\n";
    print {$clean_fh} "echo\n\n";

    $i     = 0;
    $total = scalar @rm_dirs;
    print {$clean_fh} "echo 'Removing $total unnecessary directories...'\n";
    foreach (@rm_dirs)
    {
        my $quoted_dir = quoted_path($_);
        print {$clean_fh} "if test -d $quoted_dir; then rm -fr $quoted_dir; fi\n";
        print {$clean_fh} "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 50;
        print {$clean_fh} "echo -n .\n";
        $i++;
    }
    print {$clean_fh} "echo 'done.'\n";
    print {$clean_fh} "echo\n";

}

# Closed in both cases, so the script is complete on disk before its
# permissions are changed.
close $clean_fh;

# Make clean script executable
my $perm = ( stat get_variable("cleanscript") )[2] & 07777;
chmod( $perm | 0111, get_variable("cleanscript") );
1231
# Run the post-mirror script when enabled: directly if it is executable,
# through /bin/sh otherwise.
if ( get_variable("run_postmirror") )
{
    my $postmirror_script = get_variable("postmirror_script");

    print "Running the Post Mirror script ...\n";
    print "(" . $postmirror_script . ")\n\n";

    my @command = -x $postmirror_script
      ? ( $postmirror_script, '' )
      : ( '/bin/sh', $postmirror_script );
    system(@command);

    print "\nPost Mirror script has completed. See above output for any possible errors.\n\n";
}

# Release the lock taken at startup.
unlock_aptmirror();
1248