apt-mirror
· 38 KiB · Text
Raw
#!/usr/bin/perl
=pod
=head1 NAME
apt-mirror - apt sources mirroring tool
=head1 SYNOPSIS
apt-mirror [configfile]
=head1 DESCRIPTION
A small and efficient tool that lets you mirror a part of or
the whole Debian GNU/Linux distribution or any other apt sources.
Main features:
* It uses a config similar to APT's F<sources.list>
* It's fully pool compliant
* It supports multithreaded downloading
* It supports multiple architectures at the same time
* It can automatically remove unneeded files
* It works well on an overloaded Internet connection
* It never produces an inconsistent mirror including while mirroring
* It works on all POSIX compliant systems with Perl and wget
=head1 COMMENTS
apt-mirror uses F</etc/apt/mirror.list> as a configuration file.
By default it is tuned to official Debian or Ubuntu mirrors. Change
it for your needs.
After you setup the configuration file you may run as root:
# su - apt-mirror -c apt-mirror
Or uncomment the line in F</etc/cron.d/apt-mirror> to enable daily mirror updates.
=head1 FILES
F</etc/apt/mirror.list>
Main configuration file
F</etc/cron.d/apt-mirror>
Cron configuration template
F</var/spool/apt-mirror/mirror>
Mirror places here
F</var/spool/apt-mirror/skel>
Place for temporarily downloaded indexes
F</var/spool/apt-mirror/var>
Log files placed here. URLs and MD5 checksums also here.
=head1 CONFIGURATION EXAMPLES
The mirror.list configuration supports many options, the file is well commented explaining each option.
Here are some sample mirror configuration lines showing the various supported ways:
Normal:
deb http://example.com/debian stable main contrib non-free
Arch Specific: (many other architectures are supported)
deb-powerpc http://example.com/debian stable main contrib non-free
HTTP and FTP Auth or non-standard port:
deb http://user:pass@example.com:8080/debian stable main contrib non-free
HTTPS with sending Basic HTTP authentication information (plaintext username and password) for all requests:
(this was default behaviour of Wget 1.10.2 and prior and is needed for some servers with new version of Wget)
set auth_no_challenge 1
deb https://user:pass@example.com:443/debian stable main contrib non-free
HTTPS without checking certificate:
set no_check_certificate 1
deb https://example.com:443/debian stable main contrib non-free
Source Mirroring:
deb-src http://example.com/debian stable main contrib non-free
=head1 AUTHORS
Dmitry N. Hramtsov E<lt>hdn@nsu.ruE<gt>
Brandon Holtsclaw E<lt>me@brandonholtsclaw.comE<gt>
=cut
use warnings;
use strict;
use File::Copy;
use File::Compare;
use File::Path qw(make_path);
use File::Basename;
use Fcntl qw(:flock);

# Path of the configuration file (may be overridden by the first CLI argument).
my $config_file;

# Default values for every recognised "set <key> <value>" option in
# mirror.list. Values may reference other variables as '$name' and are
# expanded lazily by get_variable().
my %config_variables = (
    # Architecture mirrored when a deb line has no -<arch> suffix;
    # falls back to i386 when dpkg is not available.
    "defaultarch"          => `dpkg --print-architecture 2>/dev/null` || 'i386',
    "nthreads"             => 20,
    "base_path"            => '/var/spool/apt-mirror',
    "mirror_path"          => '$base_path/mirror',
    "skel_path"            => '$base_path/skel',
    "var_path"             => '$base_path/var',
    "cleanscript"          => '$var_path/clean.sh',
    "_contents"            => 1,
    "_autoclean"           => 0,
    "_tilde"               => 0,
    "_plus"                => 0,
    "limit_rate"           => '100m',
    "run_postmirror"       => 1,
    "auth_no_challenge"    => 0,
    "no_check_certificate" => 0,
    "unlink"               => 0,
    "paranoid"             => 0,
    "postmirror_script"    => '$var_path/postmirror.sh',
    "use_proxy"            => 'off',
    "http_proxy"           => '',
    "https_proxy"          => '',
    "proxy_user"           => '',
    "proxy_password"       => ''
);

# Parsed "deb"/"deb-src" lines: [arch, uri, distribution, components...]
# and [uri, distribution, components...] respectively.
my @config_binaries = ();
my @config_sources  = ();

# URL work lists for the release and index download stages.
my @release_urls;
my @index_urls;

# PIDs of forked wget workers.
my @childrens = ();

# Files the cleaner must never delete, and directories that "clean"
# lines ask us to prune.
my %skipclean       = ();
my %clean_directory = ();

# Hash algorithms in order of preference (strongest first).
my @hash_strength = qw(SHA512 SHA256 SHA1 MD5Sum);

# Field name used for each algorithm in Packages indexes.
my %packages_hashes = (
    SHA512 => "SHA512",
    SHA256 => "SHA256",
    SHA1   => "SHA1",
    MD5Sum => "MD5sum",
);

# Field name used for each algorithm in Sources indexes.
my %sources_hashes = (
    SHA512 => "Checksums-Sha512",
    SHA256 => "Checksums-Sha256",
    SHA1   => "Checksums-Sha1",
    MD5Sum => "Files",
);

# External checksum tools used for verification in paranoid mode.
my %verify_commands = (
    SHA512 => "sha512sum",
    SHA256 => "sha256sum",
    SHA1   => "sha1sum",
    MD5Sum => "md5sum",
);

# Names of the per-algorithm checksum list files written under var_path.
my %checksum_filenames = (
    SHA512 => "SHA512",
    SHA256 => "SHA256",
    SHA1   => "SHA1",
    MD5Sum => "MD5",
);

# Mapping of files downloaded from a by-hash directory to their canonical locations.
my %hashsum_to_files = ();
# Mapping of all the checksums for a given canonical filename.
my %file_to_hashsums;
# Expected checksum for each queued URL, as [algorithm, hashsum].
my %urls_checksums = ();

######################################################################################
## Setting up $config_file variable

$config_file = "/etc/apt/mirror.list";    # Default value
if ( $_ = shift )
{
    die("apt-mirror: invalid config file specified") unless -e $_;
    $config_file = $_;
}

# The dpkg backtick output above ends with a newline; strip it.
chomp $config_variables{"defaultarch"};
######################################################################################
## Common subroutines
# Round a number to one decimal place and format it with exactly one
# digit after the decimal point (e.g. 5 -> "5.0", 1.27 -> "1.3").
sub round_number
{
    my ($value) = @_;
    my $sign = '';
    if ( $value < 0 )
    {
        $sign  = '-';
        $value = -$value;
    }
    # Add half a tenth, then truncate to a single decimal digit.
    $value = int( ( $value + .05 ) * 10 ) / 10;
    # Guarantee a ".0"-style suffix on whole numbers.
    $value .= '.0' if index( $value, '.' ) < 0;
    $value .= '0'  if substr( $value, -1 ) eq '.';
    chop $value if $value =~ /\.\d\d0\z/;
    return $sign . $value;
}
# Render a byte count as a human readable string using binary units
# (bytes, KiB, MiB or GiB). Values of 1 KiB and above are rounded to
# one decimal place via round_number(); smaller values are printed
# unscaled.
sub format_bytes
{
    my ($bytes) = @_;
    my @units = (
        [ 1024 * 1024 * 1024, 'GiB' ],
        [ 1024 * 1024,        'MiB' ],
        [ 1024,               'KiB' ],
    );
    foreach my $unit (@units)
    {
        my ( $factor, $name ) = @{$unit};
        if ( $bytes >= $factor )
        {
            return round_number( $bytes / $factor ) . " " . $name;
        }
    }
    return "$bytes bytes";
}
# Look up a configuration variable and recursively expand any embedded
# $name references against %config_variables. Expansion is capped to
# guard against self-referencing definitions.
sub get_variable
{
    my ($name) = @_;
    my $value = $config_variables{$name};
    my $remaining = 16;
    while ( $value =~ s/\$(\w+)/$config_variables{$1}/xg )
    {
        die("apt-mirror: too many substitution while evaluating variable") if ( $remaining-- ) < 0;
    }
    return $value;
}
# Wrap a filesystem path in single quotes for safe interpolation into
# a shell command line, escaping any embedded single quotes.
sub quoted_path
{
    my ($path) = @_;
    ( my $escaped = $path ) =~ s/'/'\\''/g;
    return sprintf( "'%s'", $escaped );
}
# Acquire an exclusive, non-blocking lock so only one apt-mirror
# instance runs per var_path. The bareword LOCK_FILE handle is shared
# with unlock_aptmirror(), which closes it and removes the lock file.
sub lock_aptmirror
{
    my $lock_path = get_variable("var_path") . "/apt-mirror.lock";
    # Previously the open result was ignored; a failed open (e.g. an
    # unwritable var_path) made the subsequent flock fail and died with
    # a misleading "already running" message.
    open( LOCK_FILE, '>', $lock_path )
      or die("apt-mirror: can't open lock file ($lock_path): $!");
    my $lock = flock( LOCK_FILE, LOCK_EX | LOCK_NB );
    if ( !$lock )
    {
        die("apt-mirror is already running, exiting");
    }
}
# Release the lock taken by lock_aptmirror(): closing the shared
# LOCK_FILE handle drops the flock, then the lock file is removed.
sub unlock_aptmirror
{
    close(LOCK_FILE);
    unlink( get_variable("var_path") . "/apt-mirror.lock" );
}
# Verify the files downloaded for $stage against the per-algorithm
# checksum lists written by download_urls() into var_path. Any file the
# checksum tool reports as FAILED is deleted so a subsequent run can
# re-fetch it. Returns the number of corrupted files removed.
sub delete_corrupted_files
{
    my $stage = shift;
    my $found = 0;
    foreach my $hash (@hash_strength)
    {
        my $file = get_variable("var_path") . "/${stage}-${hash}";
        next unless -s $file;
        my $pipe;
        open $pipe, "-|", qq(env LC_ALL=C ${verify_commands{$hash}} --check --quiet ${file} 2>/dev/null) or die "Cannot run ${verify_commands{$hash}}";
        while (<$pipe>)
        {
            # Only lines of the form "<name>: FAILED" identify corrupt
            # files. Previously a non-matching line left $filename undef
            # and the -f test below emitted "uninitialized value" warnings.
            my ($filename) = /^(.*): FAILED/;
            next unless defined $filename;
            if ( -f $filename )
            {
                $found++;
                print "$filename is corrupted, deleting....\n";
                unlink $filename or die "Cannot delete $filename.";
            }
        }
        close $pipe;
    }
    return $found;
}
# Download the given URLs for a stage ("release", "index" or "archive")
# by distributing them round-robin over up to "nthreads" parallel wget
# processes. In paranoid mode, per-algorithm checksum lists are written
# beforehand and every download is verified afterwards; files fetched
# from by-hash locations are finally copied to their canonical names.
sub download_urls
{
    my $stage = shift;
    my @urls;
    my $i = 0;
    my $pid;
    my $nthreads = get_variable("nthreads");
    my @args     = ();
    local $| = 1;    # unbuffered progress output

    @urls = @_;
    # Never fork more workers than there are URLs.
    $nthreads = @urls if @urls < $nthreads;

    # Translate configuration switches into wget options.
    if ( get_variable("auth_no_challenge") == 1 ) { push( @args, "--auth-no-challenge" ); }
    if ( get_variable("no_check_certificate") == 1 ) { push( @args, "--no-check-certificate" ); }
    if ( get_variable("unlink") == 1 ) { push( @args, "--unlink" ); }
    if ( length( get_variable("use_proxy") ) && ( get_variable("use_proxy") eq 'yes' || get_variable("use_proxy") eq 'on' ) )
    {
        if ( length( get_variable("http_proxy") ) || length( get_variable("https_proxy") ) ) { push( @args, "-e use_proxy=yes" ); }
        if ( length( get_variable("http_proxy") ) ) { push( @args, "-e http_proxy=" . get_variable("http_proxy") ); }
        if ( length( get_variable("https_proxy") ) ) { push( @args, "-e https_proxy=" . get_variable("https_proxy") ); }
        if ( length( get_variable("proxy_user") ) ) { push( @args, "-e proxy_user=" . get_variable("proxy_user") ); }
        if ( length( get_variable("proxy_password") ) ) { push( @args, "-e proxy_password=" . get_variable("proxy_password") ); }
    }
    print "Downloading " . scalar(@urls) . " $stage files using $nthreads threads...\n";

    if ( get_variable("paranoid") )
    {
        # Write one "<hashsum> <filename>" list per algorithm so the
        # downloads can be verified by delete_corrupted_files() later.
        my %fh = ();
        foreach my $hash (@hash_strength)
        {
            open $fh{$hash}, ">", get_variable("var_path") . "/${stage}-${hash}" or die("apt-mirror: Cannot write to ${stage}-${hash}");
        }
        foreach (@urls)
        {
            if ( $urls_checksums{$_} )
            {
                my ( $hash, $hashsum ) = @{ $urls_checksums{$_} };
                my $fh = $fh{$hash};
                print $fh $hashsum . " " . sanitise_uri($_) . "\n";
            }
        }
        foreach my $hash (@hash_strength)
        {
            close $fh{$hash};
        }
    }

    # Spread the URLs over one input file per wget worker.
    my @url_fds;
    for ( $i = 0; $i < $nthreads; $i++ )
    {
        open( $url_fds[$i], ">", get_variable("var_path") . "/$stage-urls.$i" ) or die("apt-mirror: can't write to intermediate file ($stage-urls.$i)");
    }
    for ( $i = scalar(@urls) - 1; $i >= 0; $i-- )
    {
        my $thread = $i % $nthreads;
        print { $url_fds[$thread] } $urls[$i] . "\n";
    }
    foreach (@url_fds)
    {
        close $_ or die("apt-mirror: can't close intermediate file ($stage-urls.$i)");
    }

    # One wget process per worker, each with its own log file.
    for ( $i = 0; $i < $nthreads; $i++ )
    {
        $pid = fork();
        die("apt-mirror: can't do fork in download_urls") if !defined($pid);
        if ( $pid == 0 )
        {
            exec 'wget', '--no-if-modified-since', '--no-cache', '--limit-rate=' . get_variable("limit_rate"), '-T', '60', '-t', '1', '-r', '-N', '-l', 'inf', '-o', get_variable("var_path") . "/$stage-log.$i", '-i', get_variable("var_path") . "/$stage-urls.$i", @args;

            # shouldn't reach this unless exec fails
            die("\n\nCould not run wget, please make sure its installed and in your path\n\n");
        }
        push @childrens, $pid;
    }

    # Reap the workers, printing the number still running.
    print "Begin time: " . localtime() . "\n[" . scalar(@childrens) . "]... ";
    while ( scalar @childrens )
    {
        my $dead = wait();
        @childrens = grep { $_ != $dead } @childrens;
        print "[" . scalar(@childrens) . "]... ";
    }
    print "\nEnd time: " . localtime() . "\n\n";

    if ( get_variable("paranoid") )
    {
        if ( delete_corrupted_files($stage) > 0 )
        {
            die "Some files were corrupted while downloading, aborting...";
        }
    }

    # Fan by-hash downloads out to the canonical filenames they stand for.
    if ( scalar keys %hashsum_to_files > 0 )
    {
        foreach my $hashsum_filename ( keys %hashsum_to_files )
        {
            foreach my $filename ( @{ $hashsum_to_files{$hashsum_filename} } )
            {
                copy_file( $hashsum_filename, $filename );
            }
        }
    }
}
## Parse config
# Parse one mirror.list line (taken from $_). The returned hash has a
# 'type' key of deb/deb-src (repository line, possibly with an
# architecture suffix or [options]), set (variable assignment) or
# clean/skip-clean (cleanup directive). An unrecognised line yields an
# empty hash.
sub parse_config_line
{
    my $line = $_;
    my %config;
    my $pattern_deb_line = qr/^[\t ]*(?<type>deb-src|deb)(?:-(?<arch>[\w\-]+))?[\t ]+(?:\[(?<options>[^\]]+)\][\t ]+)?(?<uri>[^\s]+)[\t ]+(?<components>.+)$/;
    if ( $line =~ $pattern_deb_line )
    {
        @config{qw(type arch uri components)} = @+{qw(type arch uri components)};
        $config{'options'} = $+{options} ? $+{options} : "";
        # An arch=... option overrides the -<arch> suffix.
        if ( $config{'options'} =~ /arch=((?<arch>[\w\-]+)[,]*)/g )
        {
            $config{'arch'} = $+{arch};
        }
        $config{'components'} = [ split /\s+/, $config{'components'} ];
    }
    elsif ( $line =~ /set[\t ]+(?<key>[^\s]+)[\t ]+(?<value>"[^"]+"|'[^']+'|[^\s]+)/ )
    {
        $config{'type'} = 'set';
        $config{'key'}  = $+{key};
        # Strip one level of surrounding quotes, if any.
        ( my $value = $+{value} ) =~ s/^'(.*)'$/$1/;
        $value =~ s/^"(.*)"$/$1/;
        $config{'value'} = $value;
    }
    elsif ( $line =~ /(?<type>clean|skip-clean)[\t ]+(?<uri>[^\s]+)/ )
    {
        @config{qw(type uri)} = @+{qw(type uri)};
    }
    return %config;
}
# Turn a source URI into the relative directory name used on disk:
# strip the scheme, drop user:password@ credentials when the host
# segment contains '@', optionally percent-encode '~' and '+' (per the
# _tilde/_plus settings), and remove a trailing slash.
sub sanitise_uri
{
    my ($uri) = @_;
    $uri =~ s{^(\w+)://}{};
    my $host_segment = ( split '/', $uri )[0];
    $uri =~ s{^([^@]+)?@?}{} if $host_segment =~ /@/;
    $uri =~ s{~}{\%7E}g if get_variable("_tilde");
    $uri =~ s{\+}{\%2B}g if get_variable("_plus");
    $uri =~ s{/$}{};
    return $uri;
}
# Read mirror.list and populate the global configuration structures.
open CONFIG, "<$config_file" or die("apt-mirror: can't open config file ($config_file)");
while (<CONFIG>)
{
    next if /^\s*#/;     # skip comment lines
    next unless /\S/;    # skip blank lines
    my $line = $_;
    my %config_line = parse_config_line;

    if ( $config_line{'type'} eq "set" )
    {
        $config_variables{ $config_line{'key'} } = $config_line{'value'};
        next;
    }
    elsif ( $config_line{'type'} eq "deb" )
    {
        # Binary repository; fall back to defaultarch when the line has
        # no -<arch> suffix (and no arch= option).
        my $arch = $config_line{'arch'};
        $arch = get_variable("defaultarch") if ! defined $config_line{'arch'};
        push @config_binaries, [ $arch, $config_line{'uri'}, @{ $config_line{'components'} } ];
        next;
    }
    elsif ( $config_line{'type'} eq "deb-src" )
    {
        push @config_sources, [ $config_line{'uri'}, @{ $config_line{'components'} } ];
        next;
    }
    elsif ( $config_line{'type'} =~ /(skip-clean|clean)/ )
    {
        my $link = sanitise_uri( $config_line{'uri'} );
        if ( $config_line{'type'} eq "skip-clean" )
        {
            $skipclean{ $link } = 1;
        }
        elsif ( $config_line{'type'} eq "clean" )
        {
            $clean_directory{ $link } = 1;
        }
        next;
    }
    # NOTE(review): an unparsable line reaches here with an empty hash,
    # so the 'type' comparisons above first warn about an uninitialised
    # value before this die fires.
    die("apt-mirror: invalid line in config file ($.: $line ...)");
}
close CONFIG;

die("Please explicitly specify 'defaultarch' in mirror.list") unless get_variable("defaultarch");

######################################################################################
## Create the 3 needed directories if they don't exist yet

my @needed_directories = ( get_variable("mirror_path"), get_variable("skel_path"), get_variable("var_path") );
foreach my $needed_directory (@needed_directories)
{
    unless ( -d $needed_directory )
    {
        make_path($needed_directory) or die("apt-mirror: can't create $needed_directory directory");
    }
}
#
#######################################################################################

lock_aptmirror();
######################################################################################
## Skel download
# URLs queued for the current download stage, mapped to expected size.
my %urls_to_download = ();
my ( $url, $arch );

# Normalise a URL/path: collapse "/./" components, duplicate slashes
# (except the "//" right after a scheme's colon) and "dir/../"
# sequences, then apply the optional '~'/'+' percent-encoding.
# The loops repeat until each pattern no longer matches anywhere.
sub remove_double_slashes
{
    local $_ = shift;
    while (s[/\./][/]g) { }
    while (s[(?<!:)//][/]g) { }
    while (s[(?<!:/)/[^/]+/\.\./][/]g) { }
    s/~/\%7E/g if get_variable("_tilde");
    s/\+/\%2B/g if get_variable("_plus");
    return $_;
}
# Queue a URL for downloading and mark its on-disk name as kept.
# Optional arguments carry checksum data taken from the Release file:
#   $size            - expected file size in bytes (may be undef)
#   $strongest_hash  - strongest algorithm the repository offers
#   $hash            - algorithm of $hashsum
#   $hashsum         - checksum value for this file
#   $acquire_by_hash - true when the repository supports by-hash layout
sub add_url_to_download
{
    my $url             = remove_double_slashes(shift);
    my $size            = shift;
    my $strongest_hash  = shift;
    my $hash            = shift;
    my $hashsum         = shift;
    my $acquire_by_hash = shift;

    my $canonical_filename = sanitise_uri($url);
    $skipclean{$canonical_filename} = 1;

    if ($acquire_by_hash)
    {
        # If the optional hashsum was passed as an argument
        # - download the strongest hash only
        # - make a copy to the canonical location
        # - make a copy for the other known hash versions
        $url = dirname($url) . "/by-hash/${hash}/${hashsum}";
        my $hashsum_filename = dirname($canonical_filename) . "/by-hash/${hash}/${hashsum}";
        $skipclean{$hashsum_filename} = 1;

        if ( $hash eq $strongest_hash )
        {
            # This is the strongest hash, which is the one to download.
            # Also need to remember to which canonical location it should be linked.
            $hashsum_to_files{$hashsum_filename} ||= [];
            push @{ $hashsum_to_files{$hashsum_filename} }, $canonical_filename;
            $urls_to_download{$url} = $size;
            $urls_checksums{$url} = [ $hash, $hashsum ];
        }
        else
        {
            # We are not going to download using this checksum, but we still
            # need to know where to put the checksum.
            $file_to_hashsums{$canonical_filename} ||= [];
            push @{ $file_to_hashsums{$canonical_filename} }, $hashsum_filename;
        }
    }
    else
    {
        # Not using by-hash, so download the file only.
        $urls_to_download{$url} = $size;
        if ( $strongest_hash and ( $hash eq $strongest_hash ) )
        {
            $urls_checksums{$url} = [ $hash, $hashsum ];
        }
    }
}
# Queue the top-level release files (InRelease, Release, Release.gpg)
# for every configured repository. Repositories with components use the
# dists/ layout; flat repositories append the distribution directly.
foreach (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$_};
    if (@components)
    {
        $url = $uri . "/dists/" . $distribution . "/";
    }
    else
    {
        $url = $uri . "/" . $distribution . "/";
    }
    add_url_to_download( $url . "InRelease" );
    add_url_to_download( $url . "Release" );
    add_url_to_download( $url . "Release.gpg" );
}

foreach (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$_};
    if (@components)
    {
        $url = $uri . "/dists/" . $distribution . "/";
    }
    else
    {
        $url = $uri . "/" . $distribution . "/";
    }
    add_url_to_download( $url . "InRelease" );
    add_url_to_download( $url . "Release" );
    add_url_to_download( $url . "Release.gpg" );
}

# Release files are fetched into the skel tree first; they only reach
# the mirror tree after the indexes have been processed.
chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel");

@release_urls = sort keys %urls_to_download;
download_urls( "release", @release_urls );
######################################################################################
## Download all relevant metadata
%urls_to_download = ();

# Scan the (In)Release file of one repository and queue every metadata
# file matching the requested architecture and components (Packages,
# Sources, Contents, Commands, dep11, i18n, ...). Honours
# "Acquire-By-Hash: yes" by downloading via the strongest offered hash.
# Returns true when a Release file was found and parsed, false when the
# repository has none (caller then falls back to well-known paths).
sub find_metadata_in_release
{
    # Look in the Release file for any files we need to download
    my ( $arch, $uri, $distribution, @components ) = @_;

    my ( $release_uri, $release_path, $line ) = '';
    my $component_regex = undef;
    my $arch_regex      = "(?:${arch}|all)";
    my $compressed_extension_regex = '(?:\.(?:gz|bz2|xz|lzma))$';
    my $dist_uri;
    my $hash_type_regex = "(?:" . join( "|", @hash_strength ) . ")";

    if (@components)
    {
        $dist_uri = remove_double_slashes( $uri . "/dists/" . $distribution . "/" );
        $component_regex = "(?:" . join( "|", @components ) . ")";
    }
    else
    {
        $dist_uri = remove_double_slashes( $uri . "/" . $distribution . "/" );
    }

    # Prefer the inline-signed InRelease over the detached Release.
    my $stream;
    foreach my $release_filename ( "InRelease", "Release" )
    {
        $release_uri  = $dist_uri . $release_filename;
        $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);
        last if ( open $stream, "<", $release_path );
        $stream = undef;
    }
    unless ($stream)
    {
        warn( "Failed to find InRelease or Release in " . get_variable("skel_path") . "/" . sanitise_uri($dist_uri) );
        return 0;
    }

    my $hash = undef;            # algorithm of the checksum block currently being read
    my %avaiable_hashes = ();    # algorithms this Release file offers (sic: historic typo)
    my $acquire_by_hash = 0;
    my @parts_to_download = ();  # entries of [hashsum, size, filename, hash]

    while ( $line = <$stream> )
    {
        chomp $line;
        if ($hash)
        {
            # Inside a checksum block every entry is indented:
            # " <hashsum> <size> <filename>".
            if ( $line =~ /^ +(.*)$/ )
            {
                my @parts = split( / +/, $1 );
                if ( @parts == 3 )
                {
                    my ( $hashsum, $size, $filename ) = @parts;
                    push @parts, $hash;
                    if ( $arch eq "source" )
                    {
                        if ($component_regex)
                        {
                            # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
                            if (
                                (
                                    $filename =~ m{^${component_regex}/source/Sources${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/Contents-source${compressed_extension_regex}}
                                )
                            )
                            {
                                push @parts_to_download, \@parts;
                            }
                        }
                        else
                        {
                            # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
                            if ( $filename =~ m{^Sources${compressed_extension_regex}} )
                            {
                                push @parts_to_download, \@parts;
                            }
                        }
                    }
                    else
                    {
                        if ($component_regex)
                        {
                            # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
                            if (
                                (
                                    $filename =~ m{^${component_regex}/Contents-${arch_regex}${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^Contents-${arch_regex}${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^Packages${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/binary-${arch_regex}/Packages${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/binary-${arch_regex}/Release$}
                                ) or (
                                    $filename =~ m{^${component_regex}/cnf/Commands-${arch_regex}${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/dep11/Components-${arch_regex}.*${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/dep11/icons-.*${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/i18n/Translation-.*${compressed_extension_regex}}
                                )
                            )
                            {
                                push @parts_to_download, \@parts;
                            }
                        }
                        else
                        {
                            # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
                            if ( $filename =~ m{^Packages${compressed_extension_regex}} )
                            {
                                push @parts_to_download, \@parts;
                            }
                        }
                    }
                }
                else
                {
                    warn("Malformed checksum line \"$1\" in $release_uri");
                }
            }
            else
            {
                # A non-indented line ends the checksum block.
                $hash = undef;
            }
        }
        if ( not $hash )
        {
            # Outside a checksum block: look for a block header such as
            # "SHA256:" or the Acquire-By-Hash flag.
            if ( $line =~ /^(${hash_type_regex}):$/ )
            {
                $hash = $1;
                $avaiable_hashes{$hash} = 1;
            }
            elsif ( $line eq "Acquire-By-Hash: yes" )
            {
                $acquire_by_hash = 1;
            }
        }
    }
    close $stream;

    # Pick the strongest advertised hash for by-hash downloads.
    my $strongest_hash;
    if ($acquire_by_hash)
    {
        foreach (@hash_strength)
        {
            if ( $avaiable_hashes{$_} )
            {
                $strongest_hash = $_;
                last;
            }
        }
        unless ($strongest_hash)
        {
            warn("Cannot find a supported hash in $release_uri, will download from canonical locations.");
            $acquire_by_hash = 0;
        }
    }

    foreach (@parts_to_download)
    {
        my ( $hashsum, $size, $filename, $hash ) = @{$_};
        if ($acquire_by_hash)
        {
            add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $hash, $hashsum, 1 );
        }
        else
        {
            add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $hash, $hashsum, 0 );
        }
    }
    return 1;
}
# Queue the index files of every repository: via the Release file when
# one exists, otherwise by probing the well-known index locations in
# every supported compression flavour.
print "Processing metadata files from releases [";
foreach (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$_};
    print "M";
    unless ( find_metadata_in_release( $arch, $uri, $distribution, @components ) )
    {
        # Insecure repo with no release file - try to get the well known indices
        foreach my $file_extension ( ".gz", ".bz2", ".xz", ".lzma", "" )
        {
            if (@components)
            {
                # Debian repo
                foreach my $component (@components)
                {
                    foreach my $path (
                        "/dists/${distribution}/${component}/binary-${arch}/Packages",
                        "/dists/${distribution}/${component}/binary-all/Packages",
                        "/dists/${distribution}/${component}/Contents-${arch}",
                        "/dists/${distribution}/${component}/Contents-all",
                        "/dists/${distribution}/Contents-${arch}",
                        "/dists/${distribution}/Contents-all",
                      )
                    {
                        add_url_to_download( "${uri}/${path}${file_extension}" );
                    }
                }
            }
            else
            {
                # Flat repo
                foreach my $path (
                    "${distribution}/Packages",
                    "${distribution}/Contents-${arch}",
                    "${distribution}/Contents-all",
                  )
                {
                    add_url_to_download( "${uri}/${path}${file_extension}" );
                }
            }
        }
    }
}

foreach (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$_};
    print "M";
    unless ( find_metadata_in_release( "source", $uri, $distribution, @components ) )
    {
        # Insecure repo with no release file - try to get the well known indices
        foreach my $file_extension ( ".gz", ".bz2", ".xz", ".lzma", "" )
        {
            if (@components)
            {
                # Debian repo
                foreach my $path (
                    "${distribution}/source/Sources",
                    "${distribution}/Contents-source",
                  )
                {
                    add_url_to_download( "${uri}/${path}${file_extension}" );
                }
            }
            else
            {
                # Flat repo
                add_url_to_download( "${uri}/${distribution}/Sources${file_extension}" );
            }
        }
    }
}
print "]\n\n";

@index_urls = sort keys %urls_to_download;
download_urls( "index", @index_urls );
######################################################################################
## Main download preparations
%urls_to_download = ();

# Intermediate lists written under var_path: ALL (every mirrored file),
# NEW (files queued for download this run) and one "<hashsum> <file>"
# list per checksum algorithm.
my %files_fh;
open $files_fh{ALL}, ">" . get_variable("var_path") . "/ALL" or die("apt-mirror: can't write to intermediate file (ALL)");
open $files_fh{NEW}, ">" . get_variable("var_path") . "/NEW" or die("apt-mirror: can't write to intermediate file (NEW)");
foreach my $hash (@hash_strength)
{
    open $files_fh{$hash}, ">" . get_variable("var_path") . "/" . ${checksum_filenames{$hash}} or die("apt-mirror: can't write to intermediate file (${hash})");
}

# Cache for _stat() lookups; flushed by clear_stat_cache().
my %stat_cache = ();
# stat() with memoisation: results (including failed lookups, which
# cache an empty list) are kept in %stat_cache until clear_stat_cache()
# is called.
sub _stat
{
    my ($filename) = @_;
    $stat_cache{$filename} = [ stat($filename) ]
        unless exists $stat_cache{$filename};
    return @{ $stat_cache{$filename} };
}
# Drop all memoised stat() results (used once downloads have changed
# files on disk, so later checks see fresh data).
sub clear_stat_cache
{
    %stat_cache = ();
}
# Decide whether a mirrored file must be (re)downloaded by comparing
# its local size against the size advertised by the server. Returns
# true when the file is missing or empty, or when the sizes differ;
# with "unlink 1" a stale file is removed first so wget can replace it.
sub need_update
{
    my ( $filename, $size_on_server ) = @_;
    my $local_size = ( _stat($filename) )[7];
    return 1 unless $local_size;
    return 0 if $size_on_server == $local_size;
    unlink $filename if get_variable("unlink") == 1;
    return 1;
}
# Parse a Packages or Sources index of one repository and queue every
# listed file that is missing locally or has a different size.
# Arguments:
#   $uri      - base URI of the repository
#   $index    - index path relative to the repository
#               (e.g. "/dists/stable/main/binary-amd64/Packages")
#   $optional - when true, a missing index is silently ignored
# Side effects: decompresses the index in the skel tree, appends to the
# ALL/NEW/per-hash list files, marks files in %skipclean and queues
# downloads via add_url_to_download().
sub process_index
{
    my $uri      = shift;
    my $index    = shift;
    my $optional = shift;
    my ( $path, $package, $mirror, $files ) = '';

    $path = sanitise_uri($uri);
    local $/ = "\n\n";    # read the index one stanza (paragraph) at a time
    $mirror = get_variable("mirror_path") . "/" . $path;

    # Decompress whichever compressed variant of the index was downloaded.
    if ( -e "$path/$index.gz" )
    {
        system("gunzip < $path/$index.gz > $path/$index");
    }
    elsif ( -e "$path/$index.xz" )
    {
        system("xz -d < $path/$index.xz > $path/$index");
    }
    elsif ( -e "$path/$index.lzma" )
    {
        # Fixed: this branch previously fed "$index.xz" to xz, so a
        # repository shipping only a .lzma index was never unpacked.
        system("xz -d < $path/$index.lzma > $path/$index");
    }
    elsif ( -e "$path/$index.bz2" )
    {
        system("bzip2 -d < $path/$index.bz2 > $path/$index");
    }

    unless ( open STREAM, "<", "$path/$index" )
    {
        if ($optional)
        {
            return;
        }
        warn("apt-mirror: can't open index $path/$index in process_index");
        return;
    }

    while ( $package = <STREAM> )
    {
        local $/ = "\n";
        chomp $package;

        # Split the stanza into "Field: value" pairs.
        my ( undef, %lines ) = split( /^([\w\-]+): */m, $package );
        chomp(%lines);

        if ( exists $lines{"Filename"} )
        {    # Packages index
            my $filename = remove_double_slashes( $path . "/" . $lines{"Filename"} );
            $skipclean{$filename} = 1;
            print { $files_fh{ALL} } $filename . "\n";
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $packages_hashes{$hash};
                print { $files_fh{$hash} } $lines{$index_hash} . " " . $filename . "\n" if $lines{$index_hash};
            }
            if ( need_update( $mirror . "/" . $lines{"Filename"}, $lines{"Size"} ) )
            {
                # Remember the strongest checksum the stanza provides.
                # Fixed: this used to loop with "foreach $hash (...)",
                # which implicitly localises the lexical, so $hash
                # reverted to undef after the loop and no checksum was
                # ever passed to add_url_to_download().
                my $hashsum = undef;
                my $hash    = undef;
                foreach my $candidate (@hash_strength)
                {
                    my $index_hash = $packages_hashes{$candidate};
                    if ( $lines{$index_hash} )
                    {
                        $hash    = $candidate;
                        $hashsum = $lines{$index_hash};
                        last;
                    }
                }
                print { $files_fh{NEW} } $filename . "\n";
                add_url_to_download( $uri . "/" . $lines{"Filename"}, $lines{"Size"}, $hash, $hash, $hashsum, 0 );
            }
        }
        else
        {    # Sources index
            $lines{"Directory"} = "" unless defined $lines{"Directory"};
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $sources_hashes{$hash};
                if ( $lines{$index_hash} )
                {
                    # Each checksum line is "<hashsum> <size> <filename>".
                    foreach ( split( /\n/, $lines{$index_hash} ) )
                    {
                        next if $_ eq '';
                        my @file = split;
                        die("apt-mirror: invalid Sources format") if @file != 3;
                        my $download_url = $uri . "/" . $lines{"Directory"} . "/" . $file[2];
                        my $filename = remove_double_slashes( $path . "/" . $lines{"Directory"} . "/" . $file[2] );
                        print { $files_fh{$hash} } $file[0] . " " . $filename . "\n";
                        unless ( $skipclean{$filename} )
                        {
                            $skipclean{$filename} = 1;
                            print { $files_fh{ALL} } $filename . "\n";
                            if ( need_update( $mirror . "/" . $lines{"Directory"} . "/" . $file[2], $file[1] ) )
                            {
                                print { $files_fh{NEW} } ${download_url} . "\n";
                                add_url_to_download( $uri . "/" . $lines{"Directory"} . "/" . $file[2], $file[1], $hash, $hash, $file[0], 0 );
                            }
                        }
                    }
                }
            }
        }
    }
    close STREAM;
}
print "Processing indexes: [";

# Sources indexes ("S" per repository) for deb-src lines.
foreach (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$_};
    print "S";
    if (@components)
    {
        my $component;
        foreach $component (@components)
        {
            process_index( $uri, "/dists/$distribution/$component/source/Sources" );
        }
    }
    else
    {
        process_index( $uri, "/$distribution/Sources" );
    }
}

# Packages indexes ("P" per repository) for deb lines; the binary-all
# index is optional and skipped silently when absent.
foreach (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$_};
    print "P";
    if (@components)
    {
        my $component;
        foreach $component (@components)
        {
            process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" );
            process_index( $uri, "/dists/$distribution/$component/binary-all/Packages", 1 );
        }
    }
    else
    {
        process_index( $uri, "/$distribution/Packages" );
    }
}

clear_stat_cache();

print "]\n\n";

foreach my $fh ( values %files_fh )
{
    close $fh;
}

######################################################################################
## Main download

chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

# Sum the advertised sizes of everything queued for the archive stage.
my $need_bytes = 0;
foreach ( values %urls_to_download )
{
    $need_bytes += $_;
}

my $size_output = format_bytes($need_bytes);
print "$size_output will be downloaded into archive.\n";

download_urls( "archive", sort keys %urls_to_download );
######################################################################################
## Copy skel to main archive
# Hard-link (or, failing that, copy) $from to $to, creating the target
# directory as needed and preserving atime/mtime. With "unlink 1" a
# differing target is removed first so the link can be recreated.
sub copy_file
{
    my ( $from, $to ) = @_;
    my $dir = dirname($to);
    return unless -f $from;
    make_path($dir) unless -d $dir;
    if ( get_variable("unlink") == 1 )
    {
        # NOTE(review): compare() also returns -1 on error, which is
        # treated like "different" here and removes the target.
        if ( compare( $from, $to ) != 0 ) { unlink($to); }
    }

    my @stat_from = stat($from);
    if ( -f $to )
    {
        # An identical full stat record means $to is already the same
        # inode (a hard link to $from), so nothing needs doing.
        my @stat_to = stat($to);
        return if ( "@stat_to" eq "@stat_from" );
    }

    unless ( link( $from, $to ) or copy( $from, $to ) )
    {
        warn("apt-mirror: can't copy $from to $to");
        return;
    }
    # Carry over the source timestamps so wget's -N comparisons work.
    my ( $atime, $mtime ) = @stat_from[ 8, 9 ];
    utime( $atime, $mtime, $to ) or die("apt-mirror: can't utime $to");
}
# Copy every release/index file downloaded into skel over to the mirror
# tree, then fan by-hash downloads out to their canonical names and the
# directories of the other hash algorithms.
foreach ( @release_urls, @index_urls )
{
    die("apt-mirror: invalid url in index_urls") unless s[^(\w+)://][];
    copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") );

    my $sanitized_uri = sanitise_uri($_);

    # If we downloaded any files from a checksum location, now is the time to
    # populate the canonical filename.
    if ( $hashsum_to_files{$sanitized_uri} )
    {
        foreach my $filename ( @{ $hashsum_to_files{$sanitized_uri} } )
        {
            copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $filename );
            if ( $file_to_hashsums{$filename} )
            {
                foreach my $hashsum_filename ( @{ $file_to_hashsums{$filename} } )
                {
                    copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $hashsum_filename );
                }
            }
        }
    }
}

######################################################################################
## Make cleaning script

# Entries the cleaner decides to remove, and the disk space they occupy.
my ( @rm_dirs, @rm_files ) = ();
my $unnecessary_bytes = 0;
# Symbolic links are never removed by the cleaner, so they always count
# as "needed" content for their parent directory.
sub process_symlink
{
    return 1;
}
# Classify one file for the cleaner. Files named by an index (or marked
# skip-clean) are kept; anything else is queued for removal and its
# on-disk size (512-byte blocks from stat) added to the running total.
# Returns true when the file is needed.
sub process_file
{
    my ($file) = @_;
    $file =~ s{~}{%7E}g if get_variable("_tilde");
    $file =~ s{\+}{%2B}g if get_variable("_plus");
    return 1 if $skipclean{$file};
    push @rm_files, sanitise_uri($file);
    my $blocks = ( stat($file) )[12];
    $unnecessary_bytes += $blocks * 512;
    return 0;
}
# Recursively walk $dir deciding what the cleaner may delete. Returns
# true when the directory contains anything needed; an entirely
# unneeded directory is queued for removal. Child results are OR-ed
# together, so one kept file preserves the whole parent chain.
sub process_directory
{
    my $dir = shift;
    my $is_needed = 0;
    return 1 if $skipclean{$dir};
    opendir( my $dir_h, $dir ) or die "apt-mirror: can't opendir $dir: $!";
    foreach ( grep { !/^\.$/ && !/^\.\.$/ } readdir($dir_h) )
    {
        my $item = $dir . "/" . $_;
        # Recurse into real directories; files and symlinks are leaves.
        $is_needed |= process_directory($item) if -d $item && !-l $item;
        $is_needed |= process_file($item) if -f $item;
        $is_needed |= process_symlink($item) if -l $item;
    }
    closedir $dir_h;
    push @rm_dirs, $dir unless $is_needed;
    return $is_needed;
}
chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

# Walk every directory named on a "clean" line and collect removables.
foreach ( keys %clean_directory )
{
    process_directory($_) if -d $_ && !-l $_;
}

open CLEAN, ">" . get_variable("cleanscript") or die("apt-mirror: can't open clean script file");

my ( $i, $total ) = ( 0, scalar @rm_files );

if ( get_variable("_autoclean") )
{
    # Autoclean: delete the unneeded files and directories right away.
    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories will be freed...";

    chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

    foreach (@rm_files) { unlink $_; }
    foreach (@rm_dirs)  { rmdir $_; }
}
else
{
    # Otherwise emit a clean.sh the administrator can run later.
    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories can be freed.\n";
    print "Run " . get_variable("cleanscript") . " for this purpose.\n\n";

    print CLEAN "#!/bin/sh\n";
    print CLEAN "set -e\n\n";
    print CLEAN "cd " . quoted_path( get_variable("mirror_path") ) . "\n\n";
    print CLEAN "echo 'Removing $total unnecessary files [$size_output]...'\n";
    foreach (@rm_files)
    {
        print CLEAN "rm -f '$_'\n";
        # Emit progress markers every 500 files / dots every 10.
        print CLEAN "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 500;
        print CLEAN "echo -n .\n" unless $i % 10;
        $i++;
    }
    print CLEAN "echo 'done.'\n";
    print CLEAN "echo\n\n";

    $i     = 0;
    $total = scalar @rm_dirs;
    print CLEAN "echo 'Removing $total unnecessary directories...'\n";
    foreach (@rm_dirs)
    {
        print CLEAN "if test -d '$_'; then rm -fr '$_'; fi\n";
        print CLEAN "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 50;
        print CLEAN "echo -n .\n";
        $i++;
    }
    print CLEAN "echo 'done.'\n";
    print CLEAN "echo\n";

    close CLEAN;
}

# Make clean script executable
my $perm = ( stat get_variable("cleanscript") )[2] & 07777;
chmod( $perm | 0111, get_variable("cleanscript") );

if ( get_variable("run_postmirror") )
{
    print "Running the Post Mirror script ...\n";
    print "(" . get_variable("postmirror_script") . ")\n\n";
    # Run the script directly when executable, otherwise via /bin/sh.
    if ( -x get_variable("postmirror_script") )
    {
        system( get_variable("postmirror_script"), '' );
    }
    else
    {
        system( '/bin/sh', get_variable("postmirror_script") );
    }
    print "\nPost Mirror script has completed. See above output for any possible errors.\n\n";
}

unlock_aptmirror();
1 | #!/usr/bin/perl |
2 | |
3 | =pod |
4 | |
5 | =head1 NAME |
6 | |
7 | apt-mirror - apt sources mirroring tool |
8 | |
9 | =head1 SYNOPSIS |
10 | |
11 | apt-mirror [configfile] |
12 | |
13 | =head1 DESCRIPTION |
14 | |
15 | A small and efficient tool that lets you mirror a part of or |
16 | the whole Debian GNU/Linux distribution or any other apt sources. |
17 | |
18 | Main features: |
19 | * It uses a config similar to APT's F<sources.list> |
20 | * It's fully pool compliant |
21 | * It supports multithreaded downloading |
22 | * It supports multiple architectures at the same time |
23 | * It can automatically remove unneeded files |
24 | * It works well on an overloaded Internet connection |
25 | * It never produces an inconsistent mirror including while mirroring |
26 | * It works on all POSIX compliant systems with Perl and wget |
27 | |
28 | =head1 COMMENTS |
29 | |
30 | apt-mirror uses F</etc/apt/mirror.list> as a configuration file. |
31 | By default it is tuned to official Debian or Ubuntu mirrors. Change |
32 | it for your needs. |
33 | |
34 | After you setup the configuration file you may run as root: |
35 | |
36 | # su - apt-mirror -c apt-mirror |
37 | |
38 | Or uncomment the line in F</etc/cron.d/apt-mirror> to enable daily mirror updates. |
39 | |
40 | =head1 FILES |
41 | |
42 | F</etc/apt/mirror.list> |
43 | Main configuration file |
44 | |
45 | F</etc/cron.d/apt-mirror> |
46 | Cron configuration template |
47 | |
48 | F</var/spool/apt-mirror/mirror> |
49 | Mirror places here |
50 | |
51 | F</var/spool/apt-mirror/skel> |
52 | Place for temporarily downloaded indexes |
53 | |
54 | F</var/spool/apt-mirror/var> |
55 | Log files placed here. URLs and MD5 checksums also here. |
56 | |
57 | =head1 CONFIGURATION EXAMPLES |
58 | |
59 | The mirror.list configuration supports many options, the file is well commented explaining each option. |
60 | Here are some sample mirror configuration lines showing the various supported ways: |
61 | |
62 | Normal: |
63 | deb http://example.com/debian stable main contrib non-free |
64 | |
65 | Arch Specific: (many other architectures are supported) |
66 | deb-powerpc http://example.com/debian stable main contrib non-free |
67 | |
68 | HTTP and FTP Auth or non-standard port: |
69 | deb http://user:pass@example.com:8080/debian stable main contrib non-free |
70 | |
71 | HTTPS with sending Basic HTTP authentication information (plaintext username and password) for all requests: |
72 | (this was default behaviour of Wget 1.10.2 and prior and is needed for some servers with new version of Wget) |
73 | set auth_no_challenge 1 |
74 | deb https://user:pass@example.com:443/debian stable main contrib non-free |
75 | |
76 | HTTPS without checking certificate: |
77 | set no_check_certificate 1 |
78 | deb https://example.com:443/debian stable main contrib non-free |
79 | |
80 | Source Mirroring: |
81 | deb-src http://example.com/debian stable main contrib non-free |
82 | |
83 | =head1 AUTHORS |
84 | |
85 | Dmitry N. Hramtsov E<lt>hdn@nsu.ruE<gt> |
86 | Brandon Holtsclaw E<lt>me@brandonholtsclaw.comE<gt> |
87 | |
88 | =cut |
89 | |
90 | use warnings; |
91 | use strict; |
92 | use File::Copy; |
93 | use File::Compare; |
94 | use File::Path qw(make_path); |
95 | use File::Basename; |
96 | use Fcntl qw(:flock); |
97 | |
98 | my $config_file; |
99 | |
100 | my %config_variables = ( |
101 | "defaultarch" => `dpkg --print-architecture 2>/dev/null` || 'i386', |
102 | "nthreads" => 20, |
103 | "base_path" => '/var/spool/apt-mirror', |
104 | "mirror_path" => '$base_path/mirror', |
105 | "skel_path" => '$base_path/skel', |
106 | "var_path" => '$base_path/var', |
107 | "cleanscript" => '$var_path/clean.sh', |
108 | "_contents" => 1, |
109 | "_autoclean" => 0, |
110 | "_tilde" => 0, |
111 | "_plus" => 0, |
112 | "limit_rate" => '100m', |
113 | "run_postmirror" => 1, |
114 | "auth_no_challenge" => 0, |
115 | "no_check_certificate" => 0, |
116 | "unlink" => 0, |
117 | "paranoid" => 0, |
118 | "postmirror_script" => '$var_path/postmirror.sh', |
119 | "use_proxy" => 'off', |
120 | "http_proxy" => '', |
121 | "https_proxy" => '', |
122 | "proxy_user" => '', |
123 | "proxy_password" => '' |
124 | ); |
125 | |
126 | my @config_binaries = (); |
127 | my @config_sources = (); |
128 | |
129 | my @release_urls; |
130 | my @index_urls; |
131 | my @childrens = (); |
132 | my %skipclean = (); |
133 | my %clean_directory = (); |
134 | my @hash_strength = qw(SHA512 SHA256 SHA1 MD5Sum); |
135 | my %packages_hashes = ( |
136 | SHA512 => "SHA512", |
137 | SHA256 => "SHA256", |
138 | SHA1 => "SHA1", |
139 | MD5Sum => "MD5sum", |
140 | ); |
141 | my %sources_hashes = ( |
142 | SHA512 => "Checksums-Sha512", |
143 | SHA256 => "Checksums-Sha256", |
144 | SHA1 => "Checksums-Sha1", |
145 | MD5Sum => "Files", |
146 | ); |
147 | my %verify_commands = ( |
148 | SHA512 => "sha512sum", |
149 | SHA256 => "sha256sum", |
150 | SHA1 => "sha1sum", |
151 | MD5Sum => "md5sum", |
152 | ); |
153 | my %checksum_filenames = ( |
154 | SHA512 => "SHA512", |
155 | SHA256 => "SHA256", |
156 | SHA1 => "SHA1", |
157 | MD5Sum => "MD5", |
158 | ); |
159 | |
160 | # Mapping of files downloaded from a by-hash directory to their canonical locations. |
161 | my %hashsum_to_files = (); |
162 | |
163 | # Mapping of all the checksums for a given canonical filename. |
164 | my %file_to_hashsums; |
165 | my %urls_checksums = (); |
166 | |
167 | ###################################################################################### |
168 | ## Setting up $config_file variable |
169 | |
170 | $config_file = "/etc/apt/mirror.list"; # Default value |
171 | if ( $_ = shift ) |
172 | { |
173 | die("apt-mirror: invalid config file specified") unless -e $_; |
174 | $config_file = $_; |
175 | } |
176 | |
177 | chomp $config_variables{"defaultarch"}; |
178 | |
179 | ###################################################################################### |
180 | ## Common subroutines |
181 | |
# Round a number to one decimal place and render it with exactly one digit
# after the decimal point (e.g. 2 -> "2.0", 1.25 -> "1.3"). The sign is
# carried through unchanged.
sub round_number
{
    my ($value) = @_;
    my $sign = '';
    if ( $value < 0 )
    {
        $sign  = '-';
        $value = -$value;
    }
    # Nudge by 0.05 and truncate at one decimal place (round-half-up).
    my $rounded = int( ( $value + .05 ) * 10 ) / 10;
    # Guarantee a fractional part with exactly one digit.
    $rounded .= '.0' unless $rounded =~ /\./;
    $rounded .= '0' if $rounded =~ /\.$/;
    chop $rounded if $rounded =~ /\.\d\d0$/;
    return $sign . $rounded;
}
193 | |
# Render a byte count in human-readable form using binary units
# (bytes, KiB, MiB, GiB). Counts of at least one KiB are scaled to the
# largest applicable unit and rounded to one decimal via round_number();
# smaller counts are printed verbatim as "N bytes".
sub format_bytes
{
    my ($count) = @_;

    # Largest unit first so the first match wins.
    my @units = (
        [ 1024 * 1024 * 1024, 'GiB' ],
        [ 1024 * 1024,        'MiB' ],
        [ 1024,               'KiB' ],
    );

    foreach my $unit (@units)
    {
        my ( $factor, $label ) = @{$unit};
        if ( $count >= $factor )
        {
            return round_number( $count / $factor ) . " " . $label;
        }
    }

    # Below one KiB the exact integer count is shown without rounding.
    return "$count bytes";
}
227 | |
# Look up a configuration variable by name and recursively expand any
# embedded '$name' references against %config_variables (e.g.
# '$base_path/mirror'). Dies if expansion does not terminate within 16
# substitution passes, which guards against circular definitions.
sub get_variable
{
    my $value = $config_variables{ shift @_ };
    my $count = 16;
    while ( $value =~ s/\$(\w+)/$config_variables{$1}/xg )
    {
        die("apt-mirror: too many substitution while evaluating variable") if ( $count-- ) < 0;
    }
    return $value;
}
238 | |
# Wrap a filesystem path in single quotes for safe interpolation into a
# shell command line. Embedded single quotes are escaped with the standard
# close-escape-reopen idiom ('\'').
sub quoted_path
{
    my ($raw) = @_;
    ( my $escaped = $raw ) =~ s/'/'\\''/g;
    return sprintf( "'%s'", $escaped );
}
245 | |
# Take an exclusive, non-blocking flock on var_path/apt-mirror.lock so that
# only one apt-mirror instance runs at a time. Dies immediately if another
# instance already holds the lock.
# NOTE(review): the global bareword handle LOCK_FILE keeps the lock held for
# the process lifetime; the open itself is unchecked — confirm acceptable.
sub lock_aptmirror
{
    open( LOCK_FILE, '>', get_variable("var_path") . "/apt-mirror.lock" );
    my $lock = flock( LOCK_FILE, LOCK_EX | LOCK_NB );
    if ( !$lock )
    {
        die("apt-mirror is already running, exiting");
    }
}
255 | |
# Release the instance lock: closing the handle drops the flock, then the
# lock file itself is removed.
sub unlock_aptmirror
{
    close(LOCK_FILE);
    unlink( get_variable("var_path") . "/apt-mirror.lock" );
}
261 | |
# Verify the files downloaded during a stage ("release", "index", "archive")
# against the per-hash checksum manifests written by download_urls()
# (var_path/<stage>-<HASH>), using the matching external checksum tool
# (sha512sum, sha256sum, sha1sum, md5sum). Files reported as FAILED are
# deleted so a later run re-fetches them.
# Returns the number of corrupted files removed.
sub delete_corrupted_files
{
    my $stage = shift;
    my $found = 0;
    foreach my $hash (@hash_strength)
    {
        my $file = get_variable("var_path") . "/${stage}-${hash}";
        next unless -s $file;

        open my $pipe, "-|", qq(env LC_ALL=C ${verify_commands{$hash}} --check --quiet ${file} 2>/dev/null) or die "Cannot run ${verify_commands{$hash}}";
        while (<$pipe>)
        {
            # The checker prints "<name>: FAILED" for mismatches; other
            # output lines (e.g. read errors) carry no filename.
            my ($filename) = /^(.*): FAILED/;
            # Fixed: guard against non-matching lines, which previously left
            # $filename undef and triggered an uninitialized-value warning
            # in the -f test below.
            next unless defined $filename;
            if (-f $filename)
            {
                $found++;
                print "$filename is corrupted, deleting....\n";
                unlink $filename or die "Cannot delete $filename.";
            }
        }
        close $pipe;
    }
    return $found;
}
288 | |
# Download a set of URLs in parallel with wget.
#   $stage - stage label ("release", "index" or "archive"); names the
#            intermediate url/log/checksum files under var_path
#   @_     - the URLs to fetch
# The URLs are dealt round-robin to up to 'nthreads' forked wget processes,
# each reading its own $stage-urls.$i input file and logging to
# $stage-log.$i. With 'paranoid' set, known checksums are written to
# $stage-<HASH> manifests beforehand and verified afterwards via
# delete_corrupted_files(); any corruption aborts the run. Finally, files
# fetched into by-hash locations are copied to their canonical filenames.
sub download_urls
{
    my $stage = shift;
    my @urls;
    my $i = 0;
    my $pid;
    my $nthreads = get_variable("nthreads");
    my @args = ();
    local $| = 1;    # unbuffered output for the live progress counter

    @urls = @_;
    $nthreads = @urls if @urls < $nthreads;

    # Translate configuration settings into wget command-line options.
    if ( get_variable("auth_no_challenge") == 1 ) { push( @args, "--auth-no-challenge" ); }
    if ( get_variable("no_check_certificate") == 1 ) { push( @args, "--no-check-certificate" ); }
    if ( get_variable("unlink") == 1 ) { push( @args, "--unlink" ); }
    if ( length( get_variable("use_proxy") ) && ( get_variable("use_proxy") eq 'yes' || get_variable("use_proxy") eq 'on' ) )
    {
        if ( length( get_variable("http_proxy") ) || length( get_variable("https_proxy") ) ) { push( @args, "-e use_proxy=yes" ); }
        if ( length( get_variable("http_proxy") ) ) { push( @args, "-e http_proxy=" . get_variable("http_proxy") ); }
        if ( length( get_variable("https_proxy") ) ) { push( @args, "-e https_proxy=" . get_variable("https_proxy") ); }
        if ( length( get_variable("proxy_user") ) ) { push( @args, "-e proxy_user=" . get_variable("proxy_user") ); }
        if ( length( get_variable("proxy_password") ) ) { push( @args, "-e proxy_password=" . get_variable("proxy_password") ); }
    }
    print "Downloading " . scalar(@urls) . " $stage files using $nthreads threads...\n";

    if (get_variable("paranoid"))
    {
        # Write one checksum manifest per hash type for the post-download
        # verification pass.
        my %fh = ();
        foreach my $hash (@hash_strength)
        {
            open $fh{$hash}, ">", get_variable("var_path") . "/${stage}-${hash}" or die ("apt-mirror: Cannot write to ${stage}-${hash}");
        }

        foreach (@urls)
        {
            if ($urls_checksums{$_})
            {
                my ($hash, $hashsum) = @{$urls_checksums{$_}};
                my $fh = $fh{$hash};
                print $fh $hashsum . " " . sanitise_uri($_) . "\n";
            }
        }
        foreach my $hash (@hash_strength)
        {
            close $fh{$hash};
        }
    }

    # Split the URL list round-robin across one input file per thread.
    my @url_fds;
    for ($i=0; $i<$nthreads; $i++)
    {
        open ( $url_fds[$i], ">", get_variable("var_path") . "/$stage-urls.$i") or die("apt-mirror: can't write to intermediate file ($stage-urls.$i)");
    }

    for ($i=scalar(@urls)-1; $i>=0; $i--)
    {
        my $thread = $i % $nthreads;
        print { $url_fds[$thread] } $urls[$i] . "\n";
    }

    foreach (@url_fds) {
        close $_ or die("apt-mirror: can't close intermediate file ($stage-urls.$i)");
    }

    # Fork one worker per thread; each child replaces itself with wget.
    for ($i=0; $i<$nthreads; $i++)
    {

        $pid = fork();

        die("apt-mirror: can't do fork in download_urls") if !defined($pid);

        if ( $pid == 0 )
        {
            exec 'wget', '--no-if-modified-since', '--no-cache', '--limit-rate=' . get_variable("limit_rate"), '-T', '60', '-t', '1', '-r', '-N', '-l', 'inf', '-o', get_variable("var_path") . "/$stage-log.$i", '-i', get_variable("var_path") . "/$stage-urls.$i", @args;

            # shouldn't reach this unless exec fails
            die("\n\nCould not run wget, please make sure its installed and in your path\n\n");
        }

        push @childrens, $pid;
    }

    # Reap the children, printing a countdown of live workers.
    print "Begin time: " . localtime() . "\n[" . scalar(@childrens) . "]... ";
    while ( scalar @childrens )
    {
        my $dead = wait();
        @childrens = grep { $_ != $dead } @childrens;
        print "[" . scalar(@childrens) . "]... ";
    }
    print "\nEnd time: " . localtime() . "\n\n";

    if (get_variable("paranoid"))
    {
        if (delete_corrupted_files($stage) > 0)
        {
            die "Some files were corrupted while downloading, aborting...";
        }
    }

    # Copy by-hash downloads to the canonical filenames recorded in
    # %hashsum_to_files by add_url_to_download().
    if (scalar keys %hashsum_to_files > 0)
    {
        foreach my $hashsum_filename (keys %hashsum_to_files)
        {
            foreach my $filename (@{$hashsum_to_files{$hashsum_filename}})
            {
                copy_file( $hashsum_filename, $filename );
            }
        }
    }

}
401 | |
402 | ## Parse config |
403 | |
# Parse one mirror.list line (passed implicitly in $_) and classify it.
# Returns a hash whose 'type' key is one of:
#   'deb' / 'deb-src'      - repository lines, with 'arch' (from a
#                            deb-<arch> suffix or an [arch=...] option),
#                            'options', 'uri', and 'components' as an
#                            arrayref (the first element is the
#                            distribution);
#   'set'                  - variable assignment ('key' / 'value', with one
#                            level of surrounding quotes stripped);
#   'clean' / 'skip-clean' - cleanup directives carrying a 'uri'.
# An empty hash is returned when the line matches none of the patterns.
sub parse_config_line
{
    my $pattern_deb_line = qr/^[\t ]*(?<type>deb-src|deb)(?:-(?<arch>[\w\-]+))?[\t ]+(?:\[(?<options>[^\]]+)\][\t ]+)?(?<uri>[^\s]+)[\t ]+(?<components>.+)$/;
    my $line = $_;
    my %config;
    if ( $line =~ $pattern_deb_line ) {
        $config{'type'} = $+{type};
        $config{'arch'} = $+{arch};
        $config{'options'} = $+{options} ? $+{options} : "";
        $config{'uri'} = $+{uri};
        $config{'components'} = $+{components};
        # An explicit [arch=...] option overrides the deb-<arch> suffix.
        if ( $config{'options'} =~ /arch=((?<arch>[\w\-]+)[,]*)/g ) {
            $config{'arch'} = $+{arch};
        }
        $config{'components'} = [ split /\s+/, $config{'components'} ];
    } elsif ( $line =~ /set[\t ]+(?<key>[^\s]+)[\t ]+(?<value>"[^"]+"|'[^']+'|[^\s]+)/ ) {
        $config{'type'} = 'set';
        $config{'key'} = $+{key};
        $config{'value'} = $+{value};
        # Strip one level of surrounding single or double quotes.
        $config{'value'} =~ s/^'(.*)'$/$1/;
        $config{'value'} =~ s/^"(.*)"$/$1/;
    } elsif ( $line =~ /(?<type>clean|skip-clean)[\t ]+(?<uri>[^\s]+)/ ) {
        $config{'type'} = $+{type};
        $config{'uri'} = $+{uri};
    }

    return %config;
}
432 | |
# Convert a repository URI into the on-disk directory name used under
# mirror_path / skel_path: strips the scheme, drops user:password@
# credentials, optionally percent-encodes '~' and '+' (per the _tilde and
# _plus settings), and removes a trailing slash.
sub sanitise_uri
{
    my $uri = shift;
    $uri =~ s[^(\w+)://][];
    # Strip credentials only when the host segment actually contains '@'.
    $uri =~ s/^([^@]+)?@?// if (split '/',$uri)[0] =~ /@/;
    $uri =~ s/~/\%7E/g if get_variable("_tilde");
    $uri =~ s/\+/\%2B/g if get_variable("_plus");
    $uri =~ s[/$][];
    return $uri;
}
443 | |
# Read mirror.list: 'set' lines populate %config_variables, deb/deb-src
# lines are collected into @config_binaries / @config_sources, and
# clean/skip-clean lines mark directories for the cleanup phase. Any
# unrecognised non-comment line aborts with its line number.
open CONFIG, "<$config_file" or die("apt-mirror: can't open config file ($config_file)");
while (<CONFIG>)
{
    next if /^\s*#/;     # skip comment lines
    next unless /\S/;    # skip blank lines
    my $line = $_;
    my %config_line = parse_config_line;

    if ( $config_line{'type'} eq "set" ) {
        $config_variables{ $config_line{'key'} } = $config_line{'value'};
        next;
    } elsif ( $config_line{'type'} eq "deb" ) {
        # Binary repo: fall back to the host architecture when none given.
        my $arch = $config_line{'arch'};
        $arch = get_variable("defaultarch") if ! defined $config_line{'arch'};
        push @config_binaries, [ $arch, $config_line{'uri'}, @{$config_line{'components'}} ];
        next;
    } elsif ( $config_line{'type'} eq "deb-src" ) {
        push @config_sources, [ $config_line{'uri'}, @{$config_line{'components'}} ];
        next;
    } elsif ( $config_line{'type'} =~ /(skip-clean|clean)/ ) {
        my $link = sanitise_uri($config_line{'uri'});
        if ( $config_line{'type'} eq "skip-clean" ) {
            $skipclean{ $link } = 1;
        } elsif ( $config_line{'type'} eq "clean" ) {
            $clean_directory{ $link } = 1;
        }
        next;
    }

    die("apt-mirror: invalid line in config file ($.: $line ...)");
}
close CONFIG;

# defaultarch may be empty if `dpkg` was unavailable and no 'set' overrode it.
die("Please explicitly specify 'defaultarch' in mirror.list") unless get_variable("defaultarch");
478 | |
479 | ###################################################################################### |
480 | ## Create the 3 needed directories if they don't exist yet |
481 | my @needed_directories = ( get_variable("mirror_path"), get_variable("skel_path"), get_variable("var_path") ); |
482 | foreach my $needed_directory (@needed_directories) |
483 | { |
484 | unless ( -d $needed_directory ) |
485 | { |
486 | make_path($needed_directory) or die("apt-mirror: can't create $needed_directory directory"); |
487 | } |
488 | } |
489 | # |
490 | ####################################################################################### |
491 | |
492 | lock_aptmirror(); |
493 | |
494 | ###################################################################################### |
495 | ## Skel download |
496 | |
497 | my %urls_to_download = (); |
498 | my ( $url, $arch ); |
499 | |
# Normalise a URL or path: collapse "/./" segments, squeeze duplicate
# slashes (but not the "://" of the scheme), resolve "dir/../" segments,
# and apply the same optional '~' / '+' percent-encoding as sanitise_uri().
sub remove_double_slashes
{
    local $_ = shift;
    while (s[/\./][/]g)              { }
    while (s[(?<!:)//][/]g)          { }
    while (s[(?<!:/)/[^/]+/\.\./][/]g) { }
    s/~/\%7E/g if get_variable("_tilde");
    s/\+/\%2B/g if get_variable("_plus");
    return $_;
}
510 | |
# Register one file for download and record its bookkeeping.
#   $url             - canonical URL of the file
#   $size            - advertised size in bytes (may be undef)
#   $strongest_hash  - strongest hash type offered by the Release file
#   $hash, $hashsum  - checksum type and value for this entry
#   $acquire_by_hash - true when the repository supports Acquire-By-Hash
# The canonical path is always marked as not-to-be-cleaned. With by-hash,
# only the strongest checksum's by-hash URL is actually downloaded; the
# mappings needed to copy it back to the canonical name (and to the other
# hashes' by-hash names) are recorded in %hashsum_to_files /
# %file_to_hashsums for use after the download.
sub add_url_to_download
{
    my $url = remove_double_slashes(shift);
    my $size = shift;
    my $strongest_hash = shift;
    my $hash = shift;
    my $hashsum = shift;
    my $acquire_by_hash = shift;

    my $canonical_filename = sanitise_uri($url);
    $skipclean{$canonical_filename} = 1;

    if ($acquire_by_hash)
    {
        # If the optional hashsum was passed as an argument
        # - download the strongest hash only
        # - make a copy to the canonical location
        # - make a copy for the other known hash versions

        $url = dirname($url) . "/by-hash/${hash}/${hashsum}";

        my $hashsum_filename = dirname($canonical_filename) . "/by-hash/${hash}/${hashsum}";
        $skipclean{$hashsum_filename} = 1;

        if ($hash eq $strongest_hash)
        {
            # This is the strongest hash, which is the one to download.
            # Also need to remember to which canonical location it should be linked.
            $hashsum_to_files{$hashsum_filename} ||= [];
            push @{$hashsum_to_files{$hashsum_filename}}, $canonical_filename;
            $urls_to_download{$url} = $size;
            $urls_checksums{$url} = [ $hash, $hashsum ];

        } else {
            # We are not going to download using this checksum, but we still
            # need to know where to put the checksum.
            $file_to_hashsums{$canonical_filename} ||= [];
            push @{$file_to_hashsums{$canonical_filename}}, $hashsum_filename;
        }
    } else {
        # Not using by-hash, so download the file only.
        $urls_to_download{$url} = $size;
        if ($strongest_hash and ($hash eq $strongest_hash))
        {
            $urls_checksums{$url} = [ $hash, $hashsum ];
        }
    }
}
559 | |
560 | foreach (@config_sources) |
561 | { |
562 | my ( $uri, $distribution, @components ) = @{$_}; |
563 | |
564 | if (@components) |
565 | { |
566 | $url = $uri . "/dists/" . $distribution . "/"; |
567 | } |
568 | else |
569 | { |
570 | $url = $uri . "/" . $distribution . "/"; |
571 | } |
572 | |
573 | add_url_to_download( $url . "InRelease" ); |
574 | add_url_to_download( $url . "Release" ); |
575 | add_url_to_download( $url . "Release.gpg" ); |
576 | } |
577 | |
578 | foreach (@config_binaries) |
579 | { |
580 | my ( $arch, $uri, $distribution, @components ) = @{$_}; |
581 | |
582 | if (@components) |
583 | { |
584 | $url = $uri . "/dists/" . $distribution . "/"; |
585 | |
586 | } |
587 | else |
588 | { |
589 | $url = $uri . "/" . $distribution . "/"; |
590 | } |
591 | |
592 | add_url_to_download( $url . "InRelease" ); |
593 | add_url_to_download( $url . "Release" ); |
594 | add_url_to_download( $url . "Release.gpg" ); |
595 | |
596 | } |
597 | |
598 | chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel"); |
599 | @release_urls = sort keys %urls_to_download; |
600 | download_urls( "release", @release_urls ); |
601 | |
602 | ###################################################################################### |
603 | ## Download all relevant metadata |
604 | |
605 | %urls_to_download = (); |
606 | |
# Scan a repository's (In)Release file for the metadata indexes that should
# be mirrored, and queue them via add_url_to_download().
#   $arch                  - binary architecture, or the literal "source"
#   $uri, $distribution, @components - as parsed from mirror.list; an empty
#                            @components means a flat repository layout
# Returns 1 when a Release/InRelease file was found and processed, 0 when
# neither exists (caller then falls back to well-known index locations).
sub find_metadata_in_release
{
    # Look in the Release file for any files we need to download
    my ( $arch, $uri, $distribution, @components ) = @_;

    my ( $release_uri, $release_path, $line ) = '';
    my $component_regex = undef;
    my $arch_regex = "(?:${arch}|all)";
    my $compressed_extension_regex = '(?:\.(?:gz|bz2|xz|lzma))$';
    my $dist_uri;
    my $hash_type_regex = "(?:" . join("|", @hash_strength) . ")";

    if (@components)
    {
        $dist_uri = remove_double_slashes($uri . "/dists/" . $distribution . "/");
        $component_regex = "(?:" . join("|", @components) . ")";
    }
    else {
        $dist_uri = remove_double_slashes($uri . "/" . $distribution . "/");
    }

    # Prefer the inline-signed InRelease; fall back to plain Release.
    my $stream;
    foreach my $release_filename ("InRelease", "Release")
    {
        $release_uri = $dist_uri . $release_filename;
        $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);

        last if ( open $stream, "<", $release_path);
        $stream = undef;
    }

    unless ( $stream )
    {
        warn( "Failed to find InRelease or Release in " . get_variable("skel_path") . "/" . sanitise_uri($dist_uri) );
        return 0;
    }


    # Walk the file line by line. $hash holds the checksum section we are
    # currently inside ("SHA256:" etc.); indented lines within a section are
    # "<hashsum> <size> <filename>" entries.
    my $hash = undef;
    my %avaiable_hashes = ();    # NOTE(review): sic — typo for "available", kept as-is
    my $acquire_by_hash = 0;
    my @parts_to_download = ();
    while ( $line = <$stream> )
    {
        chomp $line;
        if ($hash)
        {
            if ( $line =~ /^ +(.*)$/ )
            {
                my @parts = split( / +/, $1 );
                if ( @parts == 3 )
                {
                    my ( $hashsum, $size, $filename ) = @parts;
                    push @parts, $hash;
                    if ($arch eq "source")
                    {
                        if ($component_regex)
                        {
                            # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
                            if (
                                (
                                    $filename =~ m{^${component_regex}/source/Sources${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/Contents-source${compressed_extension_regex}}
                                )
                            )
                            {
                                push @parts_to_download, \@parts;
                            }
                        } else {
                            # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
                            if ($filename =~ m{^Sources${compressed_extension_regex}}
                            ) {
                                push @parts_to_download, \@parts;
                            }
                        }
                    } else {
                        if ($component_regex)
                        {
                            # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
                            if (
                                (
                                    $filename =~ m{^${component_regex}/Contents-${arch_regex}${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^Contents-${arch_regex}${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^Packages${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/binary-${arch_regex}/Packages${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/binary-${arch_regex}/Release$}
                                ) or (
                                    $filename =~ m{^${component_regex}/cnf/Commands-${arch_regex}${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/dep11/Components-${arch_regex}.*${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/dep11/icons-.*${compressed_extension_regex}}
                                ) or (
                                    $filename =~ m{^${component_regex}/i18n/Translation-.*${compressed_extension_regex}}
                                )
                            )
                            {
                                push @parts_to_download, \@parts;
                            }
                        } else {
                            # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
                            if ($filename =~ m{^Packages${compressed_extension_regex}})
                            {
                                push @parts_to_download, \@parts;
                            }
                        }
                    }
                }
                else
                {
                    warn("Malformed checksum line \"$1\" in $release_uri");
                }
            }
            else
            {
                # A non-indented line ends the current checksum section.
                $hash = undef;
            }
        }
        if ( not $hash )
        {
            if ( $line =~ /^(${hash_type_regex}):$/ )
            {
                $hash = $1;
                $avaiable_hashes{$hash} = 1;
            }
            elsif ( $line eq "Acquire-By-Hash: yes" )
            {
                $acquire_by_hash = 1;
            }
        }
    }
    close $stream;

    # Pick the strongest hash the Release file offers for by-hash downloads.
    my $strongest_hash;
    if ($acquire_by_hash)
    {
        foreach (@hash_strength)
        {
            if ($avaiable_hashes{$_})
            {
                $strongest_hash = $_;
                last;
            }
        }
        unless ($strongest_hash)
        {
            warn("Cannot find a supported hash in $release_uri, will download from canonical locations.");
            $acquire_by_hash = 0;
        }
    }

    foreach (@parts_to_download)
    {
        my ( $hashsum, $size, $filename, $hash ) = @{$_};
        if ($acquire_by_hash)
        {
            add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $hash, $hashsum, 1 );
        }
        else
        {
            add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $hash, $hashsum, 0 );
        }
    }
    return 1;
}
777 | |
778 | print "Processing metadata files from releases ["; |
779 | foreach (@config_binaries) |
780 | { |
781 | my ( $arch, $uri, $distribution, @components ) = @{$_}; |
782 | print "M"; |
783 | unless (find_metadata_in_release( $arch, $uri, $distribution, @components)) |
784 | { |
785 | # Insecure repo with no release file - try to get the well known indices |
786 | foreach my $file_extension (".gz", ".bz2", ".xz", ".lzma", "") |
787 | { |
788 | if (@components) |
789 | { |
790 | # Debian repo |
791 | foreach my $component (@components) |
792 | { |
793 | foreach my $path ( |
794 | "/dists/${distribution}/${component}/binary-${arch}/Packages", |
795 | "/dists/${distribution}/${component}/binary-all/Packages", |
796 | "/dists/${distribution}/${component}/Contents-${arch}", |
797 | "/dists/${distribution}/${component}/Contents-all", |
798 | "/dists/${distribution}/Contents-${arch}", |
799 | "/dists/${distribution}/Contents-all", |
800 | ) |
801 | { |
802 | add_url_to_download( "${uri}/${path}${file_extension}" ); |
803 | } |
804 | } |
805 | } else { |
806 | # Flat repo |
807 | foreach my $path ( |
808 | "${distribution}/Packages", |
809 | "${distribution}/Contents-${arch}", |
810 | "${distribution}/Contents-all", |
811 | ) |
812 | { |
813 | add_url_to_download( "${uri}/${path}${file_extension}" ); |
814 | } |
815 | } |
816 | } |
817 | } |
818 | } |
819 | |
820 | foreach (@config_sources) |
821 | { |
822 | my ( $uri, $distribution, @components ) = @{$_}; |
823 | print "M"; |
824 | unless (find_metadata_in_release( "source", $uri, $distribution, @components)) |
825 | { |
826 | # Insecure repo with no release file - try to get the well known indices |
827 | foreach my $file_extension (".gz", ".bz2", ".xz", ".lzma", "") |
828 | { |
829 | if (@components) |
830 | { |
831 | # Debian repo |
832 | foreach my $path ( |
833 | "${distribution}/source/Sources", |
834 | "${distribution}/Contents-source", |
835 | ) |
836 | { |
837 | add_url_to_download( "${uri}/${path}${file_extension}" ); |
838 | } |
839 | } else { |
840 | # Flat repo |
841 | add_url_to_download( "${uri}/${distribution}/Sources${file_extension}" ); |
842 | } |
843 | } |
844 | } |
845 | } |
846 | print "]\n\n"; |
847 | |
848 | @index_urls = sort keys %urls_to_download; |
849 | download_urls( "index", @index_urls ); |
850 | |
851 | ###################################################################################### |
852 | ## Main download preparations |
853 | |
854 | %urls_to_download = (); |
855 | |
856 | my %files_fh; |
857 | |
858 | open $files_fh{ALL}, ">" . get_variable("var_path") . "/ALL" or die("apt-mirror: can't write to intermediate file (ALL)"); |
859 | open $files_fh{NEW}, ">" . get_variable("var_path") . "/NEW" or die("apt-mirror: can't write to intermediate file (NEW)"); |
860 | foreach my $hash (@hash_strength) |
861 | { |
862 | open $files_fh{$hash}, ">" . get_variable("var_path") . "/" . ${checksum_filenames{$hash}} or die("apt-mirror: can't write to intermediate file (${hash})"); |
863 | } |
864 | |
865 | my %stat_cache = (); |
866 | |
# Memoised stat(): return the stat() list for $filename, caching results in
# %stat_cache so repeated index entries for the same file stay cheap.
sub _stat
{
    my ($filename) = @_;
    unless ( exists $stat_cache{$filename} )
    {
        $stat_cache{$filename} = [ stat($filename) ];
    }
    return @{ $stat_cache{$filename} };
}
875 | |
# Drop all memoised stat() results (called after downloads change the tree).
sub clear_stat_cache
{
    %stat_cache = ();
}
880 | |
# Decide whether a mirrored file needs (re)downloading by comparing its
# cached stat() size with the size advertised in the index.
# Returns 1 when the file is missing (or zero-size) or the sizes differ,
# 0 when the sizes match. With the 'unlink' option set, a stale file is
# removed first so wget re-creates it instead of updating in place.
sub need_update
{
    my $filename = shift;
    my $size_on_server = shift;

    my ( undef, undef, undef, undef, undef, undef, undef, $size ) = _stat($filename);

    # NOTE(review): a zero-byte file on disk is treated as missing here.
    return 1 unless ($size);
    return 0 if $size_on_server == $size;

    if ( get_variable("unlink") == 1 )
    {
        unlink $filename;
    }
    return 1;
}
897 | |
# Parse one downloaded Packages/Sources index and schedule every file it
# references for download into the mirror.
#   $uri      - base URI of the repository
#   $index    - index path relative to the repository root
#   $optional - when true, a missing index is silently ignored
# If only a compressed copy of the index exists it is decompressed in
# place first. Every referenced file is recorded in the ALL / per-hash /
# NEW intermediate files under var_path, marked as not-to-be-cleaned, and
# queued via add_url_to_download() when the mirrored copy is missing or
# has the wrong size.
sub process_index
{
    my $uri = shift;
    my $index = shift;
    my $optional = shift;
    my ( $path, $package, $mirror, $files ) = '';

    $path = sanitise_uri($uri);
    local $/ = "\n\n";    # read the index one stanza at a time
    $mirror = get_variable("mirror_path") . "/" . $path;

    # Decompress whichever compressed variant is present.
    # NOTE(review): paths are interpolated into a shell command unquoted;
    # they derive from mirror.list URIs — confirm acceptable.
    if (-e "$path/$index.gz" )
    {
        system("gunzip < $path/$index.gz > $path/$index");
    }
    elsif (-e "$path/$index.xz" )
    {
        system("xz -d < $path/$index.xz > $path/$index");
    }
    elsif (-e "$path/$index.lzma" )
    {
        # Fixed: this branch previously read "$path/$index.xz", so an index
        # available only as .lzma produced an empty decompressed file.
        system("xz -d < $path/$index.lzma > $path/$index");
    }
    elsif (-e "$path/$index.bz2" )
    {
        system("bzip2 -d < $path/$index.bz2 > $path/$index");
    }

    unless ( open STREAM, "<$path/$index" )
    {
        if ($optional)
        {
            return;
        }
        warn("apt-mirror: can't open index $path/$index in process_index");
        return;
    }

    while ( $package = <STREAM> )
    {
        local $/ = "\n";
        chomp $package;
        # Split the stanza into "Field: value" pairs; multi-line fields
        # (Files, Checksums-*) keep their embedded newlines.
        my ( undef, %lines ) = split( /^([\w\-]+): */m, $package );

        chomp(%lines);

        if ( exists $lines{"Filename"} )
        {    # Packages index
            my $filename = remove_double_slashes( $path . "/" . $lines{"Filename"});
            $skipclean{ $filename } = 1;
            print { $files_fh{ALL} } $filename . "\n";
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $packages_hashes{$hash};
                print { $files_fh{$hash} } $lines{$index_hash} . " " . $filename . "\n" if $lines{$index_hash};
            }
            if ( need_update( $mirror . "/" . $lines{"Filename"}, $lines{"Size"} ) )
            {
                # Remember the strongest checksum the stanza provides.
                my $hashsum = undef;
                my $hash = undef;
                foreach $hash (@hash_strength)
                {
                    my $index_hash = $packages_hashes{$hash};
                    if ($lines{$index_hash})
                    {
                        $hashsum = $lines{$index_hash};
                        last;
                    }
                }
                print { $files_fh{NEW} } $filename . "\n";
                add_url_to_download( $uri . "/" . $lines{"Filename"}, $lines{"Size"}, $hash, $hash, $hashsum, 0 );
            }
        }
        else
        {    # Sources index
            $lines{"Directory"} = "" unless defined $lines{"Directory"};
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $sources_hashes{$hash};
                if ($lines{$index_hash})
                {
                    foreach ( split( /\n/, $lines{$index_hash} ) )
                    {
                        next if $_ eq '';
                        my @file = split;
                        die("apt-mirror: invalid Sources format") if @file != 3;
                        my $download_url = $uri . "/" . $lines{"Directory"} . "/" . $file[2];
                        my $filename = remove_double_slashes( $path . "/" . $lines{"Directory"} . "/" . $file[2] );
                        # Fixed: restored the corrupted "$(unknown)" token —
                        # the manifest format is "<hashsum> <filename>",
                        # matching the Packages branch above.
                        print { $files_fh{$hash} } $file[0] . " " . $filename . "\n";

                        unless ($skipclean{ $filename })
                        {
                            $skipclean{ $filename } = 1;
                            # Fixed: restored the corrupted "$(unknown)" token.
                            print { $files_fh{ALL} } $filename . "\n";
                            if ( need_update( $mirror . "/" . $lines{"Directory"} . "/" . $file[2], $file[1] ) )
                            {
                                print { $files_fh{NEW} } ${download_url} . "\n";
                                add_url_to_download( $uri . "/" . $lines{"Directory"} . "/" . $file[2], $file[1], $hash, $hash, $file[0], 0 );
                            }
                        }
                    }
                }
            }
        }
    }

    close STREAM;
}
1006 | |
print "Processing indexes: [";

# Walk every configured "deb-src" style source and parse its Sources index.
# "S" is a progress marker on stdout for each source processed.
foreach my $source (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source};
    print "S";
    if (@components)
    {
        # Pool-style layout: one Sources index per component.
        process_index( $uri, "/dists/$distribution/$_/source/Sources" ) for @components;
    }
    else
    {
        # Flat repository layout (no components).
        process_index( $uri, "/$distribution/Sources" );
    }
}
1026 | |
# Walk every configured binary source and parse its Packages indexes.
# "P" is a progress marker on stdout for each (arch, uri, dist) entry.
foreach my $binary (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary};
    print "P";
    if (@components)
    {
        foreach my $component (@components)
        {
            # Per-architecture index, plus the arch-independent
            # "binary-all" index (optional — hence the trailing 1 flag).
            process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" );
            process_index( $uri, "/dists/$distribution/$component/binary-all/Packages", 1 );
        }
    }
    else
    {
        # Flat repository layout (no components).
        process_index( $uri, "/$distribution/Packages" );
    }
}

clear_stat_cache();

print "]\n\n";

# All per-hash bookkeeping files are complete now; flush and release them.
close $_ for values %files_fh;
1054 | |
######################################################################################
## Main download

chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

# Total size (bytes) of everything queued for download; the hash maps
# URL -> size, so summing the values gives the overall transfer size.
my $need_bytes = 0;
$need_bytes += $_ for values %urls_to_download;

my $size_output = format_bytes($need_bytes);

print "$size_output will be downloaded into archive.\n";

# Sorted for a deterministic download order.
download_urls( "archive", sort keys %urls_to_download );
1071 | |
1072 | ###################################################################################### |
1073 | ## Copy skel to main archive |
1074 | |
# Hard-link (or, failing that, copy) $from to $to, preserving the source's
# access/modification times. Does nothing when the source is missing, and
# skips the work when the destination already shares the source's stat data
# (i.e. it is a hard link to the same inode).
sub copy_file
{
    my ( $source, $destination ) = @_;
    my $destination_dir = dirname($destination);

    return unless -f $source;
    make_path($destination_dir) unless -d $destination_dir;

    if ( get_variable("unlink") == 1 )
    {
        # Remove a differing destination first so link() below can create
        # a fresh hard link instead of writing through a shared inode.
        unlink($destination) if compare( $source, $destination ) != 0;
    }

    my @stat_source = stat($source);
    if ( -f $destination )
    {
        # Identical stringified stat records mean both names refer to the
        # same inode already — nothing to do.
        my @stat_destination = stat($destination);
        return if "@stat_destination" eq "@stat_source";
    }

    unless ( link( $source, $destination ) or copy( $source, $destination ) )
    {
        # Best-effort: a failed copy is reported but not fatal.
        warn("apt-mirror: can't copy $source to $destination");
        return;
    }

    # Propagate the source's atime/mtime (stat fields 8 and 9).
    my ( $atime, $mtime ) = @stat_source[ 8, 9 ];
    utime( $atime, $mtime, $destination ) or die("apt-mirror: can't utime $destination");
}
1100 | |
# Publish the freshly downloaded release/index files from skel/ into the
# live mirror tree. Note: the s/// below mutates the aliased array entries,
# leaving them scheme-less — same as the original behaviour.
foreach my $url (@release_urls, @index_urls)
{
    # Strip the "scheme://" prefix; the remainder maps onto the on-disk path.
    die("apt-mirror: invalid url in index_urls") unless $url =~ s[^(\w+)://][];

    my $sanitized_uri = sanitise_uri($url);

    copy_file( get_variable("skel_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $sanitized_uri );

    # If we downloaded any files from a checksum location, now is the time to
    # populate the canonical filename.
    if ( $hashsum_to_files{$sanitized_uri} )
    {
        foreach my $canonical ( @{ $hashsum_to_files{$sanitized_uri} } )
        {
            copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $canonical );

            # ...and every other by-hash name that refers to the same file.
            if ( $file_to_hashsums{$canonical} )
            {
                foreach my $hashsum_name ( @{ $file_to_hashsums{$canonical} } )
                {
                    copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $hashsum_name );
                }
            }
        }
    }
}
1125 | |
######################################################################################
## Make cleaning script

# Files/directories no longer referenced by any index (candidates for
# removal) and the disk space, in bytes, they currently occupy.
# Declared separately on purpose: in a single list assignment
# "my ( @rm_dirs, @rm_files ) = ()" the first array slurps the whole
# right-hand list, so the second array could never receive anything —
# a misleading (if here harmless) Perl trap.
my @rm_dirs;
my @rm_files;
my $unnecessary_bytes = 0;
1131 | |
# Symbolic links are never cleaning candidates: report them as needed so
# their parent directory is kept as well.
sub process_symlink
{
    my $always_needed = 1;    # symlinks are always needed
    return $always_needed;
}
1136 | |
# Examine one regular file found under a clean_directory tree.
# Returns 1 (keep) when the file is still referenced by an index
# (present in %skipclean); otherwise queues it on @rm_files, adds its
# on-disk size to $unnecessary_bytes, and returns 0.
sub process_file
{
    my $file = shift;

    # Index files store these characters percent-encoded, so encode the
    # local name before consulting %skipclean.
    $file =~ s[~][%7E]g if get_variable("_tilde");
    $file =~ s[\+][%2B]g if get_variable("_plus");
    return 1 if $skipclean{$file};

    # NOTE(review): the queued entry is sanitised but stat() below uses
    # the (possibly percent-encoded) name as-is — assumes the on-disk
    # names already carry the encoded form; confirm against the fetcher.
    push @rm_files, sanitise_uri($file);

    # Disk usage is blocks * 512 (stat field 12). Guard against a failed
    # stat, which previously produced an undef-arithmetic warning while
    # silently counting the file as zero bytes.
    my $blocks = ( stat($file) )[12];
    $unnecessary_bytes += $blocks * 512 if defined $blocks;

    return 0;
}
1148 | |
# Recursively walk $dir deciding whether anything inside is still needed.
# A directory is kept when it is marked in %skipclean or contains a needed
# file/symlink/subdirectory; otherwise it is queued on @rm_dirs.
# Returns 1 when the directory must be kept, 0 otherwise.
sub process_directory
{
    my $dir = shift;
    return 1 if $skipclean{$dir};

    my $keep = 0;
    opendir( my $handle, $dir ) or die "apt-mirror: can't opendir $dir: $!";
    foreach my $entry ( readdir($handle) )
    {
        next if $entry eq '.' or $entry eq '..';
        my $path = $dir . "/" . $entry;

        # Every entry must be visited for its side effects (queuing onto
        # the rm lists), so accumulate with a non-short-circuiting OR.
        $keep |= process_directory($path) if -d $path && !-l $path;
        $keep |= process_file($path)      if -f $path;
        $keep |= process_symlink($path)   if -l $path;
    }
    closedir $handle;

    push @rm_dirs, $dir unless $keep;
    return $keep;
}
1166 | |
chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

# Scan every configured clean_directory root; symlinked or missing roots
# are skipped entirely.
foreach my $clean_root ( keys %clean_directory )
{
    process_directory($clean_root) if -d $clean_root && !-l $clean_root;
}
1173 | |
# The clean script is (re)created in both modes so the chmod further down
# always has a file to operate on. Three-arg open with a lexical handle
# replaces the original bareword/2-arg open, and the handle is now closed
# on both paths (previously the autoclean branch leaked it until exit, and
# write errors could go unnoticed because close was never checked).
open my $clean_fh, ">", get_variable("cleanscript") or die("apt-mirror: can't open clean script file");

my ( $i, $total ) = ( 0, scalar @rm_files );

if ( get_variable("_autoclean") )
{

    # Autoclean: remove the unneeded entries directly instead of emitting
    # a script for the user to run.
    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories will be freed...";

    chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

    foreach (@rm_files) { unlink $_; }
    foreach (@rm_dirs)  { rmdir $_; }

    # Nothing was written; close so the (empty) script exists on disk.
    close $clean_fh;

}
else
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories can be freed.\n";
    print "Run " . get_variable("cleanscript") . " for this purpose.\n\n";

    print {$clean_fh} "#!/bin/sh\n";
    print {$clean_fh} "set -e\n\n";
    print {$clean_fh} "cd " . quoted_path(get_variable("mirror_path")) . "\n\n";
    print {$clean_fh} "echo 'Removing $total unnecessary files [$size_output]...'\n";
    foreach (@rm_files)
    {
        print {$clean_fh} "rm -f '$_'\n";
        # Progress markers: a percentage every 500 files, a dot every 10.
        print {$clean_fh} "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 500;
        print {$clean_fh} "echo -n .\n" unless $i % 10;
        $i++;
    }
    print {$clean_fh} "echo 'done.'\n";
    print {$clean_fh} "echo\n\n";

    $i     = 0;
    $total = scalar @rm_dirs;
    print {$clean_fh} "echo 'Removing $total unnecessary directories...'\n";
    foreach (@rm_dirs)
    {
        # rm -fr: a parent removal may already have taken a child with it.
        print {$clean_fh} "if test -d '$_'; then rm -fr '$_'; fi\n";
        print {$clean_fh} "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 50;
        print {$clean_fh} "echo -n .\n";
        $i++;
    }
    print {$clean_fh} "echo 'done.'\n";
    print {$clean_fh} "echo\n";

    # Buffered write errors only surface at close time — check it.
    close $clean_fh or die("apt-mirror: can't close clean script file");

}
1227 | |
# Make clean script executable
# Keep the existing permission bits and add execute for user/group/other.
my $clean_mode = ( stat get_variable("cleanscript") )[2] & 07777;
chmod( $clean_mode | 0111, get_variable("cleanscript") );
1231 | |
if ( get_variable("run_postmirror") )
{
    print "Running the Post Mirror script ...\n";
    print "(" . get_variable("postmirror_script") . ")\n\n";

    # Run the script directly when it is executable; otherwise hand it to
    # /bin/sh. List-form system() avoids any shell interpolation of the
    # configured path.
    my @postmirror_cmd =
        -x get_variable("postmirror_script")
        ? ( get_variable("postmirror_script"), '' )
        : ( '/bin/sh', get_variable("postmirror_script") );
    system(@postmirror_cmd);

    print "\nPost Mirror script has completed. See above output for any possible errors.\n\n";
}

unlock_aptmirror();
1248 |