Better way to mirror CPAN locally?

Fernando Corrêa de Oliveira fernandocorrea at gmail.com
Mon May 7 13:38:27 BST 2012



JAPH

Em 07/05/2012, às 09:21, gvim <gvimrc at gmail.com> escreveu:

> I currently use the script listed below, provided by Randall Schwartz, to mirror CPAN locally as I spend a lot of time Perl-ing without an internet connection. With CPAN now totalling around 2GB I'm wondering if there isn't a more efficient method as this script doesn't use any kind of rsync method and I end up just downloading the whole 2GB every time.
> 
> gvim
> 
> ***************************************************
> #!/usr/bin/perl -w
> use strict;
> $|++;
> 
> my $REMOTE = "http://mirror.bytemark.co.uk/CPAN/";
> 
> ## warning: unknown files below this dir are deleted!
> my $LOCAL = "/Users/gmac/cpmirror/mirror/";
> 
> my $TRACE = 1;
> 
> 
> ## core -
> use File::Path qw(mkpath);
> use File::Basename qw(dirname);
> use File::Spec::Functions qw(catfile);
> use File::Find qw(find);
> 
> ## LWP -
> use URI ();
> use LWP::Simple qw(mirror RC_OK RC_NOT_MODIFIED);
> 
> ## Compress::Zlib -
> use Compress::Zlib qw(gzopen $gzerrno);
> 
> ## first, get index files
> my_mirror($_) for qw(authors/01mailrc.txt.gz modules/02packages.details.txt.gz modules/03modlist.data.gz);
> 
> ## now walk the packages list
> my $details = catfile($LOCAL, qw(modules 02packages.details.txt.gz));
> my $gz = gzopen($details, "rb") or die "Cannot open details: $gzerrno";
> my $inheader = 1;
> while ($gz->gzreadline($_) > 0) {
>  if ($inheader) {
>    $inheader = 0 unless /\S/;
>    next;
>  }
>  my ($module, $version, $path) = split;
>  next if $path =~ m{/perl-5};  # skip Perl distributions
>  my_mirror("authors/id/$path", 1);
> }
> 
> ## finally, clean the files we didn't stick there
> clean_unmirrored();
> exit 0;
> 
> BEGIN {
> my %mirrored;
> 
> sub my_mirror {
> my $path = shift;           # partial URL
> my $skip_if_present = shift; # true/false
> 
> my $remote_uri = URI->new_abs($path, $REMOTE)->as_string; # full URL
> my $local_file = catfile($LOCAL, split "/", $path); # native absolute file
> my $checksum_might_be_up_to_date = 1;
> 
> if ($skip_if_present and -f $local_file) {
>   ## upgrade to checked if not already
>   $mirrored{$local_file} = 1 unless $mirrored{$local_file};
> } elsif (($mirrored{$local_file} || 0) < 2) {
>     ## upgrade to full mirror
>     $mirrored{$local_file} = 2;
>     mkpath(dirname($local_file), $TRACE, 0711);
>     print $path if $TRACE;
>     my $status = mirror($remote_uri, $local_file);
> 
>     if ($status == RC_OK) {
>       $checksum_might_be_up_to_date = 0;
>       print " ... updated\n" if $TRACE;
>     } elsif ($status != RC_NOT_MODIFIED) {
>         warn "\n$remote_uri: $status\n";
>         return;
>       } else {
>         {
>         print " ... up to date\n" if $TRACE;
>         }
>       }
> 
>     if ($path =~ m{^authors/id}) { # maybe fetch CHECKSUMS
>       my $checksum_path = URI->new_abs("CHECKSUMS", $remote_uri)->rel($REMOTE);
>       if ($path ne $checksum_path) {
>         my_mirror($checksum_path, $checksum_might_be_up_to_date);
>       }
>     }
>   }
> 
>   sub clean_unmirrored {
>     find sub {
>       return unless -f and not $mirrored{$File::Find::name};
>       print "$File::Find::name ... removed\n" if $TRACE;
>       unlink $_ or warn "Cannot remove $File::Find::name: $!";
>     }, $LOCAL;
>   }
> }
> }



More information about the london.pm mailing list