Better way to mirror CPAN locally?

gvim gvimrc at gmail.com
Mon May 7 13:21:37 BST 2012


I currently use the script listed below, provided by Randall Schwartz, to mirror CPAN locally as I spend a lot of time Perl-ing without an internet connection. With CPAN now totalling around 2GB I'm wondering if there isn't a more efficient method as this script doesn't use any kind of rsync method and I end up just downloading the whole 2GB every time.

gvim

***************************************************
#!/usr/bin/perl -w
use strict;
$|++;

my $REMOTE = "http://mirror.bytemark.co.uk/CPAN/";

## warning: unknown files below this dir are deleted!
my $LOCAL = "/Users/gmac/cpmirror/mirror/";

my $TRACE = 1;


## core -
use File::Path qw(mkpath);
use File::Basename qw(dirname);
use File::Spec::Functions qw(catfile);
use File::Find qw(find);

## LWP -
use URI ();
use LWP::Simple qw(mirror RC_OK RC_NOT_MODIFIED);

## Compress::Zlib -
use Compress::Zlib qw(gzopen $gzerrno);

## first, get index files
my_mirror($_) for qw(authors/01mailrc.txt.gz modules/02packages.details.txt.gz modules/03modlist.data.gz);

## now walk the packages list
my $details = catfile($LOCAL, qw(modules 02packages.details.txt.gz));
my $gz = gzopen($details, "rb") or die "Cannot open details: $gzerrno";
my $inheader = 1;
while ($gz->gzreadline($_) > 0) {
   if ($inheader) {
     $inheader = 0 unless /\S/;
     next;
   }
   my ($module, $version, $path) = split;
   next if $path =~ m{/perl-5};  # skip Perl distributions
   my_mirror("authors/id/$path", 1);
}

## finally, clean the files we didn't stick there
clean_unmirrored();
exit 0;

BEGIN {
my %mirrored;

sub my_mirror {
  my $path = shift;           # partial URL
  my $skip_if_present = shift; # true/false

  my $remote_uri = URI->new_abs($path, $REMOTE)->as_string; # full URL
  my $local_file = catfile($LOCAL, split "/", $path); # native absolute file
  my $checksum_might_be_up_to_date = 1;

  if ($skip_if_present and -f $local_file) {
    ## upgrade to checked if not already
    $mirrored{$local_file} = 1 unless $mirrored{$local_file};
  } elsif (($mirrored{$local_file} || 0) < 2) {
      ## upgrade to full mirror
      $mirrored{$local_file} = 2;
      mkpath(dirname($local_file), $TRACE, 0711);
      print $path if $TRACE;
      my $status = mirror($remote_uri, $local_file);

      if ($status == RC_OK) {
        $checksum_might_be_up_to_date = 0;
        print " ... updated\n" if $TRACE;
      } elsif ($status != RC_NOT_MODIFIED) {
          warn "\n$remote_uri: $status\n";
          return;
        } else {
          {
          print " ... up to date\n" if $TRACE;
          }
        }

      if ($path =~ m{^authors/id}) { # maybe fetch CHECKSUMS
        my $checksum_path = URI->new_abs("CHECKSUMS", $remote_uri)->rel($REMOTE);
        if ($path ne $checksum_path) {
          my_mirror($checksum_path, $checksum_might_be_up_to_date);
        }
      }
    }

    sub clean_unmirrored {
      find sub {
        return unless -f and not $mirrored{$File::Find::name};
        print "$File::Find::name ... removed\n" if $TRACE;
        unlink $_ or warn "Cannot remove $File::Find::name: $!";
      }, $LOCAL;
    }
  }
}


More information about the london.pm mailing list