Skip to content

Commit

Permalink
Fixes a bug where distvname was out of sync between modules and dist in Padre (and likely other dists which have more than one version in the main index)
Browse files Browse the repository at this point in the history
  • Loading branch information
oalders committed Dec 17, 2010
1 parent 3cc21d5 commit 7070733
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 62 deletions.
41 changes: 23 additions & 18 deletions elasticsearch/index_dists.pl
Expand Up @@ -5,6 +5,7 @@
use Every;
use Find::Lib '../lib';
use MetaCPAN;
use MetaCPAN::Dist;
use Time::HiRes qw( gettimeofday tv_interval );

my $t_begin = [gettimeofday];
Expand All @@ -28,14 +29,14 @@

elsif ( $cpan->dist_name ) {
say "searching for dist: " . $cpan->dist_name;
@dists = ( $cpan->dist_name );
@dists = search_dists( { dist => $cpan->dist_name } );
}

else {
say "search all dists";
@dists = search_dists();
}

say dump( \@dists);
foreach my $dist ( @dists ) {
process_dist( $dist );
}
Expand All @@ -45,14 +46,12 @@

sub process_dist {

my $dist_name = shift;
# next if $dist_name eq 'Bio-Perl'; # this dist is messed
my $distvname = shift;
my $t0 = [gettimeofday];

say '+' x 20 . " DIST: $dist_name" if $cpan->debug;
say '+' x 20 . " DIST: $distvname" if $cpan->debug;

my $dist = $cpan->dist( $dist_name );
$dist->module_rs( $cpan->module_rs );
my $dist = MetaCPAN::Dist->new( distvname => $distvname, module_rs => $cpan->module_rs );
$dist->process;

say "Found " . scalar @{ $dist->processed } . " modules in dist";
Expand All @@ -68,7 +67,7 @@ sub process_dist {
my $elapsed = tv_interval( $t_begin, [gettimeofday] );
say '#' x 78;

say "$dist_name"; # if $icpan->debug;
say "$distvname"; # if $icpan->debug;
say "$iter_time to process dist";
say "$elapsed so far... ($attempts dists out of $total_dists)";

Expand All @@ -88,21 +87,27 @@ sub process_dist {
}

sub search_dists {

my $constraints = shift || {};

my $search = $cpan->module_rs->search( $constraints,
{ columns => ['dist'], distinct => 1, order_by => 'dist ASC' } );

$total_dists = $search->count;
my @dists = ( );
my $constraints = shift || {};

my $search = $cpan->module_rs->search(
$constraints,
{ +select => 'distvname',
columns => ['dist'],
distinct => 1,
order_by => 'distvname DESC',
rows => 1
}
);

my @dists = ();
while ( my $row = $search->next ) {
push @dists, $row->dist;
push @dists, $row->distvname;
}

say "found $total_dists distros";

return @dists;

}

10 changes: 6 additions & 4 deletions lib/MetaCPAN.pm
Expand Up @@ -30,6 +30,11 @@ has 'db_path' => (
default => '../CPAN-meta.sqlite',
);

has 'distvname' => (
is => 'rw',
isa => 'Str',
);

has 'dist_name' => (
is => 'rw',
isa => 'Str',
Expand Down Expand Up @@ -101,12 +106,9 @@ LINE:
sub dist {

my $self = shift;
my $name = shift;
$name =~ s{::}{-}g;

return MetaCPAN::Dist->new(
name => $name,
module_rs => $self->module_rs
distvname => $self->distvname,
);

}
Expand Down
52 changes: 14 additions & 38 deletions lib/MetaCPAN/Dist.pm
Expand Up @@ -18,6 +18,11 @@ with 'MetaCPAN::Role::DB';

has 'archive_parent' => ( is => 'rw', );

has 'distvname' => (
is => 'rw',
required => 1,
);

has 'es_inserts' => (
is => 'rw',
isa => 'ArrayRef',
Expand Down Expand Up @@ -84,9 +89,7 @@ sub process {
my $self = shift;
my $success = 0;

#return 0 if !$self->tar;

my $module_rs = $self->module_rs->search({ dist => $self->name });
my $module_rs = $self->module_rs->search({ distvname => $self->distvname });

my @modules = ();
while ( my $found = $module_rs->next ) {
Expand Down Expand Up @@ -121,22 +124,10 @@ MODULE:
}
}


#FILE:
# foreach my $file ( sort keys %{ $self->files } ) {
# say "checking files: $file " if $self->debug;
# next FILE if !$self->parse_pod( $found->name, $file );
#
# say "found: $file ";
# ++$success;
# next MODULE;
# }

}

$self->process_cookbooks;

$self->index_dist;
$self->process_cookbooks;

if ( $self->es_inserts ) {
my $result = $self->es->bulk( $self->es_inserts );
Expand All @@ -153,21 +144,6 @@ MODULE:

}

sub modules {

my $self = shift;
my $name = $self->name;
$name =~ s{::}{-}g;
$self->name( $name );

# I'm sure there is a better way of doing this (GROUP BY?)
my $latest = $self->module_rs->search( { dist => $self->name },
{ order_by => 'distvname DESC' } )->first;

return $self->module_rs->search( { distvname => $latest->distvname } );

}

sub process_cookbooks {

my $self = shift;
Expand Down Expand Up @@ -267,7 +243,7 @@ sub parse_pod {
$parser->output_string( \$xhtml );
$parser->parse_string_document( $content );

$module->xhtml_pod( $xhtml );
#$module->xhtml_pod( $xhtml );
$module->file( $file );
$module->update;

Expand Down Expand Up @@ -429,7 +405,7 @@ sub _build_files {
sub _build_metadata {

my $self = shift;
return $self->module_rs->search( { dist => $self->name } )->first;
return $self->module_rs->search( { distvname => $self->distvname } )->first;

}

Expand Down Expand Up @@ -514,11 +490,7 @@ sub set_archive_parent {

=pod
=head2 archive_path
Full file path to module archive.
=head2 modules
=head1 SYNOPSIS
We only care about modules which are in the very latest version of the distro.
For example, the minicpan (and CPAN) indices show something like this:
Expand All @@ -530,6 +502,10 @@ We don't care about modules which are no longer included in the latest
distribution, so we'll only import POD from the highest version number of any
distro we're searching on.
=head2 archive_path
Full file path to module archive.
=cut

=head2 process
Expand Down
3 changes: 1 addition & 2 deletions lib/MetaCPAN/Role/DB.pm
Expand Up @@ -21,8 +21,7 @@ has 'module_rs' => (
is => 'rw',
default => sub {
my $self = shift;
return my $rs
= $self->schema->resultset( 'MetaCPAN::Schema::Result::Module' );
return my $rs = $self->schema->resultset( 'Module' );
},
);

Expand Down

0 comments on commit 7070733

Please sign in to comment.