Permalink
Browse files

improve precision

  • Loading branch information...
monken committed Oct 26, 2011
1 parent 7c91db0 commit f781b4aec65b807fe8164d9f7b4120ba33bd8b6c
Showing with 124 additions and 75 deletions.
  1. +1 −1 lib/MetaCPAN/Web/Controller/Search.pm
  2. +112 −66 lib/MetaCPAN/Web/Model/API/Module.pm
  3. +11 −8 t/controller/search/precision.t
@@ -18,7 +18,7 @@ sub index : Path {
if ($query = $req->param('q')) {
# Searching for e.g. "DBIx::Class" is just like searching for
# "DBIx Class"
- $query =~ s/::/ /g;
+ #$query =~ s/::/ /g;
}
my $model = $c->model('API::Module');
@@ -332,95 +332,140 @@ sub first {
sub search {
my ( $self, $query, $params ) = @_;
+ ( my $clean = $query ) =~ s/::/ /g;
my $search = merge(
$params,
{ query => {
- filtered => {
+ custom_score => {
+ script => qq{
+ documentation = doc['documentation'].stringValue;
+ if(documentation == empty) {
+ documentation = 'xxxxxxxxxxxxxxxxxxxxxxxxx'
+ }
+ return _score - documentation.length()/1000
+ },
query => {
- custom_score => {
+ filtered => {
query => {
- query_string => {
- fields => [
- 'documentation.analyzed^7',
- 'documentation.camelcase^3',
- 'file.module.name.analyzed^3',
- 'file.module.name.camelcase^3',
- 'distribution.analyzed^10',
- 'distribution.camelcase^5',
- 'abstract.analyzed^2',
- 'pod.analyzed',
- ],
- query => $query,
- allow_leading_wildcard => \0,
- default_operator => 'AND'
- }
- },
-
- # prefer shorter module names slightly
- script => qq{
- documentation = doc['documentation'].stringValue;
- if(documentation == empty) {
- documentation = 'xxxxxxxxxxxxxxxxxxxxxxxxx'
- }
- return _score - documentation.length()/30 + doc[\"date\"].date.getMillis() / 1000000000000
-}
- }
- },
- filter => {
- and => [
- { not => {
- filter => {
- or => [
- map {
- { term => {
- 'file.distribution' =>
- $_
- }
+ bool => {
+ should => [
+ { term => {
+ 'file.documentation' => {
+ value => $query,
+ boost => 20
}
- } @ROGUE_DISTRIBUTIONS
- ]
- }
+ }
+ },
+ { term => {
+ 'file.module.name' => {
+ value => $query,
+ boost => 20
+ }
+ }
+ },
+ { dis_max => {
+ queries => [
+ { query_string => {
+ fields => [
+ qw(documentation.analyzed^2 file.module.name.analyzed^2 distribution.analyzed),
+ qw(documentation.camelcase file.module.name.camelcase distribution.camelcase)
+ ],
+ query => $clean,
+ boost => 3,
+ default_operator =>
+ 'AND',
+ allow_leading_wildcard =>
+ \0,
+
+ use_dis_max => \1,
+
+ }
+ },
+ { query_string => {
+ fields => [
+ qw(abstract.analyzed pod.analyzed)
+ ],
+ query => $clean,
+ boost => 0.1,
+ default_operator =>
+ 'AND',
+ allow_leading_wildcard =>
+ \0,
+ use_dis_max => \1,
+
+ }
+ }
+ ]
+ }
+ }
+
+ ]
}
},
- { term => { status => 'latest' } },
- { or => [
+ filter => {
+ and => [
+ { not => {
+ filter => {
+ or => [
+ map {
+ { term => {
+ 'file.distribution'
+ => $_
+ }
+ }
+ } @ROGUE_DISTRIBUTIONS
+ ]
+ }
+ }
+ },
+ { term => { status => 'latest' } },
+ { or => [
# we are looking for files that have no authorized
# property (e.g. .pod files) and files that are
# authorized
- { missing =>
- { field => 'file.authorized' }
- },
- { term => { 'file.authorized' => \1 } },
- ]
- },
- { or => [
- { and => [
- { exists => {
- field =>
- 'file.module.name'
+ { missing => {
+ field => 'file.authorized'
}
},
{ term => {
- 'file.module.indexed' =>
- \1
+ 'file.authorized' => \1
}
- }
+ },
]
},
- { and => [
- { exists => {
- field => 'documentation'
- }
+ { or => [
+ { and => [
+ { exists => {
+ field =>
+ 'file.module.name'
+ }
+ },
+ { term => {
+ 'file.module.indexed'
+ => \1
+ }
+ }
+ ]
},
- { term =>
- { 'file.indexed' => \1 }
+ { and => [
+ { exists => {
+ field =>
+ 'documentation'
+ }
+ },
+ { term => {
+ 'file.indexed' =>
+ \1
+ }
+ }
+ ]
}
]
}
]
}
- ]
+ }
}
}
},
@@ -483,7 +528,8 @@ sub requires {
sub {
my $data = shift->recv;
$cv->send(
- { data => [map { $_->{_source} } @{$data->{hits}->{hits}}],
+ { data =>
+ [ map { $_->{_source} } @{ $data->{hits}->{hits} } ],
total => $data->{hits}->{total},
took => $data->{took}
}
@@ -4,14 +4,17 @@ use Test::More;
use MetaCPAN::Web::Test;
my %tests = (
- 'net dns' => 'Net::DNS',
- 'DBIx::Class' => 'DBIx::Class',
- 'anyevent' => 'AnyEvent',
- 'AnyEvent' => 'AnyEvent',
- 'anyevent http' => 'AnyEvent::HTTP',
- 'dist zilla' => 'Dist::Zilla',
- 'dbi' => 'DBI',
- 'Perl::Critic' => 'Perl::Critic',
+ 'net dns' => 'Net::DNS',
+ 'DBIx::Class' => 'DBIx::Class',
+ 'anyevent' => 'AnyEvent',
+ 'AnyEvent' => 'AnyEvent',
+ 'anyevent http' => 'AnyEvent::HTTP',
+ 'dist zilla' => 'Dist::Zilla',
+ 'dbi' => 'DBI',
+ 'Perl::Critic' => 'Perl::Critic',
+ 'HTML::TokeParser' => 'HTML::TokeParser',
+ 'HTML::Element' => 'HTML::Element',
+ 'net::amazon::s3' => 'Net::Amazon::S3',
);
test_psgi app, sub {

0 comments on commit f781b4a

Please sign in to comment.