Permalink
Browse files

Added Solr filter queries for audio, video and application domains

Inspired by the existing filter query used for image search, and consistent with
the post-filtering on content domain applied in SearchEvent.addNodes().

These filters are quite simplistic, but at least audio, video and
application searches now return results. Previously, when filtering on
these content domains, many result pages (often even the first
page) were empty although the total result count suggested that results
should be available. This was because filtering on the content domain was only
applied AFTER querying the Solr indexes.
  • Loading branch information...
luccioman committed Sep 8, 2017
1 parent 5d3ceb3 commit 66cb9c4ff9c7bb126b886189eb3bf9facdb4a65b
Showing with 85 additions and 8 deletions.
  1. +58 −2 source/net/yacy/search/query/QueryGoal.java
  2. +27 −6 source/net/yacy/search/query/QueryParams.java
@@ -33,6 +33,8 @@
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.http.HttpStatus;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.order.NaturalOrder;
@@ -345,7 +347,7 @@ public void filterOut(final SortedSet<String> blueList) {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200");
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
if (noimages) {
fqs.add("-" + CollectionSchema.content_type.getSolrFieldName() + ":(image/*)");
fqs.add("-" + CollectionSchema.url_file_ext_s.getSolrFieldName() + ":(jpg OR png OR gif)");
@@ -379,13 +381,67 @@ public StringBuilder collectionTextQuery() {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200");
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
fqs.add(
CollectionSchema.content_type.getSolrFieldName() + ":(image/*) OR " +
CollectionSchema.images_urlstub_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
return fqs;
}
/**
 * Generate Solr filter queries to receive valid audio content results.
 *
 * Two filters are produced: the first keeps only documents whose HTTP status is
 * {@link HttpStatus#SC_OK}; the second keeps documents whose MIME type matches the
 * prefix {@code audio/} or whose audio links count is at least one.
 *
 * @return a new, mutable list of Solr filter queries for audio content URLs
 */
public List<String> collectionAudioFilterQuery() {
    final List<String> fqs = new ArrayList<>();
    // add filter to prevent that results come from failed urls
    fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
    // accept audio documents themselves as well as documents linking to audio content
    fqs.add(CollectionSchema.content_type.getSolrFieldName() + ":(audio/*) OR "
            + CollectionSchema.audiolinkscount_i.getSolrFieldName() + ":[1 TO *]");
    return fqs;
}
/**
 * Build the Solr filter queries used to restrict results to video content.
 *
 * The returned list holds two filters: one keeping only documents whose HTTP status is
 * {@link HttpStatus#SC_OK}, and one keeping documents whose MIME type matches the prefix
 * {@code video/} or whose video links count is at least one.
 *
 * @return a new, mutable list of Solr filter queries for video content URLs
 */
public List<String> collectionVideoFilterQuery() {
    // only documents that were fetched successfully
    final String statusFilter = CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK;
    // video documents themselves, or documents linking to at least one video
    final String videoFilter = CollectionSchema.content_type.getSolrFieldName() + ":(video/*) OR "
            + CollectionSchema.videolinkscount_i.getSolrFieldName() + ":[1 TO *]";

    final ArrayList<String> filters = new ArrayList<>();
    filters.add(statusFilter);
    filters.add(videoFilter);
    return filters;
}
/**
 * Build the Solr filter queries used to restrict results to application specific content.
 *
 * The returned list holds two filters: one keeping only documents whose HTTP status is
 * {@link HttpStatus#SC_OK}, and one keeping documents whose MIME type matches the prefix
 * {@code application/} or whose application links count is at least one.
 *
 * @return a new, mutable list of Solr filter queries for application specific content URLs
 */
public List<String> collectionApplicationFilterQuery() {
    // only documents that were fetched successfully
    final String statusFilter = CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK;
    // application documents themselves, or documents linking to at least one application resource
    final String applicationFilter = CollectionSchema.content_type.getSolrFieldName() + ":(application/*) OR "
            + CollectionSchema.applinkscount_i.getSolrFieldName() + ":[1 TO *]";

    final ArrayList<String> filters = new ArrayList<>();
    filters.add(statusFilter);
    filters.add(applicationFilter);
    return filters;
}
public StringBuilder collectionImageQuery(final QueryModifier modifier) {
final StringBuilder q = new StringBuilder(80);
@@ -111,7 +111,6 @@ public String toString() {
public static final Bitfield empty_constraint = new Bitfield(4, "AAAAAA");
public static final Pattern catchall_pattern = Pattern.compile(".*");
private static final Pattern matchnothing_pattern = Pattern.compile("");
private final QueryGoal queryGoal;
public int itemsPerPage;
@@ -370,19 +369,41 @@ protected static final boolean anymatch(final String text, final Iterator<String
}
/**
 * Build the Solr query matching the requested content domain.
 *
 * Image searches are delegated to {@link #solrImageQuery(boolean)}. For every other
 * domain the generic query is built with domain specific filter queries: audio, video
 * and application domains get their dedicated filters, anything else (including a
 * null domain) falls back to the text filters.
 *
 * @param cd the content domain to search; may be null, in which case the text filters are used
 * @param getFacets passed through to the underlying query builders
 * @param excludeintext_image passed to the text filter query; only used when the text filters apply
 * @return a Solr query instance for the given content domain
 */
public SolrQuery solrQuery(final ContentDomain cd, final boolean getFacets, final boolean excludeintext_image) {
    if (cd == ContentDomain.IMAGE) {
        return solrImageQuery(getFacets);
    }
    // Reference comparisons instead of switch(cd): a switch on a null enum value
    // throws a NullPointerException, while a null domain must fall back to text search.
    final List<String> filterQueries;
    if (cd == ContentDomain.AUDIO) {
        filterQueries = this.queryGoal.collectionAudioFilterQuery();
    } else if (cd == ContentDomain.VIDEO) {
        filterQueries = this.queryGoal.collectionVideoFilterQuery();
    } else if (cd == ContentDomain.APP) {
        filterQueries = this.queryGoal.collectionApplicationFilterQuery();
    } else {
        filterQueries = this.queryGoal.collectionTextFilterQuery(excludeintext_image);
    }
    return solrQuery(getFacets, filterQueries);
}
private SolrQuery solrTextQuery(final boolean getFacets, final boolean excludeintext_image) {
/**
* @param getFacets when true, generate facets for the fields given in this.facetfields
* @param filterQueries a mutable list of filter queries, initialized with filters related to content domain. Must not be null.
* @return a Solr query instance ready to use
*/
private SolrQuery solrQuery(final boolean getFacets, final List<String> filterQueries) {
if (this.cachedQuery != null) {
this.cachedQuery.setStart(this.offset);
if (!getFacets) this.cachedQuery.setFacet(false);
return this.cachedQuery;
}
// construct query
final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionTextFilterQuery(excludeintext_image));
final SolrQuery params = getBasicParams(getFacets, filterQueries);
int rankingProfile = this.ranking.coeff_date == RankingProfile.COEFF_MAX ? 1 : (this.modifier.sitehash != null || this.modifier.sitehost != null) ? 2 : 0;
params.setQuery(this.queryGoal.collectionTextQuery().toString());
Ranking actRanking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile
@@ -438,7 +459,7 @@ private SolrQuery solrImageQuery(boolean getFacets) {
return params;
}
private SolrQuery getBasicParams(boolean getFacets, List<String> fqs) {
private SolrQuery getBasicParams(final boolean getFacets, final List<String> fqs) {
final SolrQuery params = new SolrQuery();
params.setParam("defType", "edismax");
params.setParam(DisMaxParams.QF, CollectionSchema.text_t.getSolrFieldName() + "^1.0");

0 comments on commit 66cb9c4

Please sign in to comment.