Skip to content

Commit

Permalink
added query modifier 'on'. This makes it possible to search for date
Browse files Browse the repository at this point in the history
occurrences within the (web) page documents (not the document
last-modified!). This works only if the solr field dates_in_content_sxt
is enabled. A search request may then have the form "term on:<date>",
like
gift on:24.12.2014
gift on:2014/12/24
* on:2014/12/31
For the date format you may use any kind of human-readable date
representation (yes, really!) - the on:<date> parser tries to identify the
language and also knows event names, like:
bunny on:eastern
... as long as the date term contains no spaces (use a dot instead). A
further enhancement will be made to also accept strings enclosed in
quotes.
  • Loading branch information
Orbiter committed Dec 16, 2014
1 parent 1cfddea commit 6512543
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 1 deletion.
16 changes: 16 additions & 0 deletions source/net/yacy/document/DateDetection.java
Expand Up @@ -35,6 +35,8 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.yacy.cora.date.GenericFormatter;

/**
* The purpose of this class exceeds the demands on simple date parsing using a SimpleDateFormat
* because it tries to
Expand Down Expand Up @@ -494,6 +496,20 @@ public static LinkedHashSet<Date> parse(String text) {
return dates;
}

/**
 * Parse a single date expression and return one date for it.
 *
 * The text is tried against a sequence of fixed formats first
 * (CONFORM, then GenericFormatter.FORMAT_SHORT_DAY, FORMAT_RFC1123_SHORT
 * and FORMAT_ANSIC); the first format that accepts the text wins.
 * If none of them matches, the text is handed to {@link #parse(String)}
 * and the first date of its result set is used.
 *
 * NOTE(review): the formatters used here appear to be shared static
 * instances; if they are SimpleDateFormat objects they are not safe for
 * concurrent use - confirm and synchronize at the declaration if needed.
 *
 * @param text the date expression; may be null
 * @return the recognized date, or null if text is null or no date was recognized
 */
public static Date parseLine(String text) {
    // DateFormat.parse(null) would throw a NullPointerException
    if (text == null) {
        return null;
    }

    Date d = null;
    try {
        d = CONFORM.parse(text);
    } catch (ParseException ignored) {
        // not in this format; fall through to the next one
    }
    if (d == null) {
        try {
            d = GenericFormatter.FORMAT_SHORT_DAY.parse(text);
        } catch (ParseException ignored) {
            // not in this format; fall through to the next one
        }
    }
    if (d == null) {
        try {
            d = GenericFormatter.FORMAT_RFC1123_SHORT.parse(text);
        } catch (ParseException ignored) {
            // not in this format; fall through to the next one
        }
    }
    if (d == null) {
        try {
            d = GenericFormatter.FORMAT_ANSIC.parse(text);
        } catch (ParseException ignored) {
            // not in this format; fall through to the free-text detection
        }
    }

    if (d == null) {
        // last resort: free-text date detection, take the first hit
        Set<Date> dd = parse(text);
        if (!dd.isEmpty()) {
            d = dd.iterator().next();
        }
    }
    return d;
}

private static LinkedHashSet<Date> parseRawDate(String text) {
// get parse alternatives for different date styles; we consider that one document uses only one style
LinkedHashSet<Date> DMYDates = EndianStyle.DMY.parse(text);
Expand Down
30 changes: 29 additions & 1 deletion source/net/yacy/search/query/QueryModifier.java
Expand Up @@ -22,13 +22,15 @@

import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;

import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MultiMapSolrParams;

import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.document.DateDetection;
import net.yacy.kelondro.util.ISO639;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects;
Expand All @@ -37,7 +39,7 @@
public class QueryModifier {

private final StringBuilder modifier;
public String sitehost, sitehash, filetype, protocol, language, author, collection;
public String sitehost, sitehash, filetype, protocol, language, author, collection, on;

public QueryModifier() {
this.sitehash = null;
Expand All @@ -47,6 +49,7 @@ public QueryModifier() {
this.language = null;
this.author = null;
this.collection = null;
this.on = null;
this.modifier = new StringBuilder(20);
}

Expand Down Expand Up @@ -142,6 +145,18 @@ public String parse(String querystring) {
querystring = querystring.replace("collection:" + this.collection, "");
add("collection:" + this.collection);
}

// parse on-date
final int oni = querystring.indexOf("on:", 0);
if ( oni >= 0 ) {
int ftb = querystring.indexOf(' ', oni);
if ( ftb == -1 ) {
ftb = querystring.length();
}
this.on = querystring.substring(oni + 3, ftb);
querystring = querystring.replace("on:" + this.on, "");
add("on:" + this.on);
}

// parse language
final int langi = querystring.indexOf("/language/");
Expand Down Expand Up @@ -240,6 +255,10 @@ private StringBuilder apply(String FQ) {
fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.collection));
}

if (this.on != null && this.on.length() > 0 && fq.indexOf(CollectionSchema.dates_in_content_sxt.getSolrFieldName()) < 0) {
fq.append(" AND ").append(QueryModifier.parseOnExpression(this.on));
}

if (this.protocol != null && this.protocol.length() > 0 && fq.indexOf(CollectionSchema.url_protocol_s.getSolrFieldName()) < 0) {
fq.append(" AND ").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(":\"").append(this.protocol).append('\"');
}
Expand Down Expand Up @@ -295,6 +314,15 @@ public static String parseCollectionExpression(String collectionDescription) {
filterQuery.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(0)).append('\"');
}
return filterQuery.toString();
}

/**
 * Build the Solr filter clause for the 'on' query modifier.
 *
 * The description is parsed with {@link DateDetection#parseLine(String)}.
 * When a date is recognized the result has the form
 * <code>&lt;dates_in_content_sxt field name&gt;:"&lt;formatted date&gt;"</code>.
 *
 * @param onDescription the date expression given after "on:"
 * @return the filter clause, or an empty string if no date could be parsed
 */
public static String parseOnExpression(String onDescription) {
    final Date parsed = DateDetection.parseLine(onDescription);
    if (parsed == null) {
        // nothing recognizable as a date: contribute no clause
        return "";
    }
    final String fieldName = CollectionSchema.dates_in_content_sxt.getSolrFieldName();
    final String solrDate = org.apache.solr.schema.TrieDateField.formatExternal(parsed);
    return fieldName + ":\"" + solrDate + '\"';
}

Expand Down
4 changes: 4 additions & 0 deletions source/net/yacy/search/query/QueryParams.java
Expand Up @@ -481,6 +481,10 @@ private String getFacets() {
fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.modifier.collection));
}

if (this.modifier.on != null && this.modifier.on.length() > 0 && this.solrSchema.contains(CollectionSchema.dates_in_content_sxt)) {
fq.append(" AND ").append(QueryModifier.parseOnExpression(this.modifier.on));
}

if (this.modifier.protocol != null) {
fq.append(" AND {!tag=").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append("}").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(':').append(this.modifier.protocol);
}
Expand Down

0 comments on commit 6512543

Please sign in to comment.