Skip to content

Commit

Permalink
added the protocol and the file name extension to the solr fields since
these fields are probably facets in file search
Browse files Browse the repository at this point in the history
  • Loading branch information
Orbiter committed Sep 11, 2012
1 parent e072632 commit 62add1d
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 12 deletions.
5 changes: 4 additions & 1 deletion defaults/solr.keys.list
Expand Up @@ -298,11 +298,14 @@ italic_txt
#iframesscount_i

## the protocol of the url
#url_protocol_s
url_protocol_s

## all path elements in the url
url_paths_sxt

## the file name extension
url_file_ext_s

## number of key-value pairs in search part of the url
#url_parameter_i

Expand Down
Expand Up @@ -141,6 +141,10 @@ public void write(final Writer writer, final SolrQueryRequest request, final Sol
path.append('/').append(value.stringValue());
continue;
}
if (YaCySchema.url_file_ext_s.name().equals(fieldName)) {
solitaireTag(writer, "ext", value.stringValue());
continue;
}
if (YaCySchema.last_modified.name().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
solitaireTag(writer, "pubDate", HeaderFramework.formatRFC1123(d));
Expand Down
23 changes: 12 additions & 11 deletions source/net/yacy/search/index/SolrConfiguration.java
Expand Up @@ -286,9 +286,8 @@ public SolrInputDocument metadata2solr(final URIMetadata md) {
}

// path elements of link
if (allAttr || contains(YaCySchema.url_paths_sxt)) {
add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
}
if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());

if (allAttr || contains(YaCySchema.imagescount_i)) add(doc, YaCySchema.imagescount_i, md.limage());
if (allAttr || contains(YaCySchema.inboundlinkscount_i)) add(doc, YaCySchema.inboundlinkscount_i, md.llocal());
Expand Down Expand Up @@ -416,9 +415,8 @@ public SolrInputDocument yacy2solr(final String id, final CrawlProfile profile,
}

// path elements of link
if (allAttr || contains(YaCySchema.url_paths_sxt)) {
add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
}
if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());

// get list of all links; the list will be reduced by urls that appear in other fields of the solr schema
Set<MultiProtocolURI> inboundLinks = yacydoc.inboundLinks();
Expand Down Expand Up @@ -860,13 +858,16 @@ public SolrInputDocument err(final DigestURI digestURI, final String failReason,
add(solrdoc, YaCySchema.id, ASCII.String(digestURI.hash()));
add(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false));
final InetAddress address = digestURI.getInetAddress();
if (address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress());
if (digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost());
if (contains(YaCySchema.ip_s) && address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress());
if (contains(YaCySchema.host_s) && digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost());

// path elements of link
add(solrdoc, YaCySchema.url_paths_sxt, digestURI.getPaths());
add(solrdoc, YaCySchema.failreason_t, failReason);
add(solrdoc, YaCySchema.httpstatus_i, httpstatus);
if (contains(YaCySchema.url_paths_sxt)) add(solrdoc, YaCySchema.url_paths_sxt, digestURI.getPaths());
if (contains(YaCySchema.url_file_ext_s)) add(solrdoc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());

// fail reason and status
if (contains(YaCySchema.failreason_t)) add(solrdoc, YaCySchema.failreason_t, failReason);
if (contains(YaCySchema.httpstatus_i)) add(solrdoc, YaCySchema.httpstatus_i, httpstatus);
return solrdoc;
}

Expand Down
1 change: 1 addition & 0 deletions source/net/yacy/search/index/YaCySchema.java
Expand Up @@ -141,6 +141,7 @@ public enum YaCySchema implements Schema {

url_protocol_s(SolrType.string, true, true, false, "the protocol of the url"),
url_paths_sxt(SolrType.string, true, true, true, "all path elements in the url"),
url_file_ext_s(SolrType.string, true, true, false, "the file name extension"),
url_parameter_i(SolrType.integer, true, true, false, "number of key-value pairs in search part of the url"),
url_parameter_key_sxt(SolrType.string, true, true, true, "the keys from key-value pairs in the search part of the url"),
url_parameter_value_sxt(SolrType.string, true, true, true, "the values from key-value pairs in the search part of the url"),
Expand Down

0 comments on commit 62add1d

Please sign in to comment.