From 566d6c980cb2e96822d427435bfcfb8895771689 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 17 Apr 2013 16:15:27 +0200 Subject: [PATCH] checking of document signature for a double-document check now refers only to documents within the same domain --- source/net/yacy/search/index/Segment.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 70087656a2..6a97d6ae91 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -492,6 +492,7 @@ public SolrInputDocument storeDocument( final CollectionConfiguration.SolrVector vector = this.fulltext.getDefaultConfiguration().yacy2solr(id, profile, responseHeader, document, condenser, referrerURL, language, urlCitationIndex, this.fulltext.getWebgraphConfiguration()); // FIND OUT IF THIS IS A DOUBLE DOCUMENT + String hostid = url.hosthash(); for (CollectionSchema[] checkfields: new CollectionSchema[][]{ {CollectionSchema.exact_signature_l, CollectionSchema.exact_signature_unique_b}, {CollectionSchema.fuzzy_signature_l, CollectionSchema.fuzzy_signature_unique_b}}) { @@ -501,7 +502,7 @@ public SolrInputDocument storeDocument( // lookup the document with the same signature long signature = ((Long) vector.getField(checkfield.getSolrFieldName()).getValue()).longValue(); try { - if (this.fulltext.getDefaultConnector().existsByQuery(checkfield.getSolrFieldName() + ":\"" + Long.toString(signature) + "\"")) { + if (this.fulltext.getDefaultConnector().existsByQuery(CollectionSchema.host_id_s + ":\"" + hostid + "\" AND " + checkfield.getSolrFieldName() + ":\"" + Long.toString(signature) + "\"")) { // change unique attribut in content vector.setField(uniquefield.getSolrFieldName(), false); } @@ -511,7 +512,6 @@ public SolrInputDocument storeDocument( // CHECK IF TITLE AND DESCRIPTION IS UNIQUE (this is by default not switched on) if (this.fulltext.getDefaultConfiguration().contains(CollectionSchema.host_id_s)) { - String hostid = url.hosthash(); uniquecheck: for (CollectionSchema[] checkfields: new CollectionSchema[][]{ {CollectionSchema.title, CollectionSchema.title_exact_signature_l, CollectionSchema.title_unique_b}, {CollectionSchema.description, CollectionSchema.description_exact_signature_l, CollectionSchema.description_unique_b}}) {