Skip to content

Commit

Permalink
checking of document signature for a double-document check now refers
Browse files Browse the repository at this point in the history
only to documents within the same domain
  • Loading branch information
Orbiter committed Apr 17, 2013
1 parent 1d30082 commit 566d6c9
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions source/net/yacy/search/index/Segment.java
Expand Up @@ -492,6 +492,7 @@ public SolrInputDocument storeDocument(
final CollectionConfiguration.SolrVector vector = this.fulltext.getDefaultConfiguration().yacy2solr(id, profile, responseHeader, document, condenser, referrerURL, language, urlCitationIndex, this.fulltext.getWebgraphConfiguration());

// FIND OUT IF THIS IS A DOUBLE DOCUMENT
String hostid = url.hosthash();
for (CollectionSchema[] checkfields: new CollectionSchema[][]{
{CollectionSchema.exact_signature_l, CollectionSchema.exact_signature_unique_b},
{CollectionSchema.fuzzy_signature_l, CollectionSchema.fuzzy_signature_unique_b}}) {
Expand All @@ -501,7 +502,7 @@ public SolrInputDocument storeDocument(
// lookup the document with the same signature
long signature = ((Long) vector.getField(checkfield.getSolrFieldName()).getValue()).longValue();
try {
if (this.fulltext.getDefaultConnector().existsByQuery(checkfield.getSolrFieldName() + ":\"" + Long.toString(signature) + "\"")) {
if (this.fulltext.getDefaultConnector().existsByQuery(CollectionSchema.host_id_s + ":\"" + hostid + "\" AND " + checkfield.getSolrFieldName() + ":\"" + Long.toString(signature) + "\"")) {
// change the unique attribute in the content
vector.setField(uniquefield.getSolrFieldName(), false);
}
Expand All @@ -511,7 +512,6 @@ public SolrInputDocument storeDocument(

// CHECK IF TITLE AND DESCRIPTION ARE UNIQUE (this is not switched on by default)
if (this.fulltext.getDefaultConfiguration().contains(CollectionSchema.host_id_s)) {
String hostid = url.hosthash();
uniquecheck: for (CollectionSchema[] checkfields: new CollectionSchema[][]{
{CollectionSchema.title, CollectionSchema.title_exact_signature_l, CollectionSchema.title_unique_b},
{CollectionSchema.description, CollectionSchema.description_exact_signature_l, CollectionSchema.description_unique_b}}) {
Expand Down

0 comments on commit 566d6c9

Please sign in to comment.