Skip to content

Commit

Permalink
if the webgraph is used, then use it also for reference computation to
Browse files Browse the repository at this point in the history
avoid contradictions with references_i in the collection index.
  • Loading branch information
Orbiter committed Jul 24, 2014
1 parent 6e1dc44 commit f94c913
Showing 1 changed file with 22 additions and 25 deletions.
47 changes: 22 additions & 25 deletions source/net/yacy/search/index/Segment.java
Expand Up @@ -241,31 +241,7 @@ public ReferenceReport(final byte[] id, final boolean acceptSelfReference) throw
this.externalHosts = new RowHandleSet(6, Base64Order.enhancedCoder, 0);
this.internalIDs = new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 0);
this.externalIDs = new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 0);
try {
if (connectedCitation()) {
// read the references from the citation index
ReferenceContainer<CitationReference> references;
references = urlCitation().get(id, null);
if (references == null) return; // no references at all
Iterator<CitationReference> ri = references.entries();
while (ri.hasNext()) {
CitationReference ref = ri.next();
byte[] hh = ref.hosthash(); // host hash
if (ByteBuffer.equals(hh, 0, id, 6, 6)) {
internalIDs.put(ref.urlhash());
internal++;
} else {
externalHosts.put(hh);
externalIDs.put(ref.urlhash());
external++;
}
}
}
} catch (SpaceExceededException e) {
// The citation index got too large; we ignore the problem and hope that a second Solr index is attached which will take over now.
if (Segment.this.fulltext.useWebgraph()) internalIDs.clear();
}
if ((internalIDs.size() == 0 || !connectedCitation()) && Segment.this.fulltext.useWebgraph()) {
if (Segment.this.fulltext.useWebgraph()) {
// read the references from the webgraph
SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, WebgraphSchema.source_id_s.getSolrFieldName());
Expand All @@ -292,6 +268,27 @@ public ReferenceReport(final byte[] id, final boolean acceptSelfReference) throw
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
}
} else if (connectedCitation()) try {
// read the references from the citation index
ReferenceContainer<CitationReference> references;
references = urlCitation().get(id, null);
if (references == null) return; // no references at all
Iterator<CitationReference> ri = references.entries();
while (ri.hasNext()) {
CitationReference ref = ri.next();
byte[] hh = ref.hosthash(); // host hash
if (ByteBuffer.equals(hh, 0, id, 6, 6)) {
internalIDs.put(ref.urlhash());
internal++;
} else {
externalHosts.put(hh);
externalIDs.put(ref.urlhash());
external++;
}
}
} catch (SpaceExceededException e) {
// The citation index got too large; we ignore the problem and hope that a second Solr index is attached which will take over now.
if (Segment.this.fulltext.useWebgraph()) internalIDs.clear();
}
this.externalHosts.optimize();
this.internalIDs.optimize();
Expand Down

0 comments on commit f94c913

Please sign in to comment.