Fixed surrogates import monitoring page (/CrawlResults.html?process=7)
This page was always empty, as described in mantis 740
(http://mantis.tokeek.de/view.php?id=740)
luccioman committed Apr 24, 2017
1 parent 527d494 · commit b1da926
Showing 2 changed files with 19 additions and 3 deletions.
source/net/yacy/crawler/retrieval/Response.java: 3 changes (2 additions & 1 deletion)
@@ -828,7 +828,8 @@ public EventOrigin processCase(final String mySeedHash) {
         // 3) result of index transfer, some of them are here (not possible here)
         // 4) proxy-load (initiator is "------------")
         // 5) local prefetch/crawling (initiator is own seedHash)
-        // 6) local fetching for global crawling (other known or unknwon initiator)
+        // 6) local fetching for global crawling (other known or unknown initiator)
+        // 7) local surrogates processing (can not be known here : crawl profile is required)
         EventOrigin processCase = EventOrigin.UNKNOWN;
         // FIXME the equals seems to be incorrect: String.equals(boolean)
         if (initiator() == null || initiator().length == 0 || ASCII.String(initiator()).equals("------------")) {
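
The Response.java change is comment-only: it fixes the "unknwon" typo and documents a seventh event origin that processCase() cannot detect on its own, because a surrogates import carries the peer's own seed hash as initiator, exactly like a local crawl. A minimal sketch of that ambiguity follows (a hypothetical helper, not part of the commit; it assumes the PROXY_LOAD, LOCAL_CRAWLING and GLOBAL_CRAWLING constants of the same EventOrigin enum used elsewhere in this diff):

    // Hypothetical illustration only: initiator-based classification cannot
    // separate case 5 (local crawl) from case 7 (surrogates import).
    static EventOrigin classifyByInitiator(final byte[] initiator, final String mySeedHash) {
        if (initiator == null || initiator.length == 0
                || ASCII.String(initiator).equals("------------")) {
            return EventOrigin.PROXY_LOAD; // case 4
        }
        if (ASCII.String(initiator).equals(mySeedHash)) {
            // case 5 or case 7: both report the peer's own seed hash, so only
            // the crawl profile (known in Switchboard) can tell them apart
            return EventOrigin.LOCAL_CRAWLING;
        }
        return EventOrigin.GLOBAL_CRAWLING; // case 6
    }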
source/net/yacy/search/Switchboard.java: 19 changes (17 additions & 2 deletions)
@@ -2114,14 +2114,23 @@ public void run() {
                 // enrich the surrogate
                 final String id = (String) surrogate.getFieldValue(CollectionSchema.id.getSolrFieldName());
                 final String text = (String) surrogate.getFieldValue(CollectionSchema.text_t.getSolrFieldName());
+                final DigestURL rootURL = new DigestURL((String) surrogate.getFieldValue(CollectionSchema.sku.getSolrFieldName()), ASCII.getBytes(id));
                 if (text != null && text.length() > 0 && id != null ) {
-                    final DigestURL root = new DigestURL((String) surrogate.getFieldValue(CollectionSchema.sku.getSolrFieldName()), ASCII.getBytes(id));
                     // run the tokenizer on the text to get vocabularies and synonyms
-                    final Tokenizer tokenizer = new Tokenizer(root, text, LibraryProvider.dymLib, true, scraper);
+                    final Tokenizer tokenizer = new Tokenizer(rootURL, text, LibraryProvider.dymLib, true, scraper);
                     final Map<String, Set<String>> facets = Document.computeGenericFacets(tokenizer.tags());
                     // overwrite the given vocabularies and synonyms with new computed ones
                     Switchboard.this.index.fulltext().getDefaultConfiguration().enrich(surrogate, tokenizer.synonyms(), facets);
                 }
+
+                /* Update the ResultURLS stack for monitoring */
+                final byte[] myPeerHash = ASCII.getBytes(peers.mySeed().hash);
+                ResultURLs.stack(
+                        ASCII.String(rootURL.hash()),
+                        rootURL.getHost(),
+                        myPeerHash,
+                        myPeerHash,
+                        EventOrigin.SURROGATES);
             } catch (MalformedURLException e) {
                 ConcurrentLog.logException(e);
             }
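
Two things happen in this hunk: the DigestURL construction is hoisted out of the if block (and renamed from root to rootURL) so the URL is available even for surrogates with no text, and every imported surrogate is then pushed onto the ResultURLs monitoring stack with the local peer acting as both initiator and executor. In isolation the push looks like this (a hedged sketch reusing only the calls visible in the diff; the example URL is invented):

    // Hedged usage sketch, assuming the stack(urlHash, host, initiatorHash,
    // executorHash, origin) parameter order shown in the diff above.
    final DigestURL doc = new DigestURL("http://example.org/imported-page.html"); // invented URL
    final byte[] me = ASCII.getBytes(peers.mySeed().hash); // peer is initiator and executor
    ResultURLs.stack(ASCII.String(doc.hash()), doc.getHost(), me, me, EventOrigin.SURROGATES);

Entries stacked with EventOrigin.SURROGATES are what /CrawlResults.html?process=7 lists, which is why that page was always empty before this commit.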
@@ -3034,6 +3043,12 @@ private void storeDocumentIndex(
         final DigestURL url = document.dc_source();
         final DigestURL referrerURL = queueEntry.referrerURL();
         EventOrigin processCase = queueEntry.processCase(this.peers.mySeed().hash);
+
+        /* This entry may have been locally created by the MediaWiki dump reader :
+         * we can distinguish the case here from a regular local crawl with the crawl profile used */
+        if(this.crawler != null && queueEntry.profile() == this.crawler.defaultSurrogateProfile) {
+            processCase = EventOrigin.SURROGATES;
+        }
         CrawlProfile profile = queueEntry.profile();

         if (condenser == null || (document.indexingDenied() && profile.obeyHtmlRobotsNoindex())) {
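
This second hunk covers the other surrogate path: documents produced by the MediaWiki dump reader are indexed through storeDocumentIndex() like regular local-crawl entries, so processCase() classifies them as local crawling (case 5 above). The fix reroutes them by comparing the queue entry's crawl profile against the default surrogate profile; reference equality (==) suffices here assuming defaultSurrogateProfile is a single shared instance held by the crawler. A condensed restatement of the pattern (names taken from the diff, intent spelled out in comments):

    // Condensed restatement of the hunk above.
    EventOrigin origin = queueEntry.processCase(this.peers.mySeed().hash); // LOCAL_CRAWLING for dump-reader entries
    if (this.crawler != null && queueEntry.profile() == this.crawler.defaultSurrogateProfile) {
        origin = EventOrigin.SURROGATES; // counted under /CrawlResults.html?process=7
    }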
