Skip to content
Permalink
Browse files

Added analysis optional setting to compute statistics on text snippets

Thus producing some basic stats on processing times for snippets
generation and counts on snippets per source type.
  • Loading branch information...
luccioman committed Apr 15, 2018
1 parent 508050f commit a3ec7a7a5f7cc6d96e7d6bb2656b7c466e83ab28
@@ -500,6 +500,8 @@ debug.search.remote.dht.off=false
debug.search.remote.dht.testlocal=false
debug.search.remote.solr.off=false
debug.search.remote.solr.testlocal=false
# Set to true to enable computation of statistics on text snippets processing
debug.snippets.statistics.enabled=false

#staticIP if you have a static IP, you can use this setting
staticIP=
@@ -87,6 +87,23 @@ <h2>Integration of a Search Portal</h2>
<dt>Snippet Fetch Strategy &amp; Link Verification</dt>
<dd>
<img src="env/grafics/idea.png" width="32" height="32" alt="idea" align="center"/>Speed up search results with this option! (use CACHEONLY or FALSE to switch off verification)<br/>
#(debug.snippets.statistics.enabled)#<i>Statistics on text snippets generation can be enabled in the <a href="Settings_p.html?page=debug">Debug/Analysis Settings</a> page.</i>
::<div class="info" style="float:left; margin-right : 0.1em;">
<img src="env/grafics/i16.gif" width="16" height="16" alt="Detailed statistics"/>
<div class="infobox">
Counts by origin :
<ul>
<li>#[totalFromSolr]# provided by Solr</li>
<li>#[totalFromCache]# from cache</li>
<li>#[totalFromMetadata]# computed from indexed metadata</li>
<li>#[totalFromWeb]# from original documents fetched and parsed</li>
<li>#[totalFailures]# failures</li>
</ul>
</div>
</div>
<i>#[totalSnippets]# text snippets were generated since last server startup, in a mean time of #[snippetsMeanTime]# and a maximum of #[snippetsMaxTime]#.</i>
#(/debug.snippets.statistics.enabled)#
<br/>
<input type="radio" name="search.verify" value="nocache" #(search.verify.nocache)#::checked="checked"#(/search.verify.nocache)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/> NOCACHE: no use of web cache, load all snippets online<br/>
<input type="radio" name="search.verify" value="iffresh" #(search.verify.iffresh)#::checked="checked"#(/search.verify.iffresh)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/> IFFRESH: use the cache if the cache exists and is fresh otherwise load online<br/>
<input type="radio" name="search.verify" value="ifexist" #(search.verify.ifexist)#::checked="checked"#(/search.verify.ifexist)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/> IFEXIST: use the cache if the cache exist or load online<br/>
@@ -30,6 +30,7 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.time.Duration;
import java.util.Properties;

import net.yacy.cora.document.id.DigestURL;
@@ -41,6 +42,7 @@
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.server.http.HTTPDFileHandler;
@@ -218,6 +220,11 @@ public static serverObjects respond(final RequestHeader header, final serverObje
prop.put(SwitchboardConstants.REMOTESEARCH_HTTPS_PREFERRED,
sb.getConfigBool(SwitchboardConstants.REMOTESEARCH_HTTPS_PREFERRED,
SwitchboardConstants.REMOTESEARCH_HTTPS_PREFERRED_DEFAULT) ? 1 : 0);

final boolean textSnippetsStatisticsEnabled = sb.getConfigBool(
SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED,
SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED_DEFAULT);
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED, textSnippetsStatisticsEnabled);

prop.put(SwitchboardConstants.GREEDYLEARNING_ACTIVE, sb.getConfigBool(SwitchboardConstants.GREEDYLEARNING_ACTIVE, false) ? 1 : 0);
prop.put(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, sb.getConfig(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, "0"));
@@ -229,6 +236,28 @@ public static serverObjects respond(final RequestHeader header, final serverObje
} else {
prop.put(SwitchboardConstants.REMOTESEARCH_RESULT_STORE_MAXSIZE, "");
}

/* Provide some basic stats about text snippets generation time to help choosing snippet options */
if(textSnippetsStatisticsEnabled) {
final long totalSnippets = TextSnippet.statistics.getTotalSnippets();
final long totalSnippetsInitTime = TextSnippet.statistics.getTotalInitTime();
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalSnippets", totalSnippets);
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFromSolr",
TextSnippet.statistics.getTotalFromSolr());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFromCache",
TextSnippet.statistics.getTotalFromCache());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFromMetadata",
TextSnippet.statistics.getTotalFromMetadata());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFromWeb",
TextSnippet.statistics.getTotalFromWeb());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFailures",
TextSnippet.statistics.getTotalFailures());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_snippetsMeanTime",
formatDuration(totalSnippets > 0 ? totalSnippetsInitTime / totalSnippets : 0));

prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_snippetsMaxTime",
formatDuration(TextSnippet.statistics.getMaxInitTime()));
}

prop.put("search.verify.nocache", sb.getConfig("search.verify", "").equals("nocache") ? 1 : 0);
prop.put("search.verify.iffresh", sb.getConfig("search.verify", "").equals("iffresh") ? 1 : 0);
@@ -279,4 +308,20 @@ public static serverObjects respond(final RequestHeader header, final serverObje
return prop;
}

/**
 * Formats a duration for display, picking the time unit from its magnitude.
 * <p>
 * Durations of at least one second are rendered in seconds. Any millisecond
 * remainder is kept as a decimal fraction without trailing zeros, so that the
 * displayed value is not truncated (1000 -&gt; "1s", 1500 -&gt; "1.5s",
 * 1999 -&gt; "1.999s"). Shorter durations, including zero and negative
 * values, are rendered as plain milliseconds (250 -&gt; "250ms").
 * </p>
 *
 * @param durationValue a duration in milliseconds
 * @return the duration value formatted for display with its time unit
 */
private static String formatDuration(final long durationValue) {
    final Duration duration = Duration.ofMillis(durationValue);

    if (duration.getSeconds() <= 0) {
        /* Less than one full second (or negative) : milliseconds are the readable unit */
        return duration.toMillis() + "ms";
    }

    /*
     * A scale of 3 reinterprets the milliseconds count as decimal seconds.
     * toPlainString() is required after stripTrailingZeros(), otherwise a
     * value such as 10000 ms would be rendered as "1E+1".
     */
    return java.math.BigDecimal.valueOf(duration.toMillis(), 3).stripTrailingZeros().toPlainString() + "s";
}

}
@@ -43,6 +43,7 @@
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.server.serverCore;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@@ -580,6 +581,10 @@ else if (!filter.equals("*")){
tickedCheckbox = post.containsKey("searchShowRanking");
env.setConfig(SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING, tickedCheckbox);

tickedCheckbox = post.containsKey(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED);
sb.setConfig(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED, tickedCheckbox);
TextSnippet.statistics.setEnabled(tickedCheckbox);

/* For easier user understanding, the following flags controlling data sources selection
* are rendered in the UI as checkboxes corresponding to enabled value when ticked */
tickedCheckbox = post.containsKey("searchLocalDHT");
@@ -138,6 +138,26 @@
</div>
</fieldset>

<fieldset>
<legend>Text snippets statistics</legend>

<div class="form-group">
<div class="col-sm-4">
<div class="checkbox">
<label>
<input name="debug.snippets.statistics.enabled" id="snippetsStatsEnabled"
type="checkbox" #(debug.snippets.statistics.enabled)#::checked#(/debug.snippets.statistics.enabled)#
aria-describedby="snippetStatisticsInfo"/>
Enable text snippets statistics
</label>
</div>
</div>
<div class="col-sm-8" id="snippetStatisticsInfo">
When checked, statistics are collected on text snippets generation for search results. They are summarized in the <a href="ConfigPortal_p.html">Portal Configuration</a> page.
</div>
</div>
</fieldset>

<div class="col-sm-6">
<input type="submit" class="btn btn-primary" name="debugAnalysisSettings" value="Submit" aria-describedby="submitInfo"/>
<em id="submitInfo">Changes will take effect immediately.</em>
@@ -241,6 +241,10 @@ else if (page.equals("crawler")) {

prop.put("searchShowRankingChecked", env.getConfigBool(SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING, SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING_DEFAULT) ? 1 : 0);

prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED,
sb.getConfigBool(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED,
SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED_DEFAULT));

// return rewrite properties
return prop;
}
@@ -225,6 +225,7 @@
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphConfiguration;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.server.serverCore;
import net.yacy.server.serverSwitch;
import net.yacy.server.http.RobotsTxtConfig;
@@ -959,6 +960,9 @@ public void run() {

// generate snippets cache
this.log.config("Initializing Snippet Cache");

TextSnippet.statistics.setEnabled(getConfigBool(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED,
SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED_DEFAULT));

// init the wiki
wikiParser = new WikiCode();
@@ -374,6 +374,12 @@
/** when set to true : do not use dht, search local peer in a shortcut to the own server */
public static final String DEBUG_SEARCH_REMOTE_SOLR_TESTLOCAL= "debug.search.remote.solr.testlocal";

/** Key of the setting controlling whether text snippets statistics should be computed */
public static final String DEBUG_SNIPPETS_STATISTICS_ENABLED = "debug.snippets.statistics.enabled";

/** Default value for the setting controlling whether text snippets statistics should be computed */
public static final boolean DEBUG_SNIPPETS_STATISTICS_ENABLED_DEFAULT = false;

/**
* <p><code>public static final String <strong>WORDCACHE_MAX_COUNT</strong> = "wordCacheMaxCount"</code></p>
* <p>Name of the setting how many words the word-cache (or DHT-Out cache) shall contain maximal. Indexing pages if the
@@ -1874,7 +1874,7 @@ private boolean drainSolrStackToResult(boolean concurrentSnippetFetch) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_CACHE, "");
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
final TextSnippet yacysnippet = new TextSnippet(this.loader,
node,
this.query.getQueryGoal().getIncludeHashes(),

0 comments on commit a3ec7a7

Please sign in to comment.
You can’t perform that action at this time.