Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Added analysis optional setting to compute statistics on text snippets
Thus producing some basic stats on processing times for snippets
generation and counts on snippets per source type.
  • Loading branch information
luccioman committed Apr 15, 2018
1 parent 508050f commit a3ec7a7
Show file tree
Hide file tree
Showing 11 changed files with 347 additions and 19 deletions.
2 changes: 2 additions & 0 deletions defaults/yacy.init
Expand Up @@ -500,6 +500,8 @@ debug.search.remote.dht.off=false
debug.search.remote.dht.testlocal=false
debug.search.remote.solr.off=false
debug.search.remote.solr.testlocal=false
# Set to true to enable computation of statistics on text snippets processing
debug.snippets.statistics.enabled=false

#staticIP if you have a static IP, you can use this setting
staticIP=
Expand Down
17 changes: 17 additions & 0 deletions htroot/ConfigPortal_p.html
Expand Up @@ -87,6 +87,23 @@ <h2>Integration of a Search Portal</h2>
<dt>Snippet Fetch Strategy &amp; Link Verification</dt>
<dd>
<img src="env/grafics/idea.png" width="32" height="32" alt="idea" align="center"/>Speed up search results with this option! (use CACHEONLY or FALSE to switch off verification)<br/>
#(debug.snippets.statistics.enabled)#<i>Statistics on text snippets generation can be enabled in the <a href="Settings_p.html?page=debug">Debug/Analysis Settings</a> page.</i>
::<div class="info" style="float:left; margin-right : 0.1em;">
<img src="env/grafics/i16.gif" width="16" height="16" alt="Detailed statistics"/>
<div class="infobox">
Counts by origin :
<ul>
<li>#[totalFromSolr]# provided by Solr</li>
<li>#[totalFromCache]# from cache</li>
<li>#[totalFromMetadata]# computed from indexed metadata</li>
<li>#[totalFromWeb]# from original documents fetched and parsed</li>
<li>#[totalFailures]# failures</li>
</ul>
</div>
</div>
<i>#[totalSnippets]# text snippets were generated since last server startup, in a mean time of #[snippetsMeanTime]# and a maximum of #[snippetsMaxTime]#.</i>
#(/debug.snippets.statistics.enabled)#
<br/>
<input type="radio" name="search.verify" value="nocache" #(search.verify.nocache)#::checked="checked"#(/search.verify.nocache)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/> NOCACHE: no use of web cache, load all snippets online<br/>
<input type="radio" name="search.verify" value="iffresh" #(search.verify.iffresh)#::checked="checked"#(/search.verify.iffresh)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/> IFFRESH: use the cache if the cache exists and is fresh otherwise load online<br/>
<input type="radio" name="search.verify" value="ifexist" #(search.verify.ifexist)#::checked="checked"#(/search.verify.ifexist)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/> IFEXIST: use the cache if the cache exist or load online<br/>
Expand Down
45 changes: 45 additions & 0 deletions htroot/ConfigPortal_p.java
Expand Up @@ -30,6 +30,7 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.time.Duration;
import java.util.Properties;

import net.yacy.cora.document.id.DigestURL;
Expand All @@ -41,6 +42,7 @@
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.server.http.HTTPDFileHandler;
Expand Down Expand Up @@ -218,6 +220,11 @@ public static serverObjects respond(final RequestHeader header, final serverObje
prop.put(SwitchboardConstants.REMOTESEARCH_HTTPS_PREFERRED,
sb.getConfigBool(SwitchboardConstants.REMOTESEARCH_HTTPS_PREFERRED,
SwitchboardConstants.REMOTESEARCH_HTTPS_PREFERRED_DEFAULT) ? 1 : 0);

final boolean textSnippetsStatisticsEnabled = sb.getConfigBool(
SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED,
SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED_DEFAULT);
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED, textSnippetsStatisticsEnabled);

prop.put(SwitchboardConstants.GREEDYLEARNING_ACTIVE, sb.getConfigBool(SwitchboardConstants.GREEDYLEARNING_ACTIVE, false) ? 1 : 0);
prop.put(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, sb.getConfig(SwitchboardConstants.GREEDYLEARNING_LIMIT_DOCCOUNT, "0"));
Expand All @@ -229,6 +236,28 @@ public static serverObjects respond(final RequestHeader header, final serverObje
} else {
prop.put(SwitchboardConstants.REMOTESEARCH_RESULT_STORE_MAXSIZE, "");
}

/* Provide some basic stats about text snippets generation time to help choosing snippet options */
if(textSnippetsStatisticsEnabled) {
final long totalSnippets = TextSnippet.statistics.getTotalSnippets();
final long totalSnippetsInitTime = TextSnippet.statistics.getTotalInitTime();
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalSnippets", totalSnippets);
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFromSolr",
TextSnippet.statistics.getTotalFromSolr());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFromCache",
TextSnippet.statistics.getTotalFromCache());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFromMetadata",
TextSnippet.statistics.getTotalFromMetadata());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFromWeb",
TextSnippet.statistics.getTotalFromWeb());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_totalFailures",
TextSnippet.statistics.getTotalFailures());
prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_snippetsMeanTime",
formatDuration(totalSnippets > 0 ? totalSnippetsInitTime / totalSnippets : 0));

prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED + "_snippetsMaxTime",
formatDuration(TextSnippet.statistics.getMaxInitTime()));
}

prop.put("search.verify.nocache", sb.getConfig("search.verify", "").equals("nocache") ? 1 : 0);
prop.put("search.verify.iffresh", sb.getConfig("search.verify", "").equals("iffresh") ? 1 : 0);
Expand Down Expand Up @@ -279,4 +308,20 @@ public static serverObjects respond(final RequestHeader header, final serverObje
return prop;
}

/**
 * Renders a millisecond duration as a short human-readable label.
 * Durations of at least one full second are shown in whole seconds
 * (the fractional part is dropped); anything shorter is shown in milliseconds.
 *
 * @param durationValue a duration in milliseconds
 * @return the duration followed by its unit suffix, either "s" or "ms"
 */
private static String formatDuration(final long durationValue) {
    final Duration elapsed = Duration.ofMillis(durationValue);
    final long wholeSeconds = elapsed.getSeconds();

    /* Less than one full second : keep the millisecond precision */
    if (wholeSeconds <= 0) {
        return elapsed.toMillis() + "ms";
    }
    return wholeSeconds + "s";
}

}
5 changes: 5 additions & 0 deletions htroot/SettingsAck_p.java
Expand Up @@ -43,6 +43,7 @@
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.server.serverCore;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
Expand Down Expand Up @@ -580,6 +581,10 @@ else if (!filter.equals("*")){
tickedCheckbox = post.containsKey("searchShowRanking");
env.setConfig(SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING, tickedCheckbox);

tickedCheckbox = post.containsKey(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED);
sb.setConfig(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED, tickedCheckbox);
TextSnippet.statistics.setEnabled(tickedCheckbox);

/* For easier user understanding, the following flags controlling data sources selection
* are rendered in the UI as checkboxes corresponding to enabled value when ticked */
tickedCheckbox = post.containsKey("searchLocalDHT");
Expand Down
20 changes: 20 additions & 0 deletions htroot/Settings_Debug.inc
Expand Up @@ -138,6 +138,26 @@
</div>
</fieldset>

<fieldset>
<legend>Text snippets statistics</legend>

<div class="form-group">
<div class="col-sm-4">
<div class="checkbox">
<label>
<input name="debug.snippets.statistics.enabled" id="snippetsStatsEnabled"
type="checkbox" #(debug.snippets.statistics.enabled)#::checked#(/debug.snippets.statistics.enabled)#
aria-describedby="snippetStatisticsInfo"/>
Enable text snippets statistics
</label>
</div>
</div>
<div class="col-sm-8" id="snippetStatisticsInfo">
When checked, statistics are collected on text snippets generation for search results. They are summarized in the <a href="ConfigPortal_p.html">Portal Configuration</a> page.
</div>
</div>
</fieldset>

<div class="col-sm-6">
<input type="submit" class="btn btn-primary" name="debugAnalysisSettings" value="Submit" aria-describedby="submitInfo"/>
<em id="submitInfo">Changes will take effect immediately.</em>
Expand Down
4 changes: 4 additions & 0 deletions htroot/Settings_p.java
Expand Up @@ -241,6 +241,10 @@ else if (page.equals("crawler")) {

prop.put("searchShowRankingChecked", env.getConfigBool(SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING, SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING_DEFAULT) ? 1 : 0);

prop.put(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED,
sb.getConfigBool(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED,
SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED_DEFAULT));

// return rewrite properties
return prop;
}
Expand Down
4 changes: 4 additions & 0 deletions source/net/yacy/search/Switchboard.java
Expand Up @@ -225,6 +225,7 @@
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphConfiguration;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.server.serverCore;
import net.yacy.server.serverSwitch;
import net.yacy.server.http.RobotsTxtConfig;
Expand Down Expand Up @@ -959,6 +960,9 @@ public void run() {

// generate snippets cache
this.log.config("Initializing Snippet Cache");

TextSnippet.statistics.setEnabled(getConfigBool(SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED,
SwitchboardConstants.DEBUG_SNIPPETS_STATISTICS_ENABLED_DEFAULT));

// init the wiki
wikiParser = new WikiCode();
Expand Down
6 changes: 6 additions & 0 deletions source/net/yacy/search/SwitchboardConstants.java
Expand Up @@ -374,6 +374,12 @@ public final class SwitchboardConstants {
/** when set to true : do not use dht, search local peer in a shortcut to the own server */
public static final String DEBUG_SEARCH_REMOTE_SOLR_TESTLOCAL= "debug.search.remote.solr.testlocal";

/** Key of the setting controlling whether text snippets statistics should be computed */
public static final String DEBUG_SNIPPETS_STATISTICS_ENABLED = "debug.snippets.statistics.enabled";

/** Default value for the setting controlling whether text snippets statistics should be computed */
public static final boolean DEBUG_SNIPPETS_STATISTICS_ENABLED_DEFAULT = false;

/**
* <p><code>public static final String <strong>WORDCACHE_MAX_COUNT</strong> = "wordCacheMaxCount"</code></p>
* <p>Name of the setting how many words the word-cache (or DHT-Out cache) shall contain maximal. Indexing pages if the
Expand Down
2 changes: 1 addition & 1 deletion source/net/yacy/search/query/SearchEvent.java
Expand Up @@ -1874,7 +1874,7 @@ private boolean drainSolrStackToResult(boolean concurrentSnippetFetch) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_CACHE, "");
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
final TextSnippet yacysnippet = new TextSnippet(this.loader,
node,
this.query.getQueryGoal().getIncludeHashes(),
Expand Down

0 comments on commit a3ec7a7

Please sign in to comment.