Skip to content
Permalink
Browse files

Easier tracking of longest text snippets initializations

A log message is written for each new maximum snippet initialization time when
text snippet statistics are enabled and the FINE log level is enabled on the
TextSnippetStatistics class.
  • Loading branch information...
luccioman committed May 1, 2018
1 parent 3c4344c commit 3b89c232dbe3861244bba42edd4b43900a449ce5
@@ -1874,7 +1874,7 @@ private boolean drainSolrStackToResult(boolean concurrentSnippetFetch) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
final TextSnippet solrsnippet = new TextSnippet(node.url(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
final TextSnippet yacysnippet = new TextSnippet(this.loader,
node,
this.query.getQueryGoal().getIncludeHashes(),
@@ -154,13 +154,13 @@ public boolean fail() {
private ResultClass resultStatus;

public TextSnippet(
final byte[] urlhash,
final DigestURL url,
final String line,
final boolean isMarked,
final ResultClass errorCode,
final String errortext) {
long beginTime = System.currentTimeMillis();
init(urlhash, line, isMarked, errorCode, errortext, beginTime);
init(url, line, isMarked, errorCode, errortext, beginTime);
}

public TextSnippet(
@@ -177,7 +177,7 @@ public TextSnippet(
final DigestURL url = row.url();
if (queryhashes.isEmpty()) {
//System.out.println("found no queryhashes for URL retrieve " + url);
init(url.hash(), null, false, ResultClass.ERROR_NO_HASH_GIVEN, "no query hashes given", beginTime);
init(url, null, false, ResultClass.ERROR_NO_HASH_GIVEN, "no query hashes given", beginTime);
return;
}

@@ -188,7 +188,7 @@ public TextSnippet(
final String snippetLine = snippetsCache.get(wordhashes, urls);
if (snippetLine != null) {
// found the snippet
init(url.hash(), snippetLine, false, source, null, beginTime);
init(url, snippetLine, false, source, null, beginTime);
return;
}

@@ -239,7 +239,7 @@ public TextSnippet(
}
if (sentences == null) {
// not found the snippet
init(url.hash(), null, false, ResultClass.SOURCE_METADATA, null, beginTime);
init(url, null, false, ResultClass.SOURCE_METADATA, null, beginTime);
return;
}

@@ -249,7 +249,7 @@ public TextSnippet(
textline = tsr.getSnippet();
remainingHashes = tsr.getRemainingWords();
} catch (final UnsupportedOperationException e) {
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
init(url, null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
return;
}
}
@@ -293,7 +293,7 @@ public TextSnippet(
}
}
}
init(url.hash(), textline.length() > 0 ? textline : this.line, false, ResultClass.SOURCE_METADATA, null, beginTime);
init(url, textline.length() > 0 ? textline : this.line, false, ResultClass.SOURCE_METADATA, null, beginTime);
return;
}
sentences = null; // we don't need this here any more
@@ -309,12 +309,12 @@ public TextSnippet(
if (response == null) {
// in case that we did not get any result we can still return a success when we are not allowed to go online
if (cacheStrategy == null || cacheStrategy.mustBeOffline()) {
init(url.hash(), null, false, ResultClass.ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry", beginTime);
init(url, null, false, ResultClass.ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry", beginTime);
return;
}

// if it is still not available, report an error
init(url.hash(), null, false, ResultClass.ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry", beginTime);
init(url, null, false, ResultClass.ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry", beginTime);
return;
}

@@ -329,11 +329,11 @@ public TextSnippet(
try {
document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
} catch (final Parser.Failure e) {
init(url.hash(), null, false, ResultClass.ERROR_PARSER_FAILED, e.getMessage(), beginTime); // cannot be parsed
init(url, null, false, ResultClass.ERROR_PARSER_FAILED, e.getMessage(), beginTime); // cannot be parsed
return;
}
if (document == null) {
init(url.hash(), null, false, ResultClass.ERROR_PARSER_FAILED, "parser error/failed", beginTime); // cannot be parsed
init(url, null, false, ResultClass.ERROR_PARSER_FAILED, "parser error/failed", beginTime); // cannot be parsed
return;
}

@@ -342,7 +342,7 @@ public TextSnippet(
document.close();

if (sentences == null) {
init(url.hash(), null, false, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences", beginTime);
init(url, null, false, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences", beginTime);
return;
}

@@ -351,20 +351,20 @@ public TextSnippet(
textline = tsr.getSnippet();
remainingHashes = tsr.getRemainingWords();
} catch (final UnsupportedOperationException e) {
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
init(url, null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
return;
}
sentences = null;

if (textline == null || !remainingHashes.isEmpty()) {
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found", beginTime);
init(url, null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found", beginTime);
return;
}
if (textline.length() > snippetMaxLength) textline = textline.substring(0, snippetMaxLength);

// finally store this snippet in our own cache
snippetsCache.put(wordhashes, urls, textline);
init(url.hash(), textline, false, source, null, beginTime);
init(url, textline, false, source, null, beginTime);
}

/**
@@ -378,18 +378,18 @@ public TextSnippet(
* @param beginTime the time in milliseconds when TextSnippet creation started
*/
private void init(
        final DigestURL url,
        final String line,
        final boolean isMarked,
        final ResultClass errorCode,
        final String errortext,
        final long beginTime) {
    // Only the URL is passed in: the hash is derived from it, and the URL itself
    // is forwarded to the statistics so that it can appear in their log output.
    this.urlhash = url.hash();
    this.line = line;
    this.isMarked = isMarked;
    this.resultStatus = errorCode;
    this.error = errortext;
    // Reported exactly once per snippet, with the time elapsed since beginTime.
    TextSnippet.statistics.addTextSnippetStatistics(url, System.currentTimeMillis() - beginTime, this.resultStatus);
}

/**
@@ -26,13 +26,18 @@
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.LongBinaryOperator;

import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.snippet.TextSnippet.ResultClass;

/**
* Handle statistics on TextSnippet processing.
*/
public class TextSnippetStatistics {

/** Logs handler */
private static final ConcurrentLog logger = new ConcurrentLog(TextSnippetStatistics.class.getName());

/** Total number of TextSnippet instances created since last JVM start */
private AtomicLong totalSnippets = new AtomicLong(0);
@@ -141,17 +146,21 @@ public long getTotalFromWeb() {
* @param resultStatus
* the snippet result status.
*/
public void addTextSnippetStatistics(final long initTime, final ResultClass resultStatus) {
public void addTextSnippetStatistics(final DigestURL url, final long initTime, final ResultClass resultStatus) {
if (this.enabled.get() && resultStatus != null) {
this.totalSnippets.incrementAndGet();
this.totalInitTime.addAndGet(initTime);
this.maxInitTime.accumulateAndGet(initTime, new LongBinaryOperator() {
if(initTime == this.maxInitTime.accumulateAndGet(initTime, new LongBinaryOperator() {

@Override
public long applyAsLong(long currentValue, long updateValue) {
return currentValue < updateValue ? updateValue : currentValue;
}
});
})) {
if(logger.isFine()) {
logger.fine("New max snippet init time : status " + resultStatus + " in " + initTime + " ms for URL " + url);
}
}

if (resultStatus != null) {
switch (resultStatus) {
@@ -133,7 +133,7 @@ public void testDescriptionline() throws MalformedURLException {

// test with raw line (no marking added by YaCy)
TextSnippet ts = new TextSnippet(
url.hash(),
url,
rawtestline,
true, // isMarked,
TextSnippet.ResultClass.SOURCE_METADATA, "");
@@ -144,7 +144,7 @@ public void testDescriptionline() throws MalformedURLException {

// test with marking of query word
ts = new TextSnippet(
url.hash(),
url,
rawtestline,
false, // isMarked,
TextSnippet.ResultClass.SOURCE_METADATA, "");
@@ -157,7 +157,7 @@ public void testDescriptionline() throws MalformedURLException {
// test text with some numbers (english/german format)
rawtestline = "Test Version 1.83 calculates pi to 3,14 always";
ts = new TextSnippet(
url.hash(),
url,
rawtestline,
false, // isMarked,
TextSnippet.ResultClass.SOURCE_METADATA, "");

0 comments on commit 3b89c23

Please sign in to comment.
You can’t perform that action at this time.