Skip to content

Commit

Permalink
fixed indexing log statistics
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4953 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
lotus committed Jun 24, 2008
1 parent dba7ba0 commit 2dc7c00
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 47 deletions.
6 changes: 2 additions & 4 deletions htroot/LogStatistics_p.html
Expand Up @@ -83,10 +83,8 @@ <h2>Log Statistics</h2>
<dt>Size of indexed sites:</dt><dd>#[indexedSiteSizeSum]# #[indexedSiteSizeSumUnit]#</dd>
<dt>Indexed words:</dt><dd>#[indexedWords]#</dd>
<dt>Indexed anchors:</dt><dd>#[indexedAnchors]#</dd>
<dt>Total stacking time:</dt><dd>#[indexedStackingTime]# #[indexedStackingTimeUnit]#</dd>
<dt>Total parsing time:</dt><dd>#[indexedParsingTime]# #[indexedParsingTimeUnit]#</dd>
<dt>Total indexing time:</dt><dd>#[indexedIndexingTime]# #[indexedIndexingTimeUnit]#</dd>
<dt>Total storage time:</dt><dd>#[indexedStorageTime]# #[indexedStorageTimeUnit]#</dd>
<dt>Total link storage time:</dt><dd>#[indexedLinkStorageTime]# #[indexedLinkStorageTimeUnit]#</dd>
<dt>Total index storage time:</dt><dd>#[indexedIndexStorageTime]# #[indexedIndexStorageTimeUnit]#</dd>
</dl>
</fieldset>

Expand Down
30 changes: 18 additions & 12 deletions htroot/LogStatistics_p.java
Expand Up @@ -107,22 +107,28 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put(RESULTS + LogParserPLASMA.ERROR_CHILD_TWICE_RIGHT, (Integer) r.get(LogParserPLASMA.ERROR_CHILD_TWICE_RIGHT));
prop.put(RESULTS + LogParserPLASMA.ERROR_MALFORMED_URL, (Integer) r.get(LogParserPLASMA.ERROR_MALFORMED_URL));
prop.put(RESULTS + LogParserPLASMA.INDEXED_ANCHORS, (Integer) r.get(LogParserPLASMA.INDEXED_ANCHORS));
t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_INDEX_TIME)).longValue());
prop.put(RESULTS + LogParserPLASMA.INDEXED_INDEX_TIME, t[0]);
prop.put(RESULTS + LogParserPLASMA.INDEXED_INDEX_TIME + "Unit", t[1]);
t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_PARSE_TIME)).longValue());
prop.put(RESULTS + LogParserPLASMA.INDEXED_PARSE_TIME, t[0]);
prop.put(RESULTS + LogParserPLASMA.INDEXED_PARSE_TIME + "Unit", t[1]);
// t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_INDEX_TIME)).longValue());
// prop.put(RESULTS + LogParserPLASMA.INDEXED_INDEX_TIME, t[0]);
// prop.put(RESULTS + LogParserPLASMA.INDEXED_INDEX_TIME + "Unit", t[1]);
// t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_PARSE_TIME)).longValue());
// prop.put(RESULTS + LogParserPLASMA.INDEXED_PARSE_TIME, t[0]);
// prop.put(RESULTS + LogParserPLASMA.INDEXED_PARSE_TIME + "Unit", t[1]);
prop.put(RESULTS + LogParserPLASMA.INDEXED_SITES, (Integer) r.get(LogParserPLASMA.INDEXED_SITES));
t = transformMem(((Integer)r.get(LogParserPLASMA.INDEXED_SITES_SIZE)).longValue());
prop.put(RESULTS + LogParserPLASMA.INDEXED_SITES_SIZE, t[0]);
prop.put(RESULTS + LogParserPLASMA.INDEXED_SITES_SIZE + "Unit", t[1]);
t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_STACK_TIME)).longValue());
prop.put(RESULTS + LogParserPLASMA.INDEXED_STACK_TIME, t[0]);
prop.put(RESULTS + LogParserPLASMA.INDEXED_STACK_TIME + "Unit", t[1]);
t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_STORE_TIME)).longValue());
prop.put(RESULTS + LogParserPLASMA.INDEXED_STORE_TIME, t[0]);
prop.put(RESULTS + LogParserPLASMA.INDEXED_STORE_TIME + "Unit", t[1]);
// t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_STACK_TIME)).longValue());
// prop.put(RESULTS + LogParserPLASMA.INDEXED_STACK_TIME, t[0]);
// prop.put(RESULTS + LogParserPLASMA.INDEXED_STACK_TIME + "Unit", t[1]);
// t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_STORE_TIME)).longValue());
// prop.put(RESULTS + LogParserPLASMA.INDEXED_STORE_TIME, t[0]);
// prop.put(RESULTS + LogParserPLASMA.INDEXED_STORE_TIME + "Unit", t[1]);
t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_LINKSTORE_TIME)).longValue());
prop.put(RESULTS + LogParserPLASMA.INDEXED_LINKSTORE_TIME, t[0]);
prop.put(RESULTS + LogParserPLASMA.INDEXED_LINKSTORE_TIME + "Unit", t[1]);
t = transformTime(((Integer)r.get(LogParserPLASMA.INDEXED_INDEXSTORE_TIME)).longValue());
prop.put(RESULTS + LogParserPLASMA.INDEXED_INDEXSTORE_TIME, t[0]);
prop.put(RESULTS + LogParserPLASMA.INDEXED_INDEXSTORE_TIME + "Unit", t[1]);
prop.put(RESULTS + LogParserPLASMA.INDEXED_WORDS, (Integer) r.get(LogParserPLASMA.INDEXED_WORDS));
prop.put(RESULTS + LogParserPLASMA.PEERS_BUSY, (Integer) r.get(LogParserPLASMA.PEERS_BUSY));
prop.put(RESULTS + LogParserPLASMA.PEERS_TOO_LESS, (Integer) r.get(LogParserPLASMA.PEERS_TOO_LESS));
Expand Down
77 changes: 46 additions & 31 deletions source/de/anomic/server/logging/logParsers/LogParserPLASMA.java
Expand Up @@ -156,22 +156,28 @@ public class LogParserPLASMA implements LogParser{
/** total amount of indexed anchors - <strong>Integer</strong> */
public static final String INDEXED_ANCHORS = "indexedAnchors";

/** total time needed for stacking the site of an indexing - <strong>Integer</strong> */
public static final String INDEXED_STACK_TIME = "indexedStackingTime";

/** total time needed for parsing during indexing - <strong>Integer</strong> */
public static final String INDEXED_PARSE_TIME = "indexedParsingTime";

/** total time needed for the actual indexing during indexing - <strong>Integer</strong> */
public static final String INDEXED_INDEX_TIME = "indexedIndexingTime";

/** total time needed for storing the results of an indexing - <strong>Integer</strong> */
public static final String INDEXED_STORE_TIME = "indexedStorageTime";
// /** total time needed for stacking the site of an indexing - <strong>Integer</strong> */
// public static final String INDEXED_STACK_TIME = "indexedStackingTime";
//
// /** total time needed for parsing during indexing - <strong>Integer</strong> */
// public static final String INDEXED_PARSE_TIME = "indexedParsingTime";
//
// /** total time needed for the actual indexing during indexing - <strong>Integer</strong> */
// public static final String INDEXED_INDEX_TIME = "indexedIndexingTime";
//
// /** total time needed for storing the results of an indexing - <strong>Integer</strong> */
// public static final String INDEXED_STORE_TIME = "indexedStorageTime";

/**
 * total time needed for storing the results of a link indexing - <strong>Integer</strong><br>
 * Result key for the sum of the <code>LinkStorageTime</code> values (logged in ms) taken
 * from every "Indexed ... words in URL" log entry matched by the parser.
 */
public static final String INDEXED_LINKSTORE_TIME = "indexedLinkStorageTime";

/**
 * total time needed for storing the results of a word indexing - <strong>Integer</strong><br>
 * Result key for the sum of the <code>indexStorageTime</code> values (logged in ms) taken
 * from every "Indexed ... words in URL" log entry matched by the parser.
 */
public static final String INDEXED_INDEXSTORE_TIME = "indexedIndexStorageTime";

/** accumulated time needed to parse the log entries up to now (in ms)*/
public static final String TOTAL_PARSER_TIME = "totalParserTime";

/** times the parser was called, respectively amount of independant log-lines */
/** times the parser was called, respectively amount of independent log-lines */
public static final String TOTAL_PARSER_RUNS = "totalParserRuns";


Expand Down Expand Up @@ -207,7 +213,8 @@ public class LogParserPLASMA implements LogParser{
"\\*Indexed (\\d+) words in URL [\\w:.&/%-~;$\u00A7@=]* \\[[\\w_-]{12}\\]\\r?\\n?" +
"\\tDescription: +([\\w-\\.,:!='\"|/+@\\(\\) \\t]*)\\r?\\n?" +
"\\tMimeType: ([\\w_~/-]*) \\| Charset: ([\\w-]*) \\| Size: (\\d+) bytes \\| Anchors: (\\d+)\\r?\\n?" +
"\\tStackingTime:[ ]*(\\d+) ms \\| ParsingTime:[ ]*(\\d+) ms \\| IndexingTime: (\\d+) ms \\| StorageTime: (\\d+) ms");
"\\tLinkStorageTime: (\\d+) ms \\| indexStorageTime: (\\d+) ms");
//"\\tStackingTime:[ ]*(\\d+) ms \\| ParsingTime:[ ]*(\\d+) ms \\| IndexingTime: (\\d+) ms \\| StorageTime: (\\d+) ms");

private int urlSum=0;
private int urlReqSum=0;
Expand Down Expand Up @@ -243,10 +250,12 @@ public class LogParserPLASMA implements LogParser{
private int indexedWordSum = 0;
private int indexedSiteSizeSum = 0;
private int indexedAnchorsCount = 0;
private int indexedStackingTime = 0;
private int indexedParsingTime = 0;
private int indexedIndexingTime = 0;
private int indexedStorageTime = 0;
private int indexedLinkStorageTime = 0;
private int indexedIndexStorageTime = 0;
// private int indexedStackingTime = 0;
// private int indexedParsingTime = 0;
// private int indexedIndexingTime = 0;
// private int indexedStorageTime = 0;
private long totalParserTime = 0;
private int totalParserRuns = 0;

Expand Down Expand Up @@ -385,15 +394,17 @@ public int parse(String logLevel, String logLine) {
}
m = adv1.matcher (logLine);

if (m.find() && m.groupCount() >= 10) {
if (m.find() && m.groupCount() >= 8) {
indexedSites++;
indexedWordSum += Integer.parseInt(m.group(1));
indexedSiteSizeSum += Integer.parseInt(m.group(5));
indexedAnchorsCount += Integer.parseInt(m.group(6));
indexedStackingTime += Integer.parseInt(m.group(7));
indexedParsingTime += Integer.parseInt(m.group(8));
indexedIndexingTime += Integer.parseInt(m.group(9));
indexedStorageTime += Integer.parseInt(m.group(10));
indexedLinkStorageTime += Integer.parseInt(m.group(7));
indexedIndexStorageTime += Integer.parseInt(m.group(8));
// indexedStackingTime += Integer.parseInt(m.group(7));
// indexedParsingTime += Integer.parseInt(m.group(8));
// indexedIndexingTime += Integer.parseInt(m.group(9));
// indexedStorageTime += Integer.parseInt(m.group(10));
totalParserTime += (System.currentTimeMillis() - start);
totalParserRuns++;
return 0;
Expand Down Expand Up @@ -477,10 +488,12 @@ public Hashtable<String, Object> getResults() {
results.put(INDEXED_WORDS , new Integer(indexedWordSum));
results.put(INDEXED_SITES_SIZE , new Integer(indexedSiteSizeSum));
results.put(INDEXED_ANCHORS , new Integer(indexedAnchorsCount));
results.put(INDEXED_STACK_TIME , new Integer(indexedStackingTime));
results.put(INDEXED_PARSE_TIME , new Integer(indexedParsingTime));
results.put(INDEXED_INDEX_TIME , new Integer(indexedIndexingTime));
results.put(INDEXED_STORE_TIME , new Integer(indexedStorageTime));
// results.put(INDEXED_STACK_TIME , new Integer(indexedStackingTime));
// results.put(INDEXED_PARSE_TIME , new Integer(indexedParsingTime));
// results.put(INDEXED_INDEX_TIME , new Integer(indexedIndexingTime));
// results.put(INDEXED_STORE_TIME , new Integer(indexedStorageTime));
results.put(INDEXED_LINKSTORE_TIME , new Integer(indexedLinkStorageTime));
results.put(INDEXED_INDEXSTORE_TIME, new Integer(indexedIndexStorageTime));
results.put(TOTAL_PARSER_TIME , new Long(totalParserTime));
results.put(TOTAL_PARSER_RUNS , new Integer(totalParserRuns));
return results;
Expand All @@ -498,14 +511,16 @@ public void printResults() {
if(rankingDistributionCount == 0) rankingDistributionCount = 1;
if(DHTSelectionWordsTimeCount == 0) DHTSelectionWordsTimeCount = 1;
if(indexedSites != 0) indexedSites++;
System.out.println("INDEXER: Indexed " + indexedSites + " sites in " + (indexedStackingTime + indexedParsingTime + indexedIndexingTime + indexedStorageTime) + " milliseconds.");
System.out.println("INDEXER: Indexed " + indexedSites + " sites in " + (indexedLinkStorageTime + indexedIndexStorageTime) + " milliseconds.");
System.out.println("INDEXER: Indexed " + indexedWordSum + " words on " + indexedSites + " sites. (avg. words per site: " + (indexedWordSum / indexedSites) + ").");
System.out.println("INDEXER: Total Size of indexed sites: " + indexedSiteSizeSum + " bytes (avg. size per site: " + (indexedSiteSizeSum / indexedSites) + " bytes).");
System.out.println("INDEXER: Total Number of Anchors found: " + indexedAnchorsCount + "(avg. Anchors per site: " + (indexedAnchorsCount / indexedSites) + ").");
System.out.println("INDEXER: Total StackingTime: " + indexedStackingTime + " milliseconds (avg. StackingTime: " + (indexedStackingTime / indexedSites) + " milliseconds).");
System.out.println("INDEXER: Total ParsingTime: " + indexedParsingTime + " milliseconds (avg. ParsingTime: " + (indexedParsingTime / indexedSites) + " milliseconds).");
System.out.println("INDEXER: Total IndexingTime: " + indexedIndexingTime + " milliseconds (avg. IndexingTime: " + (indexedIndexingTime / indexedSites) + " milliseconds).");
System.out.println("INDEXER: Total StorageTime: " + indexedStorageTime + " milliseconds (avg. StorageTime: " + (indexedStorageTime / indexedSites) + " milliseconds).");
System.out.println("INDEXER: Total LinkStorageTime: " + indexedLinkStorageTime + " milliseconds (avg. StorageTime: " + (indexedLinkStorageTime / indexedSites) + " milliseconds).");
System.out.println("INDEXER: Total indexStorageTime: " + indexedIndexStorageTime + " milliseconds (avg. StorageTime: " + (indexedIndexStorageTime / indexedSites) + " milliseconds).");
// System.out.println("INDEXER: Total StackingTime: " + indexedStackingTime + " milliseconds (avg. StackingTime: " + (indexedStackingTime / indexedSites) + " milliseconds).");
// System.out.println("INDEXER: Total ParsingTime: " + indexedParsingTime + " milliseconds (avg. ParsingTime: " + (indexedParsingTime / indexedSites) + " milliseconds).");
// System.out.println("INDEXER: Total IndexingTime: " + indexedIndexingTime + " milliseconds (avg. IndexingTime: " + (indexedIndexingTime / indexedSites) + " milliseconds).");
// System.out.println("INDEXER: Total StorageTime: " + indexedStorageTime + " milliseconds (avg. StorageTime: " + (indexedStorageTime / indexedSites) + " milliseconds).");
if(urlSum != 0) urlSum++;
System.out.println("DHT: Recieved " + urlSum + " Urls in " + urlTimeSum + " ms. Blocked " + blockedURLSum + " URLs.");
System.out.println("DHT: " + urlTimeSum / urlSum + " milliseconds per URL.");
Expand Down

0 comments on commit 2dc7c00

Please sign in to comment.