add log analysis scripts
For internal use only

Change-Id: I0c1303b6eae1f1bae6e94bca1baa4c2893f51ba4
smalyshev committed Sep 22, 2015
1 parent ce786b8 commit 3cd1600
Showing 5 changed files with 173 additions and 0 deletions.
91 changes: 91 additions & 0 deletions log-analysis/analyze.py
@@ -0,0 +1,91 @@
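# Analyze an updater log: split it into per-thread part files with split.pl,
# compute batch and query timing statistics via shell pipelines and the external
# ~/bin/r summary helper, and write a report to <logfile>.out.md.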
import sys
import os
import subprocess
import os.path
import re

def getOut(cmd, file=None):
    if file is None:
        file = mainfile
    return subprocess.check_output(cmd % file, shell=True).rstrip('0').replace('V1', '')

def getSummary(cmd, file=None):
    return getOut("cat %%s | %s | ~/bin/r summary -" % cmd, file)

meanre = re.compile(r'\s*Mean\s*:\s*([\d.]+)', re.MULTILINE)
def getAvg(summary):
    m = meanre.search(summary)
    #print m, summary
    return float(m.group(1))

logfile = sys.argv[1]
outfile = open(logfile + ".out.md", "w")
mainfile = logfile + ".part"

if not os.path.exists(mainfile):
    print "Splitting %s into parts" % logfile
    os.system("perl split.pl %s" % logfile)

print ("Main file %s " % mainfile)

batches = int(getOut("cat %s | grep 'changes in batch' | wc -l"))
print "%d batches in file" % batches
outfile.write("Logfile %s contains %d batches\n\n" % (logfile, batches))
outfile.write("= Batch Stats =\n")

modified_triples = getSummary("grep 'and modified' | cut -d ' ' -f 13")
modified_avg = getAvg(modified_triples)

changes_per_batch = getSummary("grep 'changes in batch' | cut -d ' ' -f 6")
print "%f changes per batch" % getAvg(changes_per_batch)
outfile.write("Changes per batch:\n%s\n" % changes_per_batch)

filtered_per_batch = getSummary("grep 'Filtered' | cut -d ' ' -f 9")
print "%f real changes per batch" % getAvg(filtered_per_batch)
outfile.write("Filtered changes per batch:\n%s\n" % filtered_per_batch)
filter_avg = getAvg(filtered_per_batch)

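# Wall-clock milliseconds between consecutive 'Polled up to' log lines, i.e. time spent per batch.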
time_per_batch = getSummary("grep 'Polled up to' | cut -d ' ' -f 1 | perl -ne '/(\d+):(\d+):(\d+)\.(\d+)/; $ms = $4+1000*($3+60*($2+60*$1)); if($lastms) { print $ms-$lastms, chr(0xA); } $lastms = $ms;'")
outfile.write("Time per batch (ms):\n%s\n" % time_per_batch)
time_avg = getAvg(time_per_batch)

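# Seconds by which the polled-up-to timestamp advances per batch, i.e. how far the change stream moves.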
advance_per_batch = getSummary("grep 'Polled up to' | cut -d ' ' -f 10 | perl -ne '/T(\d+):(\d+):(\d+)Z/; $ms = ($3+60*($2+60*$1)); if($lastms) { print $ms-$lastms,chr(0xA); } $lastms = $ms;'")
outfile.write("Advance per batch (s):\n%s\n" % advance_per_batch)
adv_avg = getAvg(advance_per_batch)

outfile.write("Modified triples:\n%s\n" % modified_triples)

outfile.write("Modified triples - %f per entity\n" % (modified_avg/filter_avg))

outfile.write("= Processing Stats =\n")

outfile.write("Processing time per change: %f ms, %f updates/second\n" %(time_avg/filter_avg, filter_avg/time_avg*1000))
print ("Processing time per change: %f ms, %f updates/second\n" %(time_avg/filter_avg, filter_avg/time_avg*1000))

print "Stream is %f updates/s\n" % (filter_avg/adv_avg)
outfile.write("\nStream is %f updates/s\n" % (filter_avg/adv_avg))
outfile.write("\nUpdate/stream ratio is %f\n\n" % (adv_avg/time_avg*1000))

update_preparation = getSummary("grep 'Preparing update' | cut -d ' ' -f 10")
outfile.write("Update preparation time (ms):\n%s\n" % update_preparation)

sparqlname = logfile + ".q"
if not os.path.exists(sparqlname):
    os.system("cat %s | perl getsparql.pl | sort > %s" % (logfile, sparqlname))
print "Queries in %s" % sparqlname

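# Per-query timings, taken from the sorted <logfile>.q file produced by getsparql.pl.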
filter_times = getSummary("grep 'SELECT DISTINCT ?s WHERE' | cut -d ' ' -f 7", sparqlname)
update_times = getSummary("grep 'Clear' | cut -d ' ' -f 9", sparqlname)
tsupdate_times = getSummary("grep 'DELETE {' | cut -d ' ' -f 4", sparqlname)
values_times = getSummary("grep 'SELECT DISTINCT ?entity ?s' | cut -d ' ' -f 6", sparqlname)

outfile.write("== Query Stats ==\n")
outfile.write("FILTER query (ms):\n%s\n" % filter_times)
outfile.write("SELECT values query (ms):\n%s\n" % values_times)
outfile.write("UPDATE query (ms):\n%s\n" % update_times)
outfile.write("Timestamp UPDATE query (ms):\n%s\n" % tsupdate_times)

update_avg = getAvg(update_times)

outfile.write("Update query speed is %f entities/s, %f triples/s\n" % (filter_avg/update_avg*1000, modified_avg/update_avg*1000))
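# Copy the finished report to the clipboard (macOS pbcopy).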
os.system("cat %s.out.md | pbcopy" % logfile)
7 changes: 7 additions & 0 deletions log-analysis/datediff.pl
@@ -0,0 +1,7 @@
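# Print the difference in milliseconds between consecutive HH:MM:SS.mmm timestamps in the input.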
while(<>) {
    if(/(\d+):(\d+):(\d+)\.(\d+)/) {
        $ms = $4+1000*($3+60*($2+60*$1));
        if($lastms) { print $ms-$lastms, "\n"; }
        $lastms = $ms;
    }
}
32 changes: 32 additions & 0 deletions log-analysis/getq.pl
@@ -0,0 +1,32 @@
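# Classify each SPARQL query in the log by shape (version, fetch, refs, values, update)
# and print its completion time in ms, its type and its line count.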
$qtype = undef;
while(<>) {
    if(/Running SPARQL/) {
        $qtype = 'other';
        $cnt = 1;
    }
    if(/Completed in (\d+)/) {
        if($qtype) {
            print "$1 $qtype $cnt\n";
        }
        $qtype = undef;
        next;
    }
    next unless $qtype;
    $cnt++;
    if(/ASK {/) {
        $qtype = 'version';
    }
    if(/SELECT DISTINCT \?s/) {
        $qtype = 'fetch';
    }
    if($qtype eq 'fetch' && m|<http://www.w3.org/ns/prov#wasDerivedFrom>|) {
        $qtype = 'refs';
    }
    if($qtype eq 'refs' && /} UNION {/) {
        $qtype = 'values';
    }
    if(/INSERT {/) {
        $qtype = 'update';
    }
}

15 changes: 15 additions & 0 deletions log-analysis/getsparql.pl
@@ -0,0 +1,15 @@
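# Print "<query> => <completion time in ms>" for every SPARQL query found in the log.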
$sparql = 0;
$name = "wtf";
while(<>) {
    if(!$sparql) {
        if(/Running SPARQL: (.+)/) {
            $name = $1;
            $sparql = 1;
        }
    } else {
        if(/- Completed in (\d+) ms/) {
            $sparql = 0;
            print "$name => $1\n";
        }
    }
}
28 changes: 28 additions & 0 deletions log-analysis/split.pl
@@ -0,0 +1,28 @@
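# Split a log into per-thread part files based on the [main] / [update N] marker:
# the main thread goes to <log>.part, update thread N to <log>.partN.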

$f = $ARGV[0];

open $in, $ARGV[0] or die $!;
%FILES = ();
$out = '';
$outf = STDOUT;

while(<$in>) {
    if(/\[(main|update (\d+))\]/) {
        $newout = $1;
        if($out ne $newout) {
            $outf = switchout($newout, $2);
            $out = $newout;
        }
    }
    print $outf $_;
}

sub switchout {
    my ($newout, $name) = @_;
    if(!defined($FILES{$newout})) {
        my $f = $ARGV[0].".part".$name;
        print "Opening $f for $newout\n";
        open $FILES{$newout}, ">", $f;
    }
    return $FILES{$newout};
}
