In [5]:
from hdrh.histogram import HdrHistogram
import seaborn as sns
import pandas
from matplotlib import pyplot as plt
import os.path
from enum import Enum
import matplotlib as mpl
from typing import *
import pandas as pd

In [6]:
# --- Run configuration -------------------------------------------------------
# RUNID is handed to the DATA templates as `runid` when process_data formats
# them; none of the templates below contains a `{runid}` placeholder.
RUNID = ''
# The two result directories whose pause logs are pooled together.
RUNID1 = 'boar-2022-04-10-Sun-114742'
RUNID2 = 'boar-2022-04-11-Mon-205004'
# Handed to the DATA templates as `dacapo`; the templates below spell the same
# suffix out literally instead of using a `{dacapo}` placeholder.
DACAPO = 'dacapochopin-29a657f'

# Benchmark identifiers substituted for `{bench}` in the DATA templates.
# process_data uses the text before the first '.' as the benchmark's short name.
# NOTE(review): the meaning of the two numeric fields is not visible in this
# notebook -- confirm against the experiment configuration.
BENCH = [
    'avrora.1989.14',
    'batik.1989.2141',
    'biojava.1989.380',
    'cassandra.1989.523',
    'eclipse.1989.1063',
    'fop.1989.145',
    'graphchi.1989.507',
    'h2.1989.2370',
    'h2o.1989.7341',
    'jython.1989.647',
    'luindex.1989.82',
    'lusearch.1989.105',
    'pmd.1989.1268',
    'sunflow.1989.173',
    'tomcat.1989.141',
    'xalan.1989.86',
    'zxing.1989.304',
]

# One folder template per run; both runs share the same build/configuration
# suffix, so only the run id differs between the entries.
DATA = [
    run_id + '/{bench}.jdk-lxr.ix.common.tph.trace2-5.srv-128.srvw.lfb-32.dacapochopin-29a657f'
    for run_id in (RUNID1, RUNID2)
]
# Default number of invocation indices (0 .. MAX_INVOCATIONS - 1) that
# process_data probes for each folder.
MAX_INVOCATIONS = 20

In [7]:
def load_data(invocation: int, folder: str):
    """Read the pause log of a single invocation.

    The log is looked up at ``<folder>.<invocation>/pauses.csv``; ``~`` is
    expanded and the path is resolved before the lookup. The file is read
    with ``names=["nanos"]`` (no header row is expected), and two derived
    columns are added: ``millis`` (``nanos / 1000000``) and ``micros``
    (``nanos / 1000``).

    Args:
        invocation: Index of the invocation; used as the folder suffix.
        folder: Folder path without the ``.<invocation>`` suffix.

    Returns:
        A DataFrame with columns ``nanos``, ``millis`` and ``micros``, or
        ``None`` when the CSV file does not exist.
    """
    csv_path = os.path.realpath(
        os.path.expanduser(f'{folder}.{invocation}/pauses.csv')
    )
    if os.path.isfile(csv_path):
        pauses = pandas.read_csv(csv_path, names=["nanos"])
        nanos = pauses["nanos"]
        return pauses.assign(millis=nanos / 1000000, micros=nanos / 1000)
    # A missing log means this invocation produced no usable data.
    return None

def process_data(data: Optional[List[str]] = None, invocations = MAX_INVOCATIONS,
                 output: Optional[str] = 'pauses-2x.csv'):
    """Summarise pause times per benchmark as p50/p95 in milliseconds.

    For every entry of ``BENCH`` the folder templates are formatted (with
    ``runid=RUNID``, ``bench=<entry>``, ``dacapo=DACAPO``) and resolved under
    ``~/MMTk-Dev/evaluation/results/log/``. The pause logs of invocations
    ``0 .. invocations - 1`` of every folder are loaded with ``load_data``
    and pooled, and the 50th and 95th percentile of the pooled ``millis``
    column are computed. Progress is printed per benchmark.

    Args:
        data: Folder templates relative to the log root. Defaults to a copy
            of ``DATA`` when ``None``.
        invocations: Number of invocation indices probed per folder.
        output: Path the summary is written to as CSV (without the index).
            Pass ``None`` to skip writing the file.

    Returns:
        A DataFrame with one row per benchmark and the columns ``name``
        (text before the first ``.`` of the ``BENCH`` entry), ``p50`` and
        ``p95``. A benchmark for which no pause log could be loaded gets
        NaN in both percentile columns.
    """
    print('Loading...')
    templates = list(DATA) if data is None else data
    # The root contains no braces, so formatting only the template and then
    # prepending the root yields exactly the same path as formatting both.
    log_root = '~/MMTk-Dev/evaluation/results/log/'
    folders_by_bench: Dict[str, List[str]] = {}
    for bench in BENCH:
        name = bench.split('.')[0]
        folders_by_bench[name] = [
            log_root + template.format(runid=RUNID, bench=bench, dacapo=DACAPO)
            for template in templates
        ]
    # Pool every available invocation of every folder, then take percentiles.
    processed_data = {'name': [], 'p50': [], 'p95': []}
    for name, folders in folders_by_bench.items():
        frames = []
        for folder in folders:
            for i in range(invocations):
                frame = load_data(i, folder)
                if frame is not None:
                    frames.append(frame)
        if frames:
            millis = pd.concat(frames, ignore_index=True)['millis']
            p50 = millis.quantile(0.5)
            p95 = millis.quantile(0.95)
        else:
            # pd.concat raises ValueError on an empty list; keep the benchmark
            # in the table with NaN instead of aborting the whole summary.
            p50 = p95 = float('nan')
        processed_data['name'].append(name)
        processed_data['p50'].append(p50)
        processed_data['p95'].append(p95)
        print(f'{name}: {len(frames)} valid invocations')
    summary = pd.DataFrame(processed_data)
    if output is not None:
        summary.to_csv(output, index=False)
    return summary

In [8]:
# Run the summary with the default folder templates (DATA) and invocation count
# (MAX_INVOCATIONS). This writes 'pauses-2x.csv' and, as the last expression of
# the cell, displays the returned per-benchmark p50/p95 table.
process_data()

Loading...
avrora: 20 valid invocations
batik: 19 valid invocations
biojava: 20 valid invocations
cassandra: 20 valid invocations
eclipse: 18 valid invocations
fop: 20 valid invocations
graphchi: 19 valid invocations
h2: 19 valid invocations
h2o: 20 valid invocations
jython: 19 valid invocations
luindex: 20 valid invocations
lusearch: 18 valid invocations
pmd: 20 valid invocations
sunflow: 20 valid invocations
tomcat: 19 valid invocations
xalan: 7 valid invocations
zxing: 17 valid invocations


Unnamed: 0,name,p50,p95
0,avrora,1.089732,1.47006
1,batik,20.711531,24.546887
2,biojava,1.308235,4.542804
3,cassandra,4.584531,7.377159
4,eclipse,5.409463,12.365096
5,fop,2.936229,4.372228
6,graphchi,1.712553,4.256309
7,h2,9.808471,20.232784
8,h2o,12.63966,13.295054
9,jython,2.683735,3.074214
