In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import rankdata
from statistics import median

from raise_utils.interpret import ResultsInterpreter, DODGEInterpreter
from raise_utils.interpret.sk import Rx

In [2]:
datasets = ['camel', 'cloudstack', 'cocoon', 'hadoop', 'deeplearning', 'ofbiz', 'qpid', 'hive', 'node']
times = ['1 day', '7 days', '14 days', '30 days', '90 days', '180 days', '365 days']
nondl_treatments = ['none', 'weighted', 'all']
dl_treatments = ['wfo', 'rwfo']

In [3]:
def get_scores(data, time, full=False):
    filename = f'./dodge-log/{data}-{time}.txt'
    r = DODGEInterpreter(files=[filename, f'./ghost-log/wfo/{data}-{time}-wfo.txt', 
                                    f'./ghost-log/rwfo/{data}-{time}-rwfo.txt'], metrics=['f1', 'pd', 'pf', 'prec'])

    medians = r.interpret()
    best = None
    best_val = 0.
    
    # For GHOST, the first metric is actually d2h
    if median(medians[f'{data}-{time}-wfo.txt']['f1']) > median(medians[f'{data}-{time}-rwfo.txt']['f1']):
        best = medians[f'{data}-{time}-wfo.txt']
    else:
        best = medians[f'{data}-{time}-rwfo.txt']
    
    nondl = medians[filename.split('/')[-1]]
    print('Recall:\n=======')
    d = {"Non-DL": nondl['pd'], "DL": best['pd']}
    Rx.show(Rx.sk(Rx.data(**d)))
        
    print('\npf:\n===')
    d = {"Non-DL": nondl['pf'], "DL": best['pf']}
    Rx.show(Rx.sk(Rx.data(**d)))
        
    print('\nPrec:\n=====')
    d = {"Non-DL": nondl['prec'], "DL": best['prec']}
    Rx.show(Rx.sk(Rx.data(**d)))

In [4]:
for data in datasets:
    for time in times:
        try:
            print(f'{data}-{time}')
            print('=' * len(f'{data}-{time}'))
            get_scores(data, time, full=True)
            print()
        except:
            pass

camel-1 day
camel-7 days
camel-14 days
camel-30 days
camel-90 days
camel-180 days
camel-365 days
cloudstack-1 day
cloudstack-7 days
cloudstack-14 days
cloudstack-30 days
cloudstack-90 days
cloudstack-180 days
cloudstack-365 days
cocoon-1 day
cocoon-7 days
cocoon-14 days
cocoon-30 days
cocoon-90 days
cocoon-180 days
cocoon-365 days
hadoop-1 day
hadoop-7 days
hadoop-14 days
hadoop-30 days
hadoop-90 days
hadoop-180 days
hadoop-365 days
deeplearning-1 day
deeplearning-7 days
deeplearning-14 days
deeplearning-30 days
deeplearning-90 days
deeplearning-180 days
deeplearning-365 days
ofbiz-1 day
ofbiz-7 days
ofbiz-14 days
ofbiz-30 days
ofbiz-90 days
ofbiz-180 days
ofbiz-365 days
qpid-1 day
qpid-7 days
qpid-14 days
qpid-30 days
qpid-90 days
qpid-180 days
qpid-365 days
hive-1 day
hive-7 days
hive-14 days
hive-30 days
hive-90 days
hive-180 days
hive-365 days
node-1 day
node-7 days
node-14 days
node-30 days
node-90 days
node-180 days
node-365 days


## Manually done

In [2]:
filename = f'./orig-ghost-log/cocoon-14 days.txt'
r = DODGEInterpreter(files=[filename], metrics=['d2h', 'pd', 'pf', 'prec'])

medians = r.interpret()['cocoon-14 days.txt']
median(medians['pd']), median(medians['pf']), median(medians['prec'])

(0.8, 0.1863905325443787, 0.04061855670103093)

In [5]:
import os

In [10]:
done = []
for file in os.listdir('./orig-ghost-log/'):
    try:
        print(file)
        print('=' * len(file))
        filename = f'./orig-ghost-log/{file}'

        r = DODGEInterpreter(files=[filename], metrics=['d2h', 'pd', 'pf', 'prec', 'auc', 'f1'], 
                             max_by=lambda t: t[1] - t[2])

        medians = r.interpret()[file]
        print(median(medians['pd']), median(medians['pf']), median(medians['prec']))
        print()
        done.append(file)
    except:
        pass

print(done)

cocoon-180 days.txt
cloudstack-14 days.txt
hive-14 days.txt
0.9 0.07738944365192582 0.06851688043638507

cocoon-7 days.txt
cocoon-90 days.txt
hive-90 days.txt
0.8467153284671532 0.2535149384885764 0.44679248202286925

cloudstack-90 days.txt
cocoon-1 day.txt
camel-365 days.txt
cocoon-14 days.txt
hadoop-1 day.txt
cocoon-365 days.txt
camel-1 day.txt
cloudstack-30 days.txt
hive-30 days.txt
0.8048780487804879 0.1549963530269876 0.13489010989010988

hive-1 day.txt
0.8 0.023098791755508174 0.11128048780487805

ofbiz-7 days.txt
0.6947368421052631 0.4857142857142857 0.244676048858205

cocoon-30 days.txt
camel-180 days.txt
cloudstack-1 day.txt
ofbiz-30 days.txt
0.6138613861386139 0.37915512465373963 0.0968938560635895

camel-90 days.txt
qpid-7 days.txt
0.5543478260869565 0.1345427059712774 0.12747963788063682

hadoop-7 days.txt
qpid-14 days.txt
0.5862068965517241 0.2007462686567164 0.05778105056455572

camel-7 days.txt
node-14 days.txt
0.7407407407407407 0.5077628793225124 0.12309571908142955

h

In [12]:
for file in os.listdir('./orig-ghost-log/'):
    try:
        print(file)
        print('=' * len(file))
        filename = f'./orig-ghost-log/{file}'

        r = DODGEInterpreter(files=[filename], metrics=['d2h', 'pd', 'pf', 'prec', 'auc', 'f1'], 
                             max_by=0
                            )

        medians = r.interpret()[file]
        print(median(medians['pd']), median(medians['pf']), median(medians['prec']))
        print()
    except:
        pass

cocoon-180 days.txt
cloudstack-14 days.txt
hive-14 days.txt
0.9 0.09094151212553495 0.06703590527119939

cocoon-7 days.txt
cocoon-90 days.txt
hive-90 days.txt
0.8248175182481752 0.22847100175746926 0.4652715278081174

cloudstack-90 days.txt
cocoon-1 day.txt
camel-365 days.txt
cocoon-14 days.txt
hadoop-1 day.txt
cocoon-365 days.txt
camel-1 day.txt
cloudstack-30 days.txt
hive-30 days.txt
0.8170731707317074 0.15718453683442743 0.13056110556110556

hive-1 day.txt
0.8 0.023098791755508174 0.11128048780487805

ofbiz-7 days.txt
0.6578947368421053 0.4646825396825397 0.2464858628954461

cocoon-30 days.txt
camel-180 days.txt
cloudstack-1 day.txt
ofbiz-30 days.txt
0.6138613861386139 0.39508310249307477 0.09475859681014522

camel-90 days.txt
qpid-7 days.txt
0.6195652173913044 0.22789115646258504 0.08425283732660782

hadoop-7 days.txt
qpid-14 days.txt
0.6206896551724138 0.30708955223880596 0.04439510978650933

camel-7 days.txt
node-14 days.txt
0.6703703703703703 0.4255469301340861 0.126171786671835

## Non-DL

In [5]:
done = []
for file in os.listdir('./ghost-nondl/'):
    try:
        print(file)
        print('=' * len(file))
        filename = f'./ghost-nondl/{file}'

        r = DODGEInterpreter(files=[filename], metrics=['d2h', 'pd', 'pf', 'prec'], 
                             max_by=lambda t: t[1] - t[2])

        medians = r.interpret()[file]
        print(median(medians['pd']), median(medians['pf']), median(medians['prec']))
        print()
        done.append(file)
    except:
        pass

print(done)

qpid-365 days.txt
0.424812030075188 0.07119741100323625 0.38976190476190475

deeplearning-14 days.txt
0.25 0.06932153392330384 0.1481818181818182

cocoon-180 days.txt
0.8076923076923077 0.28256513026052105 0.0700750659362954

cloudstack-14 days.txt
0.41379310344827586 0.21309192200557103 0.1322142064372919

hive-14 days.txt
0.5 0.007845934379457917 0.32291666666666663

cocoon-7 days.txt
0.7916666666666666 0.1915983606557377 0.16929352130524314

cocoon-90 days.txt
0.9444444444444444 0.25101214574898784 0.12059163857725008

deeplearning-1 day.txt
0.8061674008810573 0.32954545454545453 0.8066619250829776

deeplearning-90 days.txt
0.5 0.11174785100286533 0.11363636363636363

hive-90 days.txt
0.7326923076923078 0.041666666666666664 0.7957298853696964

hive-180 days.txt
0.7140845070422535 0.036896877956480605 0.8677221302428256

cloudstack-90 days.txt
0.6363636363636364 0.30563380281690145 0.16228070175438597

cocoon-1 day.txt
0.9375 0.25416666666666665 0.19671766589574807

camel-365 days.tx