In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import rankdata
from statistics import median

from raise_utils.interpret import ResultsInterpreter, DODGEInterpreter
from raise_utils.interpret.sk import Rx

In [2]:
# Project names. Each one is combined with a time label to build log file
# names of the form '<data>-<time>.txt'.
datasets = [
    'camel', 'cloudstack', 'cocoon',
    'hadoop', 'deeplearning', 'ofbiz',
    'qpid', 'hive', 'node',
]

# Time labels exactly as they appear in the log file names: '1 day' is
# singular, every other label is plural.
times = ['1 day'] + [f'{n} days' for n in (7, 14, 30, 90, 180, 365)]

# Treatment identifiers. 'wfo' and 'rwfo' are also the names of the
# ./ghost-log/ sub-directories read by get_scores.
# NOTE(review): nondl_treatments is not referenced in the visible cells.
nondl_treatments = ['none', 'weighted', 'all']
dl_treatments = ['wfo', 'rwfo']

In [4]:
def get_scores(data, time, full=False):
    """Print Non-DL vs. DL comparisons of recall, pf and precision.

    Three log files are handed to ``DODGEInterpreter``:

    * ``./dodge-log/{data}-{time}.txt``          (reported as "Non-DL")
    * ``./ghost-log/wfo/{data}-{time}-wfo.txt``
    * ``./ghost-log/rwfo/{data}-{time}-rwfo.txt``

    Of the two ghost logs, the one whose median ``'f1'`` entry is strictly
    larger is reported as "DL"; on a tie the rwfo log is used. For each of
    ``pd``, ``pf`` and ``prec`` the two samples are passed through
    ``Rx.data`` -> ``Rx.sk`` -> ``Rx.show`` (presumably a Scott-Knott
    ranking; confirm in raise_utils).

    Args:
        data: Project name used in the log file names.
        time: Time label used in the log file names, e.g. ``'7 days'``.
        full: Accepted for call compatibility; not used by this function.

    Returns:
        None. All results are printed.
    """
    dodge_path = f'./dodge-log/{data}-{time}.txt'
    wfo_key = f'{data}-{time}-wfo.txt'
    rwfo_key = f'{data}-{time}-rwfo.txt'

    interpreter = DODGEInterpreter(
        files=[dodge_path, f'./ghost-log/wfo/{wfo_key}', f'./ghost-log/rwfo/{rwfo_key}'],
        metrics=['f1', 'pd', 'pf', 'prec'],
    )
    # The result is indexed by bare file name, then by metric name.
    medians = interpreter.interpret()

    # For GHOST, the first metric is actually d2h (it is only labelled 'f1'
    # by the metrics list above).
    wfo_wins = median(medians[wfo_key]['f1']) > median(medians[rwfo_key]['f1'])
    best = medians[wfo_key] if wfo_wins else medians[rwfo_key]

    nondl = medians[dodge_path.split('/')[-1]]

    # (banner printed before the table, metric key to compare)
    sections = (
        ('Recall:\n=======', 'pd'),
        ('\npf:\n===', 'pf'),
        ('\nPrec:\n=====', 'prec'),
    )
    for banner, metric in sections:
        print(banner)
        samples = {"Non-DL": nondl[metric], "DL": best[metric]}
        Rx.show(Rx.sk(Rx.data(**samples)))

In [5]:
# Run the Non-DL vs. DL comparison for every project / time-label pair.
for data in datasets:
    for time in times:
        title = f'{data}-{time}'
        print(title)
        print('=' * len(title))
        try:
            get_scores(data, time, full=True)
        except Exception as exc:
            # Best-effort sweep: a combination whose logs are missing or
            # unreadable is skipped, but the reason is reported instead of
            # being silently dropped. Catching Exception (rather than a bare
            # except) keeps Ctrl-C / kernel interrupt working.
            print(f'[skipped] {title}: {type(exc).__name__}: {exc}')
        print()

camel-1 day
Recall:
   1     Non-DL (                         |             -*         ), 0.783,  0.796,  0.816,  0.816,  0.822
   2         DL (                         |                  -*-   ), 0.891,  0.896,  0.913,  0.929,  0.951

pf:
===
   1     Non-DL (    *                    |                        ), 0.085,  0.086,  0.095,  0.097,  0.101
   2         DL (    -*                   |                        ), 0.093,  0.099,  0.102,  0.107,  0.111

Prec:
=====
   1     Non-DL (                         |-*                      ), 0.532,  0.536,  0.551,  0.559,  0.562
   2         DL (                         |   -*                   ), 0.589,  0.596,  0.606,  0.614,  0.622

camel-7 days
Recall:
   1     Non-DL (                         |  *-                    ), 0.563,  0.571,  0.580,  0.580,  0.605
   2         DL (                         |   ---*-----            ), 0.561,  0.596,  0.658,  0.693,  0.772

pf:
===
   1     Non-DL (      *                  |                    

Recall:
   1     Non-DL (                   *-----|----------------        ), 0.385,  0.385,  0.385,  0.385,  0.846
   2         DL (                        -|--*                     ), 0.438,  0.500,  0.562,  0.562,  0.562

pf:
===
   1         DL ( *                       |                        ), 0.032,  0.035,  0.038,  0.048,  0.054
   2     Non-DL (     *----------         |                        ), 0.101,  0.107,  0.112,  0.123,  0.323

Prec:
=====
   1     Non-DL (     *                   |                        ), 0.087,  0.102,  0.111,  0.111,  0.125
   2         DL (                --*----  |                        ), 0.321,  0.333,  0.364,  0.429,  0.462

cloudstack-365 days
Recall:
   1         DL (                  *------|-----                   ), 0.375,  0.375,  0.375,  0.375,  0.625
   2     Non-DL (                         |   *------------------- ), 0.400,  0.600,  0.600,  0.600,  1.000

pf:
===
   1         DL ( *-                      |                        )

Recall:
   1         DL (                         |   *----                ), 0.560,  0.583,  0.599,  0.622,  0.692
   2     Non-DL (                         |      *                 ), 0.647,  0.651,  0.655,  0.657,  0.663

pf:
===
   1         DL (   -*                    |                        ), 0.055,  0.071,  0.081,  0.107,  0.119
   2     Non-DL (      -*                 |                        ), 0.136,  0.139,  0.142,  0.144,  0.156

Prec:
=====
   1     Non-DL (                        *|                        ), 0.477,  0.484,  0.485,  0.493,  0.504
   2         DL (                         |   -*---                ), 0.542,  0.590,  0.612,  0.677,  0.688

deeplearning-1 day
Recall:
   1     Non-DL (                         |           -*           ), 0.744,  0.758,  0.771,  0.780,  0.789
   2         DL (                         |               -*-      ), 0.801,  0.829,  0.854,  0.870,  0.898

pf:
===
   1     Non-DL (                *---     |                        ),

## Manually done

In [2]:
# Spot check of a single log: median pd, pf and prec for cocoon / 14 days.
filename = './orig-ghost-log/cocoon-14 days.txt'
r = DODGEInterpreter(
    files=[filename],
    metrics=['d2h', 'pd', 'pf', 'prec'],
)

# interpret() is indexed by the bare file name, then by metric name.
medians = r.interpret()['cocoon-14 days.txt']

# Last expression of the cell: shown as the (pd, pf, prec) medians.
tuple(median(medians[metric]) for metric in ('pd', 'pf', 'prec'))

(0.8, 0.1863905325443787, 0.04061855670103093)

In [4]:
import os

In [47]:
# Print the median pd / pf / prec for every log in ./orig-ghost-log/ and
# collect the names of the files that were processed successfully in `done`.
done = []
for file in os.listdir('./orig-ghost-log/'):
    print(file)
    print('=' * len(file))
    filename = f'./orig-ghost-log/{file}'

    try:
        # max_by scores each candidate as t[1] - .7 * t[2]; with the metric
        # order given here those are presumably pd and pf. TODO confirm
        # against DODGEInterpreter.
        r = DODGEInterpreter(files=[filename], metrics=['d2h', 'pd', 'pf', 'prec'],
                             max_by=lambda t: t[1] - .7* t[2])

        medians = r.interpret()[file]
        print(median(medians['pd']), median(medians['pf']), median(medians['prec']))
    except Exception as exc:
        # Best-effort: keep going when a log cannot be interpreted, but say
        # which one failed and why. A bare `except: pass` left only the file
        # name in the output and also swallowed KeyboardInterrupt.
        print(f'[skipped] {type(exc).__name__}: {exc}')
        print()
        continue

    print()
    done.append(file)

print(done)

cocoon-180 days.txt
0.9 0.358739837398374 0.09618154466600519

cloudstack-14 days.txt
0.84 0.5096418732782368 0.10097589396256645

cocoon-7 days.txt
1.0 0.23108384458077708 0.16912679102460124

cocoon-90 days.txt
1.0 0.22535211267605632 0.11811755952380952

cloudstack-90 days.txt
0.8500000000000001 0.3254189944134078 0.18256627574652606

cocoon-1 day.txt
1.0 0.2272727272727273 0.20294117647058824

camel-365 days.txt
0.9177693761814745 0.31360544217687075 0.6785200199187688

cocoon-14 days.txt
1.0 0.26528599605522685 0.033171966112883707

hadoop-1 day.txt
cocoon-365 days.txt
0.9 0.14243027888446216 0.11180555555555555

camel-1 day.txt
0.9127906976744186 0.15567765567765568 0.47939043037219364

cloudstack-30 days.txt
0.8200000000000001 0.38980716253443526 0.12403993855606758

cocoon-30 days.txt
0.9166666666666666 0.196 0.102803738317757

camel-180 days.txt
0.8201219512195121 0.1990909090909091 0.37787679913463945

cloudstack-1 day.txt
0.8855421686746988 0.4189189189189189 0.6162005785920

In [10]:
# Logs that are excluded from this pass; every other file found in
# ./orig-ghost-log/ is interpreted below with max_by=0.
_skip_files = frozenset((
    'cocoon-180 days.txt',
    'cloudstack-14 days.txt',
    'cocoon-7 days.txt',
    'cocoon-90 days.txt',
    'cloudstack-90 days.txt',
    'cocoon-1 day.txt',
    'camel-365 days.txt',
    'cocoon-14 days.txt',
    'cocoon-365 days.txt',
    'camel-1 day.txt',
    'cloudstack-30 days.txt',
    'cocoon-30 days.txt',
    'camel-180 days.txt',
    'cloudstack-1 day.txt',
    'camel-90 days.txt',
    'hadoop-7 days.txt',
    'qpid-14 days.txt',
    'camel-7 days.txt',
    'hadoop-30 days.txt',
    'qpid-90 days.txt',
    'camel-14 days.txt',
    'qpid-30 days.txt',
    'cloudstack-7 days.txt',
    'cloudstack-365 days.txt',
    'camel-30 days.txt',
    'hadoop-14 days.txt',
))

# No try/except here: any failure propagates and stops the cell.
for file in os.listdir('./orig-ghost-log/'):
    if file in _skip_files:
        continue

    print(file)
    print('=' * len(file))
    filename = f'./orig-ghost-log/{file}'

    # NOTE(review): max_by=0 presumably selects by the first metric ('d2h')
    # instead of a scoring callable; confirm against DODGEInterpreter.
    r = DODGEInterpreter(
        files=[filename],
        metrics=['d2h', 'pd', 'pf', 'prec'],
        max_by=0,
    )

    medians = r.interpret()[file]
    print(median(medians['pd']), median(medians['pf']), median(medians['prec']))
    print()

hadoop-1 day.txt
0.8571428571428571 0.06773236651285432 0.05480835834680212

qpid-7 days.txt
0.6136363636363636 0.2120754716981132 0.08547974993204675

hadoop-90 days.txt
0.6945754716981132 0.2772484756097561 0.2885071917329982

qpid-1 day.txt
0.6206896551724138 0.2410373760488177 0.10316375750681431

ofbiz-90 days.txt
0.6 0.3851063829787234 0.13145708037790665



## Non-DL

In [5]:
# Print the median pd / pf / prec for every log in ./ghost-nondl/ and
# collect the names of the files that were processed successfully in `done`.
done = []
for file in os.listdir('./ghost-nondl/'):
    print(file)
    print('=' * len(file))
    filename = f'./ghost-nondl/{file}'

    try:
        # max_by scores each candidate as t[1] - t[2]; with the metric order
        # given here those are presumably pd and pf. TODO confirm against
        # DODGEInterpreter.
        r = DODGEInterpreter(files=[filename], metrics=['d2h', 'pd', 'pf', 'prec'],
                             max_by=lambda t: t[1] - t[2])

        medians = r.interpret()[file]
        print(median(medians['pd']), median(medians['pf']), median(medians['prec']))
    except Exception as exc:
        # Best-effort: keep going when a log cannot be interpreted, but say
        # which one failed and why. A bare `except: pass` hid the failure
        # entirely and also swallowed KeyboardInterrupt.
        print(f'[skipped] {type(exc).__name__}: {exc}')
        print()
        continue

    print()
    done.append(file)

print(done)

qpid-365 days.txt
0.424812030075188 0.07119741100323625 0.38976190476190475

deeplearning-14 days.txt
0.25 0.06932153392330384 0.1481818181818182

cocoon-180 days.txt
0.8076923076923077 0.28256513026052105 0.0700750659362954

cloudstack-14 days.txt
0.41379310344827586 0.21309192200557103 0.1322142064372919

hive-14 days.txt
0.5 0.007845934379457917 0.32291666666666663

cocoon-7 days.txt
0.7916666666666666 0.1915983606557377 0.16929352130524314

cocoon-90 days.txt
0.9444444444444444 0.25101214574898784 0.12059163857725008

deeplearning-1 day.txt
0.8061674008810573 0.32954545454545453 0.8066619250829776

deeplearning-90 days.txt
0.5 0.11174785100286533 0.11363636363636363

hive-90 days.txt
0.7326923076923078 0.041666666666666664 0.7957298853696964

hive-180 days.txt
0.7140845070422535 0.036896877956480605 0.8677221302428256

cloudstack-90 days.txt
0.6363636363636364 0.30563380281690145 0.16228070175438597

cocoon-1 day.txt
0.9375 0.25416666666666665 0.19671766589574807

camel-365 days.tx