In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os

# Load curves

In [2]:
def load_curves(quantities, val_sizes, res_path):
    """Load per-quantity curves and their errors from ``curves_<i>.txt`` files.

    One file is read per entry of ``val_sizes``; file ``i`` is named
    ``curves_<i>.txt``. Each file is reshaped to 8 rows, where rows ``2*j``
    and ``2*j + 1`` hold the curve and the error of ``quantities[j]``.

    Parameters
    ----------
    quantities : sequence of str
        Names of the quantities, in the row order used by the files
        (at most 4, since each file is read as 8 rows).
    val_sizes : sequence
        Only its length is used: it sets how many files are read.
    res_path : str
        Directory holding the files, with or without a trailing separator.
        A string that is not an existing directory is used as a plain
        filename prefix (``res_path + 'curves_<i>.txt'``).

    Returns
    -------
    curves, errors : dict of str -> numpy.ndarray
        For every quantity, an array of shape ``(len(val_sizes), n_points)``
        whose row ``i`` comes from ``curves_<i>.txt``. Both dicts are empty
        when ``val_sizes`` is empty.
    """
    # Join properly when res_path is a directory so a missing trailing
    # separator no longer yields 'dircurves_0.txt'; otherwise keep the
    # historical prefix concatenation.
    if os.path.isdir(res_path):
        file_prefix = os.path.join(res_path, 'curves_')
    else:
        file_prefix = res_path + 'curves_'

    curve_rows = {}
    error_rows = {}
    for file_idx in range(len(val_sizes)):
        # reshape also covers files that loadtxt returns as a 1-D array
        table = np.loadtxt(file_prefix + str(file_idx) + '.txt').reshape(8, -1)
        for row_pair, quantity in enumerate(quantities):
            curve_rows.setdefault(quantity, []).append(table[2 * row_pair])
            error_rows.setdefault(quantity, []).append(table[2 * row_pair + 1])

    # Stack once per quantity instead of re-allocating on every file.
    curves = {name: np.stack(rows) for name, rows in curve_rows.items()}
    errors = {name: np.stack(rows) for name, rows in error_rows.items()}
    return curves, errors

## Similarity graph

In [3]:
# settings shared by every method loaded in this notebook
quantities = [
    'min_acc',
    'val_acc',
    'missed',
    'false_alarm',
]
val_sizes = [10]

# similarity-graph (sg) run directory
res_path = '../../data/experiments/slowfast/slowfast_sg/2023_03_15-sg-bal/mu_30/'

sg_curves, sg_errors = load_curves(
    quantities=quantities,
    val_sizes=val_sizes,
    res_path=res_path,
)

# print results: the loaded curves first, then their errors
print('curves:\n', sg_curves)
print('\nerrors:\n', sg_errors)

curves:
 {'min_acc': array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]), 'val_acc': array([[0.684, 0.702, 0.778, 0.766, 0.794, 0.79 ]]), 'missed': array([[0.48 , 0.404, 0.304, 0.312, 0.22 , 0.296]]), 'false_alarm': array([[0.152, 0.192, 0.14 , 0.156, 0.192, 0.124]])}

errors:
 {'min_acc': array([[0., 0., 0., 0., 0., 0.]]), 'val_acc': array([[0.04841487, 0.03969887, 0.03187475, 0.02244994, 0.04498889,
        0.03794733]]), 'missed': array([[0.0669328 , 0.10071743, 0.08236504, 0.04833218, 0.07375636,
        0.048     ]]), 'false_alarm': array([[0.04833218, 0.08997778, 0.07266361, 0.026533  , 0.04118252,
        0.05425864]])}


## XGBoost

In [4]:
# XGBoost (xgb) run directory and its curves
res_path = '../../data/experiments/slowfast/slowfast_xgb/2023_03_13-no-hypertune/'
xgb_curves, xgb_errors = load_curves(
    quantities=quantities,
    val_sizes=val_sizes,
    res_path=res_path,
)

# print results: the loaded curves first, then their errors
print('curves:\n', xgb_curves)
print('\nerrors:\n', xgb_errors)

curves:
 {'min_acc': array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]), 'val_acc': array([[0.669, 0.674, 0.746, 0.744, 0.758, 0.772]]), 'missed': array([[0.39 , 0.35 , 0.28 , 0.276, 0.316, 0.268]]), 'false_alarm': array([[0.272, 0.302, 0.228, 0.236, 0.168, 0.188]])}

errors:
 {'min_acc': array([[0., 0., 0., 0., 0., 0.]]), 'val_acc': array([[0.03014963, 0.054626  , 0.04521062, 0.05444263, 0.04354308,
        0.0401995 ]]), 'missed': array([[0.09088454, 0.08062258, 0.06196773, 0.07472617, 0.07144228,
        0.07807689]]), 'false_alarm': array([[0.10047885, 0.11187493, 0.08447485, 0.05425864, 0.04489989,
        0.053066  ]])}


## K-nearest neighbors

In [5]:
# k-nearest-neighbors (knn) run directory and its curves
res_path = '../../data/experiments/slowfast/slowfast_knn/2023_03_13-knn-nei1/'
knn_curves, knn_errors = load_curves(
    quantities=quantities,
    val_sizes=val_sizes,
    res_path=res_path,
)

# print results: the loaded curves first, then their errors
print('curves:\n', knn_curves)
print('\nerrors:\n', knn_errors)

curves:
 {'min_acc': array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]), 'val_acc': array([[0.619, 0.658, 0.72 , 0.721, 0.761, 0.792]]), 'missed': array([[0.582, 0.482, 0.336, 0.352, 0.318, 0.254]]), 'false_alarm': array([[0.18 , 0.202, 0.224, 0.206, 0.16 , 0.162]])}

errors:
 {'min_acc': array([[0., 0., 0., 0., 0., 0.]]), 'val_acc': array([[0.03780212, 0.0505569 , 0.06855655, 0.03562303, 0.04763402,
        0.03249615]]), 'missed': array([[0.0666033 , 0.06838128, 0.08890444, 0.05878775, 0.060959  ,
        0.04737088]]), 'false_alarm': array([[0.11696153, 0.05758472, 0.08333067, 0.042     , 0.06511528,
        0.05963221]])}


## Logistic regression

In [6]:
# logistic-regression (lgrg) run directory and its curves
res_path = '../../data/experiments/slowfast/slowfast_lgrg/2023_03_14-lgrg-l2/'
lgrg_curves, lgrg_errors = load_curves(
    quantities=quantities,
    val_sizes=val_sizes,
    res_path=res_path,
)

# print results: the loaded curves first, then their errors
print('curves:\n', lgrg_curves)
print('\nerrors:\n', lgrg_errors)

curves:
 {'min_acc': array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]), 'val_acc': array([[0.72 , 0.748, 0.765, 0.774, 0.79 , 0.829]]), 'missed': array([[0.392, 0.326, 0.292, 0.296, 0.264, 0.22 ]]), 'false_alarm': array([[0.168, 0.178, 0.178, 0.156, 0.156, 0.122]])}

errors:
 {'min_acc': array([[0., 0., 0., 0., 0., 0.]]), 'val_acc': array([[0.05039841, 0.05455273, 0.04801042, 0.04923413, 0.02683282,
        0.04158125]]), 'missed': array([[0.10777755, 0.06873136, 0.04995998, 0.06858571, 0.06621178,
        0.04      ]]), 'false_alarm': array([[0.0688186 , 0.06838128, 0.06539113, 0.04176123, 0.05782733,
        0.06029925]])}


# Plot curves

In [7]:
# group all curves: keep each method's curves next to its errors, in the
# fixed order sg, lgrg, knn, xgb (the plotting cell matches legend labels
# to methods by position)
method_results = [
    (sg_curves, sg_errors),
    (lgrg_curves, lgrg_errors),
    (knn_curves, knn_errors),
    (xgb_curves, xgb_errors),
]
curves = [method_curves for method_curves, _method_errors in method_results]
errors = [method_errors for _method_curves, method_errors in method_results]

In [18]:
# plot curves: one figure per quantity, comparing all methods, saved as PNG
res_path = '../../data/experiments/slowfast/comparison/2023_03_15-sg-xgb-knn-lgrg/'
full_title = {'min_acc': 'minimum accuracy', 'val_acc': 'validation accuracy', 'missed': 'missed detection rate', 'false_alarm': 'false alarm rate'}
train_sizes = [50, 100, 150, 200, 250, 300]
val_sizes = [10]

# legend label of each method, in the same order as `curves` / `errors`
method_labels = ['our method', 'logreg', 'kNN', 'gbdt']
assert len(method_labels) == len(curves) == len(errors), 'one label per method expected'

# makedirs creates missing parent directories too and tolerates an existing
# directory, unlike the previous exists-check followed by os.mkdir
os.makedirs(res_path, exist_ok=True)

i = 0  # index into val_sizes: which row of each curve array is plotted
for quantity in quantities:
    fig, ax = plt.subplots(figsize=(8, 4))

    # plot for all methods; the label is attached to the data it describes
    for label, method_curves, method_errors in zip(method_labels, curves, errors):
        ax.errorbar(train_sizes, method_curves[quantity][i], method_errors[quantity][i], label=label)
    # the validation-accuracy figure additionally shows the min_acc curve
    if quantity == 'val_acc':
        ax.errorbar(train_sizes, sg_curves['min_acc'][i], sg_errors['min_acc'][i], label='min acc')

    ax.legend()
    ax.set_xlabel('training set size')
    ax.set_ylabel(full_title[quantity])
    ax.set_ylim([0, 1])
    ax.set_title('validation set size = {}'.format(val_sizes[i]))
    fig.savefig(os.path.join(res_path, 'curves_' + str(quantity) + '.png'))
    # close explicitly so figures do not accumulate across iterations
    plt.close(fig)