In [17]:
import numpy as np
import matplotlib.pyplot as plt
import re
import os

In [20]:
res_path = '../../data/experiments/slowfast/slowfast_sg/temp/'
file_name = 'log.txt'

In [21]:
with open(res_path+file_name, 'r') as file:
    # Read the contents of the file
    content = file.read()

    # Extract all the numbers from the content
    numbers = re.findall(r'\d+', content)

In [22]:
numbers = np.array(list(map(int, numbers))).reshape((-1,11))

In [23]:
numbers[:, 3] = numbers[:, 3] + numbers[:, 4] * (1e-2)
numbers[:, 5] = numbers[:, 5] + numbers[:, 6] * (1e-2)
numbers[:, 7] = numbers[:, 7] + numbers[:, 8] * (1e-2)
numbers[:, 9] = numbers[:, 9] + numbers[:, 10] * (1e-2)

In [24]:
numbers = np.delete(numbers, 4, axis=1)
numbers = np.delete(numbers, 5, axis=1)
numbers = np.delete(numbers, 6, axis=1)
numbers = np.delete(numbers, 7, axis=1)

In [25]:
# CHANGE
t10 = numbers[0:204]
t20 = numbers[208:412]
t30 = numbers[416:620]
t40 = numbers[624:828]
t50 = numbers[832:1036]
# t100 = numbers[530:630]
# t150 = numbers[636:736]

In [27]:
# CHANGE
s = 5
arrs = [t10, t20, t30, t40, t50]
# arrs = [t10, t20, t30, t40, t50, t100, t150]

quants = np.arange(s*4).reshape((s,4)).astype(float)
errors = np.arange(s*4).reshape((s,4)).astype(float)
quants_s = np.arange(s*4).reshape((s,4)).astype(float)
errors_s = np.arange(s*4).reshape((s,4)).astype(float)

j = 0
for arr in arrs:
    for i in range(3,7):
        quants[j, i-3] = np.mean(arr[::2, i]).astype(float)
        errors[j, i-3] = np.std(arr[::2, i]).astype(float)
        quants_s[j, i-3] = np.mean(arr[1::2, i]).astype(float)
        errors_s[j, i-3] = np.std(arr[1::2, i]).astype(float)
    j += 1
    
quants *= 0.01
errors *= 0.01
quants_s *= 0.01
errors_s *= 0.01

In [28]:
quants

array([[0.5       , 0.59892157, 0.72107843, 0.08068627],
       [0.5       , 0.62313725, 0.62205882, 0.13117647],
       [0.5       , 0.66852941, 0.58372549, 0.07862745],
       [0.5       , 0.68460784, 0.48872549, 0.14127451],
       [0.5       , 0.69392157, 0.48676471, 0.12509804]])

# Plot curves

In [29]:
train_sizes = [10,20,30,40,50]
titles = ['minimum accuracy', 'validation accuracy', 'missed detection', 'false alarm']


for i in range(4):
    plt.figure(figsize=(7,4))
    plt.errorbar(train_sizes, quants[:, i], errors[:, i])
    plt.errorbar(train_sizes, quants_s[:, i], errors_s[:, i])
    plt.legend(['our method', 'our method + feature selection'])
    plt.xlabel('training set size')
    plt.ylabel(titles[i])
    plt.ylim([0,1])
    plt.xticks(train_sizes)
    _ = plt.title('val. set size = {}, Dt = {}, Dvt = {}, Dv = {}, $\mu$ = {}'.format(10, 'None', 2000, 0, 30))
    
    if not os.path.exists(res_path+'comparison/'):
        os.mkdir(res_path+'comparison/')
    plt.savefig(res_path+'comparison/train_curves_'+titles[i]+'.png')
    plt.close()

# Compute mean accuracy drop due to feature selection

In [30]:
acc_drops = quants[:, 1] - quants_s[:, 1]
acc_errbars = errors[:, 1]

print('Accuracy drops: \n', acc_drops)
print('Accuracy error bars: \n', acc_errbars)
print('\n-> Validation accuracy drops {:.2f}% on average, while the mean error bar is {:.2f}%'\
      .format(np.mean(acc_drops)*100, np.mean(acc_errbars)*100))

Accuracy drops: 
 [0.0677451  0.10509804 0.13127451 0.1322549  0.15862745]
Accuracy error bars: 
 [0.12206481 0.13266485 0.12266963 0.12087647 0.11950188]

-> Validation accuracy drops 11.90% on average, while the mean error bar is 12.36%


# Compute how many features were discarded on average

In [31]:
i = 0
M = np.loadtxt(res_path+'mat/finalM_10_'+str(train_sizes[i])+'_0.txt')

In [34]:
thresh = 50
# lim = (thresh/100) * np.max(M)
lim = np.percentile(M, thresh)
lim

2.8891054770075897e-07

In [35]:
disc_rate = sum((M < lim).reshape(-1))/(len(M)**2)
keep_rate = sum((M >= lim).reshape(-1))/(len(M)**2)

print('{:.3f} of entries were discarded'.format(disc_rate*100))

50.000 of entries were discarded


In [36]:
f_sz = 474 # must match data_params
xloc = np.broadcast_to(np.arange(f_sz), (f_sz, f_sz))
yloc = xloc.T

kept_features = np.unique(np.append(xloc[M >= lim], yloc[M >= lim]))
kept_f_rate = len(kept_features) / f_sz

print('{:.2f}% of features were kept; therefore {:.2f}% were discarded'.format(kept_f_rate*100, 100-kept_f_rate*100))

100.00% of features were kept; therefore 0.00% were discarded
