In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matlab.engine # to run Cheng's code
import os

# add the path to my packages to system paths so they can be imported
import sys
sys.path.append('/home/yasamanparhizkar/Documents/yorku/01_thesis/code/my_packages')
# sys.path.append('F:\MAScThesis\code\my_packages')
# sys.path.append('/home/yasamanparhizkar/Documents/thesis/code/my_packages')

import data_handler_03 as dh
import my_simgraph_06 as sg
import assess_simgraph_03_cheng as asg

# Load spike data

Spike data shape:  (297, 1141, 113) $\implies$ (movie repeats, frames/time, neurons)
<br>
Labels are 1 (= spike) or -1 (= no spike).

In [2]:
# read the binned spike trains from the csv file (one row per movie repeat)
spikes_dp = '../../../data/original_files/spikes.csv'
binned_data = np.loadtxt(spikes_dp, delimiter=',')

# unflatten every row to (frames, neurons) and map the labels {0, 1} -> {-1, 1}
binned_data = 2 * binned_data.reshape(binned_data.shape[0], 1141, 113) - 1

# hand-picked list of 10 neuron indices (not used by the code in this cell)
I_order_10 = [54, 35, 10, 60, 74, 9, 61, 56, 91, 104]

## Group all 113 neurons

This will create a more balanced dataset, which is presumably easier to solve.
<br>
Grouped data shape:  (297, 1141, 1) $\implies$ (movie repeats, frames/time, group)

In [3]:
# group all neurons together:
# a (trial, frame) bin gets label 1 if at least one neuron spiked in it, and -1 otherwise.
# Vectorized over trials and frames, so the sizes are taken from binned_data itself
# instead of being hard-coded; keepdims keeps the trailing group axis of length 1.
any_spike = (binned_data == 1).any(axis=2, keepdims=True)
grouped_data = np.where(any_spike, 1.0, -1.0)   # float array, same dtype as before

In [4]:
# summarize how often the grouped label equals 1
print('grouped_data.shape = ', grouped_data.shape)

rule = '---------------------------------------------'
print('trial #    | percentage belonging to class 1')
print(rule)

# per-trial percentages, for the first 10 movie repeats only
# (entry 1 of the returned percentages corresponds to label 1 in the list [-1, 1])
spike_percs = []
for trial in range(10):
    pers = dh.class_percentages(grouped_data[trial, :, :].reshape(-1), [-1, 1])
    spike_percs.append(pers[1])
    print('trial #{:3} | {:.2f} %'.format(trial, pers[1]))

# average over the 10 trials listed above
avg_spike_perc = sum(spike_percs) / 10
print(rule)
print('AVERAGE     | {:.2f} %'.format(avg_spike_perc))

# overall percentage over every (trial, frame) bin in the data
num_bins = grouped_data.shape[0] * grouped_data.shape[1]
total_perc = np.sum(grouped_data == 1) *100 / num_bins
print(rule)
print('{:.2f} % of the whole data belongs to class 1.'.format(total_perc))

grouped_data.shape =  (297, 1141, 1)
trial #    | percentage belonging to class 1
---------------------------------------------
trial #  0 | 66.26 %
trial #  1 | 69.06 %
trial #  2 | 67.92 %
trial #  3 | 71.08 %
trial #  4 | 68.97 %
trial #  5 | 68.27 %
trial #  6 | 66.87 %
trial #  7 | 65.82 %
trial #  8 | 67.66 %
trial #  9 | 68.19 %
---------------------------------------------
AVERAGE     | 68.01 %
---------------------------------------------
68.47 % of the whole data belongs to class 1.


# Assess the model's performance with random tests

In [5]:
def transform(fv):
    """
    Subsample a feature vector for a faster run and less memory usage.

    Input: fv
    fv - 1xDf torch tensor representing a feature vector

    Output: fvv
    fvv - 1xDf' torch tensor holding every 10th column of fv (columns 0, 10, 20, ...)
    """

    # keep one column out of every ten, starting at column 0
    every_tenth = slice(None, None, 10)
    fvv = fv[:, every_tenth]

    # an optional rescaling (fvv * 10) for numerical stability during GD is currently disabled
    return fvv

# data retrieval params
data_params = {
    'func': dh.datapoint_torch,
    'lbl_func': dh.get_labels,
    'features_dp': '../../../data/features/slowfast/slowfast_4732/',
    'spike_data': grouped_data,
    'group_id': 0,
    'transform': transform,
    'ind_min': 1*1141+0,
    'ind_max': 2*1141-1,
}

# graph construction and penalty term parameters
sg_params = {
    'mu': 30,
    'Dt': None,
    'Dv': 0,
    'Dvt': 2000,
    'cnstr_method_tt': 'time',
    'cnstr_method_vv': 'time',
    'cnstr_method_vt': 'time',
    'train_t': None,
    'val_t': None,
    'edges_tt': None,
    'edges_vv': None,
    'edges_vt': None,
}

# gradient descent parameters
sg_opt_params = {
    'epsilon0': 1,
    'epsilon_decay': 0.5,
    'epsilon_jump': 2,
    'num_its': 16,
    'check_freq': 1,
    'print_checks': False,
    'Theta0': None,
    'force_all_its': True,
    'threshold': 0.01,
}

# randomization parameters
# (smaller alternative for quick runs:
#  {'train_sizes': [400], 'val_sizes': [10], 'train_its': 2, 'val_its': 3, 'seed': None})
rnd_params = {
    'train_sizes': [50, 100, 150, 200, 250, 300, 350],
    'val_sizes': [30],
    'train_its': 5,
    'val_its': 10,
    'seed': None,
}

# parameters to visualize the optimized M
# f_sz must match data_params: 4732 features subsampled by 10 in transform -> 474
f_sz = int(np.ceil(4732/10))
xloc = np.broadcast_to(np.arange(f_sz), (f_sz, f_sz))   # every row is 0..f_sz-1
yloc = xloc.T                                          # every column is 0..f_sz-1
fig_params = {'rmark_th': 30, 'f_sz': f_sz, 'xloc': xloc, 'yloc': yloc}

# start a matlab engine to run Cheng's code, and move it into the folder holding that code
eng = matlab.engine.start_matlab()
eng.cd(r'/home/yasamanparhizkar/Documents/yorku/01_thesis/code/15_cheng', nargout=0)

# paths to save the results (one folder per model)
res_path_sg = '../../../data/experiments/comparison/temp/sg/'
res_path_lgrg = '../../../data/experiments/comparison/temp/lgrg/'

In [6]:
# import importlib
# importlib.reload(sg)
# importlib.reload(asg)

In [None]:
# run the assessment with random tests for both models;
# times will be measured together and written in res_path_sg
(val_num_res_sg, val_num_err_sg,
 val_num_res_lgrg, val_num_err_lgrg) = asg.assess_sg_model(
    eng, data_params, sg_params, sg_opt_params,
    rnd_params, fig_params, res_path_sg, res_path_lgrg,
)

-> Iteration ID: 30_50_0
starting SGML.
initial objective value = -18276.6217
 
node_number = 1
min objective value = -18276.6217
minimal eigenvalue of M = 0.8
PD conditions satisfied: 0
PD conditions unsatisfied: -5.6016
first eigenvector has 0 entries: 0
exitflag = 1
 
node_number = 2
min objective value = -399136.8462
minimal eigenvalue of M = 1.0002e-05
PD conditions satisfied: 0
PD conditions unsatisfied: -1.44827e-07 -3.33427e-07 -2.43565e-07 -6.91075e-08 -3.18221e-07 -1.58675e-07 -6.86152e-08 -7.78563e-08 -1.36027e-07  -1.2826e-07 -1.50152e-07  -9.9169e-08   -1.731e-07 -8.51421e-10 -4.45122e-09 -1.85465e-09 -3.56814e-09  -4.8451e-09  -4.3304e-09 -6.72129e-09 -1.02376e-08 -6.40781e-09 -8.56051e-09  -4.7584e-09  -4.3695e-09 -1.97656e-09 -2.21265e-09 -9.09179e-10 -1.89597e-06 -0.000604636     -42.7398 -0.000227007 -2.51589e-08 -1.93883e-08 -1.64319e-08 -4.06231e-08 -2.46572e-08  -2.8648e-08 -2.47698e-08 -4.88399e-08 -9.45622e-09 -2.98539e-08 -1.55056e-08 -1.49257e-08  -1.8731e-08  

In [None]:
# plot the curves for the results stored under res_path_sg
# NOTE(review): presumably reads the files written by asg.assess_sg_model — confirm
asg.plot_curves(rnd_params, sg_params, res_path_sg)

In [None]:
# plot the curves for the results stored under res_path_lgrg, using the variant without run time
# NOTE(review): presumably because run times are only written under res_path_sg — confirm
asg.plot_curves_without_runtime(rnd_params, sg_params, res_path_lgrg)

In [None]:
# close the matlab engine when you're done
# (eng is the engine created with matlab.engine.start_matlab(); no later cell uses it)
eng.quit()

# Plot similarity graph and logistic regression validation accuracies together

In [None]:
# unpack params
train_sizes = rnd_params['train_sizes']
val_sizes = rnd_params['val_sizes']
train_its = rnd_params['train_its']
val_its = rnd_params['val_its']
assess_qs = ['min_acc', 'val_acc', 'missed', 'false_alarm']


def _read_training_curves(res_path, num_val_sizes, quantities, stride=8):
    """
    Read the saved training curves of one model for every validation set size.

    Each file res_path + 'curves/train_<i>.txt' is read as a flat sequence in which,
    for every training set size, the quantities appear in order as (value, error) pairs;
    'stride' is the number of entries per training set size.
    NOTE(review): this layout is presumably the one written by asg.assess_sg_model — confirm.

    Input: res_path, num_val_sizes, quantities, stride
    res_path - path prefix of the results folder (ending with a separator)
    num_val_sizes - number of validation set sizes, i.e. number of files to read
    quantities - names of the assessed quantities, in the order they are stored
    stride - number of entries stored per training set size (default 8)

    Output: curves, errors
    curves - dict mapping each quantity to a (num_val_sizes x num_train_sizes) array of values
    errors - dict mapping each quantity to an array of the same shape with the error bars
    Both dicts are empty when num_val_sizes is 0.
    """
    curve_rows = {}
    error_rows = {}
    for i in range(num_val_sizes):
        curves_i = np.loadtxt(res_path+'curves/train_'+str(i)+'.txt')
        for k, quantity in enumerate(quantities):
            # value at offset 2k, its error right after it, repeated every 'stride' entries
            curve_rows.setdefault(quantity, []).append(np.ravel(curves_i[2*k::stride]))
            error_rows.setdefault(quantity, []).append(np.ravel(curves_i[2*k+1::stride]))

    # one row per validation set size; the previous version referenced undefined names
    # when stacking rows for more than one validation set size
    curves = {quantity: np.vstack(rows) for quantity, rows in curve_rows.items()}
    errors = {quantity: np.vstack(rows) for quantity, rows in error_rows.items()}
    return curves, errors


# read sg training curves
curves_sg, errors_sg = _read_training_curves(res_path_sg, len(val_sizes), assess_qs)

# read lgrg training curves
curves_lgrg, errors_lgrg = _read_training_curves(res_path_lgrg, len(val_sizes), assess_qs)

# plot validation accuracy against training set size for both models,
# one panel per validation set size
fig = plt.figure(figsize=(7,4))
fig.subplots_adjust(left=0.1,bottom=0.1,right=0.9,top=0.9,wspace=0.8,hspace=0.8)
for i, val_size in enumerate(val_sizes):
    ax = fig.add_subplot(len(val_sizes), 1, i+1)
    # row i of each array holds the curve for the i-th validation set size
    ax.errorbar(train_sizes, curves_sg['val_acc'][i], errors_sg['val_acc'][i])
    ax.errorbar(train_sizes, curves_lgrg['val_acc'][i], errors_lgrg['val_acc'][i])
    ax.legend(['sg', 'lgrg'])
    ax.set_xlabel('training set size')
    ax.set_ylabel('{} val repeats x {} train repeats'.format(val_its, train_its))
    ax.set_title('val. set size = {}, Dt = {}, Dvt = {}, Dv = {}'.format(val_size, sg_params['Dt'], sg_params['Dvt'], sg_params['Dv']))