In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import os

# add the path to my packages to system paths so they can be imported
import sys
sys.path.append('/home/yasamanparhizkar/Documents/yorku/01_thesis/code/my_packages')
# sys.path.append('F:\MAScThesis\code\my_packages')
# sys.path.append('/home/yasamanparhizkar/Documents/thesis/code/my_packages')

import data_handler_03 as dh
import my_simgraph_06 as sg
import assess_simgraph_02 as asg

# Load spike data

Spike data shape:  (297, 1141, 113) $\implies$ (movie repeats, frames/time, neurons)
<br>
Labels are 1 (= spike) or -1 (= no spike).

In [2]:
# Read the raw spike table from disk; it is reshaped below into
# (rows, 1141, 113), described in the notes above as (repeats, frames, neurons).
spikes_dp = '../../data/original_files/spikes.csv'
raw_spikes = np.loadtxt(spikes_dp, delimiter=',')
n_repeats = raw_spikes.shape[0]

# Map the 0/1 labels onto -1/+1.
binned_data = 2 * raw_spikes.reshape(n_repeats, 1141, 113) - 1

# NOTE(review): presumably a hand-picked ordering of 10 neuron indices;
# it is not used in the cells visible here -- confirm before removing.
I_order_10 = [54, 35, 10, 60, 74, 9, 61, 56, 91, 104]

## Group all 113 neurons

Each time bin is labelled 1 if at least one of the 113 neurons spikes in it and -1 otherwise. This will create a more balanced dataset, which is presumably easier to solve.
<br>
Grouped data shape:  (297, 1141, 1) $\implies$ (movie repeats, frames/time, group)

In [3]:
# Group all neurons together: a (trial, frame) bin is labelled +1 when at least
# one neuron spiked in it and -1 otherwise.
# Vectorised over the whole array instead of looping over every trial and frame
# in Python; keepdims=True keeps the trailing axis so the result has shape
# (trials, frames, 1), and the float literals keep the dtype at float64.
any_spike = (binned_data == 1).any(axis=2, keepdims=True)
grouped_data = np.where(any_spike, 1.0, -1.0)

In [4]:
# Report how much of the grouped data carries the label 1:
# first for a handful of leading trials, then for the whole array.
n_shown = 10   # number of leading trials listed individually
rule = '---------------------------------------------'

print('grouped_data.shape = ', grouped_data.shape)
print('trial #    | percentage belonging to class 1')
print(rule)

avg_spike_perc = 0
for trial in range(n_shown):
    trial_labels = grouped_data[trial].ravel()
    # pers[1] is the entry matching the second listed class, i.e. label 1
    pers = dh.class_percentages(trial_labels, [-1, 1])
    avg_spike_perc += pers[1]
    print('trial #{:3} | {:.2f} %'.format(trial, pers[1]))
avg_spike_perc /= n_shown

print(rule)
print('AVERAGE     | {:.2f} %'.format(avg_spike_perc))

# Share of label-1 bins over every (trial, frame) pair.
n_trials, n_frames = grouped_data.shape[:2]
total_perc = np.sum(grouped_data == 1) * 100 / (n_trials * n_frames)
print(rule)
print('{:.2f} % of the whole data belongs to class 1.'.format(total_perc))

grouped_data.shape =  (297, 1141, 1)
trial #    | percentage belonging to class 1
---------------------------------------------
trial #  0 | 66.26 %
trial #  1 | 69.06 %
trial #  2 | 67.92 %
trial #  3 | 71.08 %
trial #  4 | 68.97 %
trial #  5 | 68.27 %
trial #  6 | 66.87 %
trial #  7 | 65.82 %
trial #  8 | 67.66 %
trial #  9 | 68.19 %
---------------------------------------------
AVERAGE     | 68.01 %
---------------------------------------------
68.47 % of the whole data belongs to class 1.


# Assess the model's performance with random tests

In [5]:
def datapoint_parham(index, data_params):
    """
    Return a single datapoint (feature vector, label) selected by its index in
    the extended index system of the whole dataset (repeats of a 1141-frame movie).

    In this system frame f (zero-based) of repeat r (zero-based) has index
    r*1141 + f; for example, frame 5 of repeat 7 is indexed 7*1141+5.
    Indices only move forward after repeats, so they represent time in a sense.

    The feature vector depends only on the frame, not on the repeat: it is row
    (frame - 41) of the array stored at features_dp. Frames 0..40 therefore have
    no row of their own, and asking for one raises IndexError instead of
    silently wrapping around to the last rows of the array.

    Inputs: index, data_params
    index - chosen datapoint's index; must satisfy index % 1141 >= 41
    data_params - dict; the keys read by this function are
        features_dp - path to the .npy file where feature vectors are stored
        lbl_func - function called as lbl_func(data_params); its result is
                   indexed with `index` to obtain the label
        transform - (optional) function applied to the original feature vector
                    (default: None, no transform is applied)
        Other keys (e.g. func, spike_data, group_id) are not read here;
        presumably they are consumed by lbl_func or by the caller.

    Output: fv, lbl
    fv  - the selected frame's feature vector (after transform, if one is given)
    lbl - the selected time bin's label

    Raises: IndexError if the index falls on one of the first 41 frames of a
    repeat, or past the last row of the stored feature array.
    """
    frames_per_repeat = 1141   # length of one movie repeat, in frames
    first_feature_frame = 41   # frame number that row 0 of the feature array belongs to

    # unpack params
    features_dp = data_params['features_dp']
    lbl_func = data_params['lbl_func']
    transform = data_params.get('transform')

    # feature vector: looked up by frame only, so it is shared across repeats
    frame = index % frames_per_repeat
    row = frame - first_feature_frame
    if row < 0:
        # a negative row would be accepted by numpy and return a vector
        # belonging to a frame near the end of the movie
        raise IndexError(
            'index {} falls on frame {}, but feature vectors start at frame {}'.format(
                index, frame, first_feature_frame))
    # NOTE(review): the whole feature file is re-read on every call; consider
    # caching it in the caller if this becomes a bottleneck.
    fvs = np.load(features_dp)
    fv = fvs[row]
    if transform is not None:
        fv = transform(fv)

    # label
    lbls = lbl_func(data_params)
    lbl = lbls[index]

    return fv, lbl

In [6]:
def transform(fv):
    """
    Subsample a feature vector by keeping every second entry.

    Input: fv
    fv - feature vector; any sequence that supports slicing along its first axis.
         NOTE(review): earlier notes describe this as a 1xDf torch tensor, but
         the slice acts on the first axis, so a 1-D vector is presumably what is
         passed in -- confirm against the caller.

    Output: fvv
    fvv - the entries of fv at even positions (0, 2, 4, ...)
    """
    # Halving the vector gives a faster run and lower memory usage.
    # (A rescaling such as `fvv * 10` was tried for numerical stability during
    # GD and is currently disabled.)
    return fv[0::2]

# data retrieval params
data_params = {
    'func': datapoint_parham,
    'lbl_func': dh.get_labels,
    'features_dp': '../../data/features/parham/parham2/features_test_2layer.npy',
    'spike_data': grouped_data,
    'group_id': 0,
    'transform': None,
    # index span: frame 41 through frame 1140 of the repeat at position 1
    'ind_min': 1*1141+41,
    'ind_max': 2*1141-1,
}

# graph construction and penalty term parameters
sg_params = {
    'mu': 39,
    'Dt': None,
    'Dv': 0,
    'Dvt': 2000,
    'cnstr_method_tt': 'time',
    'cnstr_method_vv': 'time',
    'cnstr_method_vt': 'time',
    'train_t': None,
    'val_t': None,
    'edges_tt': None,
    'edges_vv': None,
    'edges_vt': None,
}

# gradient descent parameters
sg_opt_params = {
    'epsilon0': 1,
    'epsilon_decay': 0.5,
    'epsilon_jump': 2,
    'num_its': 16,
    'check_freq': 1,
    'print_checks': False,
    'Theta0': None,
    'force_all_its': True,
    'threshold': 0.01,
}

# randomization parameters
# NOTE(review): 'seed' is None, so runs are presumably not reproducible -- confirm
rnd_params = dict(
    train_sizes=[50, 100, 150, 200, 250, 300],
    val_sizes=[10],
    train_its=5,
    val_its=10,
    seed=None,
)

# parameters to visualize the optimized M
f_sz = 400 # must match data_params
# xloc[i, j] == j and yloc[i, j] == i: column / row coordinates of an f_sz x f_sz grid
xloc = np.broadcast_to(np.arange(f_sz), (f_sz, f_sz))
yloc = xloc.T
fig_params = dict(rmark_th=85, f_sz=f_sz, xloc=xloc, yloc=yloc)

# path to save the results
res_path = '../../data/experiments/parham/parham_sg/temp/'

In [7]:
# Run the assessment with the parameter sets defined above and keep the two
# returned values (presumably validation results and errors -- confirm in asg).
val_num_res, val_num_err = asg.assess_sg_model(
    data_params,
    sg_params,
    sg_opt_params,
    rnd_params,
    fig_params,
    res_path,
)

# Plot the curves for this run; res_path is passed to both calls.
asg.plot_curves(rnd_params, sg_params, res_path)

In [8]:
# # reload sg package
# import importlib
# importlib.reload(asg)