In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time

# add the path to my packages to system paths so they can be imported
import sys
sys.path.append('/home/yasamanparhizkar/Documents/yorku/01_thesis/simgraph/code/my_packages')
# sys.path.append('F:\MAScThesis\code\my_packages')
# sys.path.append('/home/yasamanparhizkar/Documents/thesis/code/my_packages')

import simgraph.my_simgraph as sg
import dataprocess.data_handler_03 as dh

# Unit Testing the simgraph main script

## Load data

In [2]:
# load all spike data from file
# (the directory is given relative to the notebook's working directory)
spikes_dp = '../../../local_data/original_files/'
grouped_data = np.load(spikes_dp+'summed_spikes.npy') # you can load alternative files from the same directory

# NOTE(review): I_order_10 is not referenced anywhere in the visible part of this notebook;
# presumably a hand-picked list of 10 group indices - confirm before relying on it
I_order_10 = [54, 35, 10, 60, 74, 9, 61, 56, 91, 104]
# last expression of the cell: show the shape of the loaded array as a sanity check
grouped_data.shape

(297, 1141, 1)

In [3]:
def get_mnist_labels(data_params):
    """
    Load the label file that sits next to the features.

    Input: data_params
    data_params - dict; only 'features_dp' is read. It is used as a plain string
                  prefix, so it must already end with a path separator.

    Output: the array np.loadtxt parses from '<features_dp>lbls.csv'
    """
    labels_file = data_params['features_dp'] + 'lbls.csv'
    return np.loadtxt(labels_file)

def transform_mnistsift(fv):
    """Subsample a feature vector: keep every 4th entry, starting at index 0."""
    stride = 4
    return fv[::stride]

def transform_slowfast(fv):
    """
    Downsample a feature vector before it is used for graph learning.

    Input: fv
    fv - feature vector; any object that supports strided slicing
         NOTE(review): elsewhere described as a 1xDf torch tensor, but the slice
         below acts on the first axis - verify the actual shape/type

    Output: fvv
    fvv - every 200th entry of fv, starting at index 0
    """

    # keep 1 out of every 200 entries: faster runs and less memory usage
    keep_every = 200
    fvv = fv[::keep_every]

    # optional rescaling for numerical stability during GD (currently disabled)
    # fvv = fvv * 10

    return fvv

def transform_sift3d(fv):
    """Subsample a feature vector: keep every 10th entry, starting at index 0."""
    stride = 10
    return fv[::stride]

def transform_soenet(fv):
    """
    Subsample and rescale a feature vector.

    Keeps every 5th entry (starting at index 0) and divides the kept entries by 100.
    fv must support both strided slicing and division by a scalar (e.g. a numpy array).
    """
    subsampled = fv[::5]
    return subsampled / 100

# data retrieval params
# Only one data_params definition is active at a time; the commented-out dicts
# below are the alternative feature sets.
# data_params = {'func': dh.datapoint_sift, 'lbl_func': get_mnist_labels, 'features_dp': '../../data/fe_exp/mnist-sift/', \
#                'spike_data': None, 'group_id': None, 'transform': transform_mnistsift, 'ind_min': 0, 'ind_max': 13203, 'feature_id':'mnist-sift'}

# data_params = {'func': dh.datapoint_numpy, 'lbl_func': dh.get_labels, 'features_dp': '../../data/features/slowfast/slowfast_4732_numpy/', \
#                'spike_data': grouped_data, 'group_id': 0, 'transform': transform_slowfast, 'ind_min': 1*1141+0, 'ind_max': 2*1141-1, 'feature_id':'slowfast'}

# data_params = {'func': dh.datapoint_numpy, 'lbl_func': dh.get_labels, 'features_dp': '../../data/features/sift3d/fvs_s1_with_kp/desc/', \
#                'spike_data': grouped_data, 'group_id': 0, 'transform': transform_sift3d, 'ind_min': 1*1141+0, 'ind_max': 2*1141-1, 'feature_id':'sift3d'}

# active configuration: soenet features
data_params = {
    'func': dh.datapoint_numpy,
    'lbl_func': dh.get_labels,
    'features_dp': '../../data/features/soenet/soenet3/features_2layer/',
    'spike_data': grouped_data,
    'group_id': 0,
    'transform': transform_soenet,
    'ind_min': 1*1141+41,
    'ind_max': 2*1141-1,
    'feature_id': 'soenet',
}

In [4]:
# requested sizes of the training and validation sets
train_num = 200
val_num = 50

# draw the split with a fixed seed; the call also returns the set sizes, which rebind train_num / val_num
# (presumably they can differ from the requested values - confirm in data_handler_03)
train_num, val_num, train_data, val_data = \
dh.random_train_val(train_num, val_num, data_params, seed=0)

# optional normalization
# train_data['des'] = dh.normalize(train_data['des'])
# val_data['des'] = dh.normalize(val_data['des'])

# show statistics
# train_data / val_data are read below through the keys 'smpls', 'lbls' and 'des'
print('feature_id: ', data_params['feature_id'])
print('train_num = ', train_num, ', val_num = ', val_num)
print('number of features: ', train_data['des'].shape[1])

# NOTE(review): the prints below dump the full arrays into the cell output
print('train_smpls = ', train_data['smpls'], '\nval_smpls = ', val_data['smpls'])
print('train_lbls = ', train_data['lbls'], '\nval_lbls = ', val_data['lbls'])
print('train_des = ', train_data['des'], '\nval_des = ', val_data['des'])

feature_id:  soenet
train_num =  200 , val_num =  50
number of features:  80
train_smpls =  [1920 2110 1305 1405 1280 1982 2134 1589 1260 1771 1864 1514 2005 2185
 1616 1448 1377 1558 1572 1799 1526 1735 1293 2152 2114 1375 2174 1238
 1733 1686 1617 1790 1661 1808 1253 1426 1653 1474 1318 1844 1706 1261
 1854 1968 1951 1604 1925 1763 1516 1483 1189 1254 1504 2088 2139 1840
 2233 1730 2155 2207 2078 1329 2256 1609 1880 1722 1326 1343 2130 2231
 1981 1928 1630 2138 1445 1527 1824 1896 1378 1933 2020 2232 1608 1186
 2268 1196 1482 1410 1212 2026 2204 2183 1973 1340 1537 2104 2278 1675
 1336 1227 1773 2240 2179 2048 1594 1804 1905 1533 2094 1412 1825 2045
 2202 1201 2018 1749 1809 1430 2216 2196 2028 1654 1891 1566 2145 1199
 1337 1286 2159 1257 1396 2215 1550 2189 1995 1620 1945 1542 1932 1498
 1736 1229 1231 2252 1811 1955 1539 1681 2052 1541 1469 1207 1211 1881
 1413 2146 1789 1490 1910 1931 1419 2058 2103 2000 1940 1720 1782 1719
 2210 1584 2273 2219 2097 2129 2211 2276 1300 1249 1466 

## Set parameters

In [5]:
# visualize the learned metric matrix
def visualize_M(M, fig_params):
    """
    Display matrix M and overlay red dots on its prominent entries.

    Input: M, fig_params
    M - matrix to display (handed to sg.display_matrix)
    fig_params - dict with keys
        'rmark_th': marker threshold, as a percentage of max(M)
        'xloc', 'yloc': arrays, indexable by a boolean mask shaped like M, giving
                        the x / y plotting position of every entry of M

    Output: none (plots through matplotlib.pyplot)
    """
    threshold_pct = fig_params['rmark_th']
    x_pos, y_pos = fig_params['xloc'], fig_params['yloc']

    sg.display_matrix(M, None)

    # an entry is "prominent" when it is strictly above threshold_pct percent of the largest entry
    prominent = M > (threshold_pct/100) * np.max(M)
    plt.plot(x_pos[prominent], y_pos[prominent], marker='o', markersize=3, color='r', linestyle='')
    plt.title('M - marked above {}%'.format(threshold_pct))

In [6]:
# graph construction parameters (common for all three methods)
sg_params = {
    'Dt': None,
    'Dv': None,
    'Dvt': 20,
    'cnstr_method_tt': 'time-glr',
    'cnstr_method_vv': 'time-glr',
    'cnstr_method_vt': 'time',
    # filled in later, once the train/validation split is known
    'train_t': None,
    'val_t': None,
    'edges_tt': None,
    'edges_vv': None,
    'edges_vt': None,
}

# gradient descent parameters (only used for factobj1)
gd_opt_params = {
    'epsilon0': 1,
    'epsilon_decay': 0.5,
    'epsilon_jump': 1.7,
    'num_its': 100,
    'check_freq': 10,
    'print_checks': True,
    'Theta0': None,
    'force_all_its': True,
    'threshold': -1,
}

# objective parameters (for both lmnn and obj1)
# glr requires this objective parameter to be part of the sg_params dict
sg_params['mu'] = 1

# random parameter for edge selection and B initialization
seed = 0

In [7]:
# parameters to visualize the optimized M
f_sz = train_data['des'].shape[1] # must match data_params
# xloc[r, c] == c: column index of every cell of an f_sz x f_sz grid (broadcast view of arange, not a copy)
xloc = np.broadcast_to(np.arange(f_sz), (f_sz, f_sz))
# yloc[r, c] == r: row index of every cell (transpose of xloc)
yloc = xloc.T
# bundle consumed by visualize_M; rmark_th is a percentage of max(M)
fig_params = {'rmark_th': 50, 'f_sz': f_sz, 'xloc': xloc, 'yloc': yloc}

In [8]:
# update sg_params
# the sample indices of the split are stored as the train / validation time stamps
sg_params['train_t'] = train_data['smpls']
sg_params['val_t'] = val_data['smpls']

# set edges
# train-train edges, built with the configured 'cnstr_method_tt' and the same seed as above
sg_params['edges_tt'] = sg.get_edges_tt(train_data['lbls'], sg_params['Dt'], sg_params['cnstr_method_tt'], sg_params['train_t'], seed)
# last expression of the cell: number of train-train edges
len(sg_params['edges_tt'])

16692

## New edge selection strategy

In [39]:
# development aid: re-import the simgraph module so that edits to my_simgraph.py
# take effect without restarting the kernel
import importlib
importlib.reload(sg)

<module 'simgraph.my_simgraph' from '/home/yasamanparhizkar/Documents/yorku/01_thesis/simgraph/code/my_packages/simgraph/my_simgraph.py'>

In [40]:
def get_edges_vt_time1(train_lbls, num_val, Dvt, train_t, val_t):
    """
    Connect every validation node to its nearest-in-time training nodes,
    split between earlier and later training nodes.

    Refer to documentation for 'get_edges_vt'

    Input: train_lbls, num_val, Dvt, train_t, val_t
    train_lbls - array; only its length (shape[0]) is used, as the number of training nodes
    num_val - number of validation nodes
    Dvt - number of training neighbours requested per validation node
    train_t - numpy array holding the time stamp of each training node
    val_t - indexable holding the time stamp of each validation node

    Output: edges_vt
    edges_vt - list of (validation index, training index) tuples. For each validation
               node the earlier neighbours come first (nearest first), followed by the
               later-or-simultaneous neighbours (nearest first).

    Up to Dvt//2 neighbours come from the earlier side (train_t < val_t) and the rest
    from the later side (train_t >= val_t). When either side has too few nodes, the
    other side fills the gap, so a validation node ends up with fewer than Dvt edges
    only when there are fewer than Dvt training nodes in total.
    """
    # identical for every validation node, so build it once
    train_inds = np.arange(train_lbls.shape[0])

    edges_vt = []
    for i in range(num_val):
        diff = train_t - val_t[i]
        earlier = diff < 0
        later = diff >= 0
        n_earlier = int(np.count_nonzero(earlier))
        n_later = int(np.count_nonzero(later))

        num_neg = min(Dvt//2, n_earlier)
        num_pos = min(Dvt - num_neg, n_later)
        # the later side can run short as well; let the earlier side make up the difference
        num_neg = min(Dvt - num_pos, n_earlier)

        # earlier nodes: largest (closest to zero) negative difference first
        nodes_before = sorted(zip(train_inds[earlier], diff[earlier]), key=lambda x: x[1], reverse=True)
        # later nodes: smallest non-negative difference first
        nodes_after = sorted(zip(train_inds[later], diff[later]), key=lambda x: x[1])

        for j, _ in nodes_before[:num_neg] + nodes_after[:num_pos]:
            edges_vt.append((i, j))

    return edges_vt

In [41]:
def get_edges_vt_time(train_lbls, num_val, Dvt, train_t, val_t):
    """
    Link each validation node to the Dvt training nodes closest to it in time.

    Refer to documentation for 'get_edges_vt'

    Input: train_lbls, num_val, Dvt, train_t, val_t
    train_lbls - array; only its length (shape[0]) is used, as the number of training nodes
    num_val - number of validation nodes
    Dvt - number of training neighbours per validation node
    train_t - time stamps of the training nodes
    val_t - time stamps of the validation nodes

    Output: edges_vt
    edges_vt - list of (validation index, training index) tuples, grouped by validation
               node and ordered by increasing absolute time difference; ties keep the
               original order of the training nodes
    """
    node_ids = np.arange(train_lbls.shape[0])

    def closest_train_nodes(val_time):
        # rank the training nodes by |time difference|; sorted() is stable, so ties keep index order
        gaps = np.abs(train_t - val_time)
        ranked = sorted(zip(node_ids, gaps), key=lambda pair: pair[1])
        return [node for node, _ in ranked[:Dvt]]

    return [(v, node) for v in range(num_val) for node in closest_train_nodes(val_t[v])]

In [42]:
# build the validation-train edges on the real data with the notebook-local implementation
# (the toy example lives in the "Toy example test" section further down)
edges_vt = get_edges_vt_time(train_data['lbls'], val_num, sg_params['Dvt'], sg_params['train_t'], sg_params['val_t'])

In [43]:
# build the same edges with the function from the simgraph script, for comparison with the cell above
edges_vt_inscr = sg.get_edges_vt(val_num, train_data['lbls'], sg_params, seed)

In [44]:
# both implementations are expected to give the same edge list (expected output: True)
edges_vt == edges_vt_inscr

True

### Toy example test

In [50]:
# create toy example
train_num = 7
train_lbls = np.random.rand(train_num)
val_num = 3
Dvt = 4
train_t = np.array([1,5,7,21,8,13,11])
val_t = np.array([10,13,2])
edges_vt = get_edges_vt_time(train_lbls, val_num, Dvt, train_t, val_t)

edges_vt

[(0, 6),
 (0, 4),
 (0, 2),
 (0, 5),
 (1, 5),
 (1, 6),
 (1, 4),
 (1, 2),
 (2, 0),
 (2, 1),
 (2, 2),
 (2, 4)]