In [2]:
import numpy as np
import matplotlib.pyplot as plt
import matlab.engine # to run Cheng's code
import os

# add the path to my packages to system paths so they can be imported
import sys
sys.path.append('/home/yasamanparhizkar/Documents/yorku/01_thesis/code/my_packages')
# sys.path.append('F:\MAScThesis\code\my_packages')
# sys.path.append('/home/yasamanparhizkar/Documents/thesis/code/my_packages')

import data_handler_03 as dh
import my_simgraph_06 as sg
import assess_simgraph_05_fact_gdpa as asg

# Assess the model's performance with random tests

In [7]:
def get_mnist_labels(data_params):
    """Load the label vector stored next to the MNIST SIFT features.

    Parameters
    ----------
    data_params : dict
        Data-retrieval parameters. Only ``data_params['features_dp']`` is
        read; it is the directory that contains ``lbls.csv``.

    Returns
    -------
    numpy.ndarray
        The contents of ``lbls.csv`` as parsed by ``np.loadtxt``.
    """
    # os.path.join accepts the directory with or without a trailing separator,
    # whereas plain string concatenation silently produced a wrong file name
    # when the separator was missing.
    return np.loadtxt(os.path.join(data_params['features_dp'], 'lbls.csv'))

# ---- data retrieval ---------------------------------------------------------
data_params = {
    'func': dh.datapoint_sift,
    'lbl_func': get_mnist_labels,
    'features_dp': '../../data/fe_exp/mnist-sift/',
    'spike_data': None,
    'group_id': None,
    'transform': None,
    'ind_min': 0,
    'ind_max': 13203,
}

# ---- graph construction and penalty term ------------------------------------
sg_params = {
    'mu': 30,
    'Dt': None,
    'Dv': 0,
    'Dvt': 2000,
    'cnstr_method_tt': 'time',
    'cnstr_method_vv': 'time',
    'cnstr_method_vt': 'time',
    'train_t': None,
    'val_t': None,
    'edges_tt': None,
    'edges_vv': None,
    'edges_vt': None,
}

# ---- gradient descent -------------------------------------------------------
sg_opt_params = {
    'epsilon0': 1,
    'epsilon_decay': 0.5,
    'epsilon_jump': 2,
    'num_its': 16,
    'check_freq': 1,
    'print_checks': False,
    'Theta0': None,
    'force_all_its': True,
    'threshold': 0.01,
}

# ---- randomization ----------------------------------------------------------
# A smaller alternative that was used before:
#   train_sizes=[400], val_sizes=[10], train_its=2, val_its=3, seed=None
rnd_params = {
    'train_sizes': [50, 100, 150, 200, 250],
    'val_sizes': [50],
    'train_its': 5,
    'val_its': 10,
    'seed': None,
}

# ---- visualization of the optimized M ---------------------------------------
f_sz = 256  # must match data_params
# xloc[r, c] == c for every row r; yloc is its transpose, so yloc[r, c] == r
xloc = np.broadcast_to(np.arange(f_sz), (f_sz,) * 2)
yloc = xloc.transpose()
fig_params = dict(rmark_th=30, f_sz=f_sz, xloc=xloc, yloc=yloc)

# ---- MATLAB engine (runs Cheng's code) --------------------------------------
# NOTE(review): the working directory below is an absolute, machine-specific path
eng = matlab.engine.start_matlab()
eng.cd(r'/home/yasamanparhizkar/Documents/yorku/01_thesis/code/15_cheng', nargout=0)

# ---- where the results are saved --------------------------------------------
res_path_fact = '../../data/experiments/comparison/temp/sg-fact/'
res_path_gdpa = '../../data/experiments/comparison/temp/sg-gdpa/'

In [8]:
# import importlib
# importlib.reload(sg)
# importlib.reload(asg)

In [None]:
# Run the randomized assessment through the MATLAB engine; both result
# directories are passed to the same call.
# NOTE(review): an earlier note said the run times are measured together and
# written to `res_path_sg`, but no variable of that name exists in this
# notebook -- presumably `res_path_fact` is meant; confirm in assess_sg_model.
(val_num_res_sg,
 val_num_err_sg,
 val_num_res_lgrg,
 val_num_err_lgrg) = asg.assess_sg_model(
    eng,
    data_params,
    sg_params,
    sg_opt_params,
    rnd_params,
    fig_params,
    res_path_fact,
    res_path_gdpa,
)

-> Iteration ID: 50_50_0
starting SGML.
initial objective value = -540.0195
 
converged objective value = -4314.2946
minimal eigenvalue of M = -0.0026937
done with SGML.
-> Iteration ID: 50_50_1
starting SGML.
initial objective value = -453.9745
 
converged objective value = -2823.1073
minimal eigenvalue of M = -0.0001547
done with SGML.
-> Iteration ID: 50_50_2
starting SGML.
initial objective value = -529.3192
 
converged objective value = -3944.7622
minimal eigenvalue of M = -1.1372e-05
done with SGML.
-> Iteration ID: 50_50_3
starting SGML.
initial objective value = -498.2192
 
converged objective value = -4484.3653
minimal eigenvalue of M = -3.3585e-06
done with SGML.
-> Iteration ID: 50_50_4
starting SGML.
initial objective value = -582.958
 
converged objective value = -5118.1851
minimal eigenvalue of M = -0.0023306
done with SGML.
-> Iteration ID: 50_100_0
starting SGML.
initial objective value = -1601.9049
 
converged objective value = -12407.9687
minimal eigenvalue of M = -0.

In [None]:
# `res_path_sg` is never defined in this notebook, so the original call raised
# NameError on a fresh kernel. The cell that reads the "sg" curves further down
# loads them from `res_path_fact`, so that is presumably the intended directory.
asg.plot_curves(rnd_params, sg_params, res_path_fact)

In [None]:
# `res_path_lgrg` is never defined in this notebook, so the original call raised
# NameError on a fresh kernel. The cell that reads the "lgrg" curves further
# down loads them from `res_path_gdpa`, so that is presumably the intended
# directory.
asg.plot_curves_without_runtime(rnd_params, sg_params, res_path_gdpa)

In [None]:
# Shut down the MATLAB engine started in the configuration cell once the
# assessment is done. Cells that pass `eng` to `asg` need a new engine
# (re-run the configuration cell) after this point.
eng.quit()

# Plot similarity graph and logistic regression validation accuracies together

In [None]:
# unpack params
train_sizes = rnd_params['train_sizes']
val_sizes = rnd_params['val_sizes']
train_its = rnd_params['train_its']
val_its = rnd_params['val_its']
assess_qs = ['min_acc', 'val_acc', 'missed', 'false_alarm']


def _read_training_curves(res_path, num_val_sizes, quantities, stride=8):
    """Read the saved training curves of one method.

    For every validation-set index ``i`` the file
    ``<res_path>/curves/train_<i>.txt`` is loaded as a flat array. The array is
    read in records of ``stride`` consecutive entries; within a record, entries
    ``2*k`` and ``2*k + 1`` are taken as the value and the error bar of the
    ``k``-th name in ``quantities``.

    Parameters
    ----------
    res_path : str
        Results directory of the method.
    num_val_sizes : int
        Number of ``train_<i>.txt`` files to read.
    quantities : list of str
        Names of the assessed quantities, in the order they are stored.
    stride : int, optional
        Number of entries per record (8 in the files this notebook reads).

    Returns
    -------
    (dict, dict)
        ``curves`` and ``errors``; each maps a quantity name to an array with
        one row per validation-set index. Both are empty if
        ``num_val_sizes`` is 0.
    """
    curves = {}
    errors = {}
    for i in range(num_val_sizes):
        flat = np.loadtxt(os.path.join(res_path, 'curves', 'train_'+str(i)+'.txt'))
        for k, quantity in enumerate(quantities):
            value_row = flat[2*k::stride].reshape((1, -1))
            error_row = flat[2*k+1::stride].reshape((1, -1))
            if i == 0:
                curves[quantity] = value_row
                errors[quantity] = error_row
            else:
                # Append to the rows gathered so far. The original code read
                # from undefined names `curves` / `errors` here and raised
                # NameError as soon as there was more than one validation size.
                curves[quantity] = np.concatenate((curves[quantity], value_row), axis=0)
                errors[quantity] = np.concatenate((errors[quantity], error_row), axis=0)
    return curves, errors


# read sg training curves
curves_sg, errors_sg = _read_training_curves(res_path_fact, len(val_sizes), assess_qs)

# read lgrg training curves
curves_lgrg, errors_lgrg = _read_training_curves(res_path_gdpa, len(val_sizes), assess_qs)

# plot validation accuracy of both methods, one subplot per validation-set size
plt.figure(figsize=(7,4))
plt.subplots_adjust(left=0.1,bottom=0.1,right=0.9,top=0.9,wspace=0.8,hspace=0.8)
for i, val_size in enumerate(val_sizes):
    plt.subplot(len(val_sizes), 1, i+1)
    # labels are attached to the artists so the legend cannot get out of order
    plt.errorbar(train_sizes, curves_sg['val_acc'][i], errors_sg['val_acc'][i], label='sg')
    plt.errorbar(train_sizes, curves_lgrg['val_acc'][i], errors_lgrg['val_acc'][i], label='lgrg')
    plt.legend()
    plt.xlabel('training set size')
    plt.ylabel('{} val repeats x {} train repeats'.format(val_its, train_its))
    plt.title('val. set size = {}, Dt = {}, Dvt = {}, Dv = {}'.format(val_size, sg_params['Dt'], sg_params['Dvt'], sg_params['Dv']))
plt.savefig(os.path.join(res_path_gdpa, 'val_acc_compare.png'))