In [3]:
import graphlab as gl
import numpy as np
import pandas as pd
import os
from pylab import *

In [9]:
# ---- sweep configuration ------------------------------------------------
classifier_list = ['LR']          # classifiers to run
window_length_list = [200]        # window lengths to sweep over
window_step = 50
feature_type_list = [1]           # 1: spk
# 15 log-spaced L2 penalties between 10**-2 and 10**1.5
l2_penalty_list = np.logspace(-2, 1.5, num=15, endpoint=True, base=10.0, dtype=None).tolist()
l1_penalty = 0.0

# ---- sweep: mouse -> window length -> classifier -> feature type -> L2 ---
for mouse_id in range(1, 7):

    for win_ln_idx, window_length in enumerate(window_length_list):

        # progress banner; print(x) with a single argument prints exactly
        # what the statement form `print x` prints
        current_progress = ("\n========================================\nLoading features: mouse%02d, window_length%03d ...\n" \
                            % (mouse_id, window_length))
        print(current_progress)

        # one feature table per (mouse, window length), shared by all classifiers
        comb = lz_load_feature_matrix(mouse_id, window_length, window_step)

        for classifier_idx, classifier in enumerate(classifier_list):

            # result tables: one row per feature type, one column per L2 penalty;
            # cells stay 0 for classifiers that are not handled below
            AC = [[0] * len(l2_penalty_list) for _ in range(len(feature_type_list))]
            SE = [[0] * len(l2_penalty_list) for _ in range(len(feature_type_list))]
            SP = [[0] * len(l2_penalty_list) for _ in range(len(feature_type_list))]

            for feature_type_idx, feature_type in enumerate(feature_type_list):

                for l2_penalty_idx, l2_penalty in enumerate(l2_penalty_list):

                    current_progress = ("\n----------------------------------------\nClassifying --> classifier: %s, feature_type%03d, l2_penalty_%f ..." \
                                        % (classifier, feature_type, l2_penalty))
                    print(current_progress)

                    if classifier == 'LR':
                        ac_mean, se_mean, sp_mean = lz_logistic_AC_SE_SP(
                            comb, feature_type, l2_penalty, l1_penalty)
                        AC[feature_type_idx][l2_penalty_idx] = ac_mean
                        SE[feature_type_idx][l2_penalty_idx] = se_mean
                        SP[feature_type_idx][l2_penalty_idx] = sp_mean

            # ---- write the three tables for this classifier / mouse / window
            name_fields = (classifier, mouse_id, window_length)
            AC_saveName = "%s_spk_mouse%02d_window_length%03d_AC.csv" % name_fields
            SE_saveName = "%s_spk_mouse%02d_window_length%03d_SE.csv" % name_fields
            SP_saveName = "%s_spk_mouse%02d_window_length%03d_SP.csv" % name_fields
            df_AC = pd.DataFrame(AC)
            df_SE = pd.DataFrame(SE)
            df_SP = pd.DataFrame(SP)

            # NOTE: changes the working directory of the whole kernel; the CSVs
            # below are written relative to it
            os.chdir("/Users/lizhu/Dropbox/projects/calcium/comparison/ROI4FluoroSNNAP/")
            df_AC.to_csv(AC_saveName, index=False, header=False)
            df_SE.to_csv(SE_saveName, index=False, header=False)
            df_SP.to_csv(SP_saveName, index=False, header=False)


Loading features: mouse01, window_length200 ...


----------------------------------------
Classifying --> classifier: LR, feature_type001, l2_penalty_0.010000 ...
AC_mean =  0.866550348953
AC_std =  0.0210302742719
SE_mean =  0.568049987949
SE_std =  0.0456578152788
SP_mean =  0.941600705783
SP_std =  0.0221423274847

----------------------------------------
Classifying --> classifier: LR, feature_type001, l2_penalty_0.017783 ...
AC_mean =  0.868238142715
AC_std =  0.0221738123953
SE_mean =  0.572049987949
SE_std =  0.0429445711637
SP_mean =  0.94264237245
SP_std =  0.0235925691916

----------------------------------------
Classifying --> classifier: LR, feature_type001, l2_penalty_0.031623 ...
AC_mean =  0.868238142715
AC_std =  0.0221738123953
SE_mean =  0.572049987949
SE_std =  0.0429445711637
SP_mean =  0.94264237245
SP_std =  0.0235925691916

----------------------------------------
Classifying --> classifier: LR, feature_type001, l2_penalty_0.056234 ...
AC_mean =  0.86992593647

In [6]:
# load and organize feature matrix
def lz_load_feature_matrix(mouse_id, window_length, window_step,
                           data_dir="/Users/lizhu/Dropbox/projects/calcium/comparison/ROI4FluoroSNNAP/"):
    """Load one feature CSV, name its columns and return it shuffled.

    The file name is built as
    ``format4ML_spike_mouse_0<mouse_id>_windowLen<window_length>_winStep_0<window_step>.csv``
    and read (header-less, comma separated) from ``data_dir``.

    Parameters
    ----------
    mouse_id, window_length, window_step
        Values inserted verbatim (via ``str``) into the file name.
    data_dir : str, optional
        Directory containing the CSV. Defaults to the location that was
        previously hard-coded, so existing three-argument calls are unchanged.

    Returns
    -------
    The loaded frame with its columns renamed, in file order, to
    ``spikes1`` ... ``spikes30`` followed by ``Whisker``, with rows shuffled
    using ``random_seed=1``.
    """
    fileName = "format4ML_spike_mouse_0%s_windowLen%s_winStep_0%s.csv" % (
        mouse_id, window_length, window_step)
    loadPath = os.path.join(data_dir, fileName)
    comb = gl.SFrame.read_csv(loadPath, delimiter=',', header=False, verbose=False)

    # 30 feature columns followed by the label column
    colName = ['spikes' + repr(i + 1) for i in range(30)] + ['Whisker']

    # map the auto-generated column names, in order, onto the new names
    dictionary = dict(zip(comb.column_names(), colName))
    comb = comb.rename(dictionary)

    # fixed seed so the row order is the same on every run
    comb = gl.toolkits.cross_validation.shuffle(comb, random_seed=1)
    # comb['Whisker'].show(view = 'Categorical')

    return comb

In [7]:
# train and cross-validation
def lz_logistic_AC_SE_SP(data, feature_type, l2_penalty, l1_penalty):
    """Cross-validate a logistic classifier and summarise AC / SE / SP.

    Runs 10-fold cross-validation on ``data``. For every fold a GraphLab
    logistic classifier is trained with target column ``'Whisker'`` and
    evaluated on the held-out part; accuracy (AC), sensitivity (SE) and
    specificity (SP) are computed from the fold's confusion matrix.

    Parameters
    ----------
    data
        Feature table to split into folds. Its first 30 columns are used as
        features when ``feature_type == 1``.
    feature_type : int
        Feature set selector. Only ``1`` is defined.
    l2_penalty, l1_penalty : float
        Regularisation strengths forwarded to the classifier.

    Returns
    -------
    (AC_mean, SE_mean, SP_mean)
        Means over the folds. Means and standard deviations (``np.std``) are
        also printed.

    Raises
    ------
    ValueError
        If ``feature_type`` is not a supported value.
    """
    # Select feature columns from the table that was passed in. The previous
    # code read the notebook-global `comb` here, silently ignoring `data`.
    if feature_type == 1:
        feature = data.column_names()[0:30]
    else:
        raise ValueError("unsupported feature_type: %r" % (feature_type,))

    # Kfold
    num_fold = 10
    folds = gl.cross_validation.KFold(data, num_fold)
    AC, SE, SP = [], [], []
    for train, valid in folds:
        m = gl.logistic_classifier.create(train,
                                          target='Whisker',
                                          features=feature,
                                          l2_penalty=l2_penalty,
                                          l1_penalty=l1_penalty,
                                          validation_set=None,
                                          verbose=False)
        # evaluate() returns a dict keyed by metric name
        confusion_matrix = m.evaluate(valid, 'confusion_matrix')['confusion_matrix']

        TP, TN, FP, FN = lz_extract_ACC_SE_SP(confusion_matrix)

        SP.append(float(TN) / (TN + FP))
        SE.append(float(TP) / (TP + FN))
        AC.append(float(TP + TN) / (TP + TN + FP + FN))

    AC_mn = np.mean(AC)
    AC_sd = np.std(AC)
    SE_mn = np.mean(SE)
    SE_sd = np.std(SE)
    SP_mn = np.mean(SP)
    SP_sd = np.std(SP)

    # Single pre-formatted argument: same text as `print 'AC_mean = ', x`
    # (note the two spaces) and valid in both print-statement and
    # print-function syntax.
    print('AC_mean =  %s' % (AC_mn,))
    print('AC_std =  %s' % (AC_sd,))
    print('SE_mean =  %s' % (SE_mn,))
    print('SE_std =  %s' % (SE_sd,))
    print('SP_mean =  %s' % (SP_mn,))
    print('SP_std =  %s' % (SP_sd,))

    return AC_mn, SE_mn, SP_mn

In [8]:
def _lz_confusion_count(confusion_matrix, target_label, predicted_label):
    """Return the 'count' of one (target, predicted) cell, or 0 if the cell is absent."""
    rows = confusion_matrix[(confusion_matrix['target_label'] == target_label)
                            & (confusion_matrix['predicted_label'] == predicted_label)]
    # a label combination that never occurred has no row at all
    if len(rows) == 0:
        return 0
    return rows['count'][0]


def lz_extract_ACC_SE_SP(confusion_matrix):
    """Extract the four binary confusion-matrix counts.

    Parameters
    ----------
    confusion_matrix
        Table with columns ``'target_label'``, ``'predicted_label'`` and
        ``'count'``, one row per label combination that occurred. Label ``1``
        is treated as the positive class and ``0`` as the negative class.

    Returns
    -------
    (TP, TN, FP, FN)
        Counts for true positives, true negatives, false positives and false
        negatives; a combination without a row is reported as 0.
    """
    TP = _lz_confusion_count(confusion_matrix, 1, 1)
    TN = _lz_confusion_count(confusion_matrix, 0, 0)
    FP = _lz_confusion_count(confusion_matrix, 0, 1)
    FN = _lz_confusion_count(confusion_matrix, 1, 0)

    return TP, TN, FP, FN