# Wavelet results classification

## Occipital & vt classification

### Step 1: load the data (occipital or VT)

In [3]:
%pylab inline
import sklearn
from scipy.io import loadmat
import pandas as pd
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from scipy.ndimage.interpolation import shift

Populating the interactive namespace from numpy and matplotlib


In [56]:
# Load the final stacked array: all subjects, all levels, all orientations.
# The commented-out line below is the alternative BA17 input file; the active
# line loads the VT file.
#data = np.load('data_all_subjects_final_log_BA17.npy')
data = np.load('allsubjs_vstacked_readyfor_classification_vt.npy')

In [57]:
data.shape 

(512, 5, 28, 8)

The four axes are: 512 TRs, 5 levels, 28 orientations, and 8 subjects.

In [58]:
# Load the per-TR condition labels for every subject.
subj_lst = ["s1","s2","s3","s5","s6","s7","s8","s10"]
conditions_lst = []


def convert_binary_to_multiclass(binary_conditions):
    """Convert a binary indicator matrix into a multiclass label vector.

    Each row of ``binary_conditions`` marks the columns (TRs) that belong to
    one condition. The result has one entry per column holding the 1-based
    row index of the condition, or 0 where no row is set.

    For example: convert [[1 1 1 1 0 0 0 0]
                          [0 0 0 0 1 1 1 1]]
    to [1 1 1 1 2 2 2 2]
    """
    row_idx, col_idx = np.where(binary_conditions)
    multiclass = np.zeros(binary_conditions.shape[1])
    multiclass[col_idx] = row_idx + 1
    return multiclass


for subject in tqdm(subj_lst):
    conditions = loadmat(subject+'/conds_short_tlrc.mat')
    conditions = conditions['conds_short_tlrc']

    # Shift the labels by 2 TRs along the time axis (presumably to account
    # for hemodynamic delay -- TODO confirm). order=0 moves the samples
    # without interpolating: the default cubic-spline interpolation is not
    # guaranteed to return exact 0/1 values, and any non-zero residue would
    # be picked up as a label by np.where in the conversion step.
    conditions_sh2 = shift(conditions, [0, 2], order=0, cval=0)

    conditions_multi = convert_binary_to_multiclass(conditions_sh2)
    conditions_lst.append(conditions_multi)


100%|██████████| 8/8 [00:00<00:00, 651.42it/s]


In [59]:
# Stack the per-subject label vectors row-wise: one row per subject, one column per TR.
conditions_final = np.vstack (conditions_lst)

In [60]:
conditions_final.shape

(8, 512)

In [61]:
# Persist the shifted multiclass labels so they can be reloaded without the .mat files.
np.save ("conditions_sh2_final.npy", conditions_final)

In [62]:
# Boolean masks selecting the TRs of each category code (1-4).
# NOTE(review): conditions_multi is the variable left over from the last
# iteration of the label-loading loop, so these masks describe only the last
# subject in subj_lst -- confirm whether conditions_final was intended.
face = conditions_multi == 1
objects = conditions_multi == 2
place = conditions_multi == 3
fruit = conditions_multi == 4

## Classification analysis


In [63]:
# Flatten the (subjects, TRs) label matrix into one column vector. Using -1
# lets NumPy infer the length instead of hard-coding 8 * 512 = 4096, so the
# cell keeps working if the number of subjects or TRs changes.
labels = conditions_final.reshape(-1, 1)
labels.shape

(4096, 1)

In [64]:
# Boolean mask of the TRs that carry a non-zero label. It is flattened to 1-D
# because it is used as a row mask; ravel() avoids hard-coding 512 * 8.
no_res = (labels != 0).ravel()

In [65]:
# Keep only the rows whose label is non-zero; the 1-D boolean mask indexes
# rows, so the trailing column axis of `labels` is preserved.
labels_no_res = labels[no_res]
labels_no_res.shape

(1536, 1)

### Classification 06/29/2020

In [66]:
# Example indices: first subject, first level, first run.
subject = level = run = 0


In [67]:
from sklearn import svm
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB

def wavefunc(subject, level, run, trs_per_run=32):
    """Classify one held-out run for a single subject and level.

    Reads the module-level arrays ``data`` (TRs x levels x orientations x
    subjects) and ``conditions_final`` (subjects x TRs). TRs labelled 0 are
    discarded. The TRs of run ``run`` form the test set and the TRs of all
    other runs form the training set (leave-one-run-out).

    Args:
        subject: Subject index into the last axis of ``data`` (0-7).
        level: Level index into the second axis of ``data`` (0-4).
        run: Index of the held-out run; run ``r`` covers TRs
            ``r * trs_per_run`` to ``(r + 1) * trs_per_run - 1``.
        trs_per_run: Number of TRs in each run (32 by default, which gives
            16 runs for 512 TRs).

    Returns:
        Classification accuracy on the held-out run, e.g. ``wavefunc(0, 3, 0)``.

    Raises:
        ValueError: If the held-out run contains no labelled TRs (for
            example when ``run`` is out of range).
    """
    # Local import keeps this cell self-contained; scikit-learn is already a
    # dependency of the notebook.
    from sklearn.preprocessing import LabelEncoder

    subj_data = data[:, :, :, subject]          # one subject
    subj_labels = conditions_final[subject, :]
    level_data = subj_data[:, level, :]         # one level: TRs x orientations

    # Mask of the TRs that belong to the held-out run.
    tr_index = np.arange(level_data.shape[0])
    run_start = run * trs_per_run
    run_mask = (tr_index >= run_start) & (tr_index < run_start + trs_per_run)

    # Only TRs with a non-zero label take part in the classification.
    labelled_mask = subj_labels != 0
    test_mask = labelled_mask & run_mask
    train_mask = labelled_mask & ~run_mask

    if not test_mask.any():
        raise ValueError(
            "run %r contains no labelled TRs for subject %r" % (run, subject)
        )

    X_train, y_train = level_data[train_mask, :], subj_labels[train_mask]
    X_test, y_test = level_data[test_mask, :], subj_labels[test_mask]

    # The stored labels are floats starting at 1, but recent XGBoost releases
    # require integer class labels 0..n_classes-1. Encode them for training
    # and decode the predictions so accuracy is computed on original codes.
    encoder = LabelEncoder()
    y_train_encoded = encoder.fit_transform(y_train)

    # Other classifiers tried previously here: svm.SVC(), GaussianNB().
    model = XGBClassifier(objective='multi:softmax')
    model.fit(X_train, y_train_encoded)

    predicted_encoded = np.asarray(model.predict(X_test)).astype(int)
    predicted = encoder.inverse_transform(predicted_encoded)

    return accuracy_score(y_test, predicted)


In [68]:
wavefunc(0,0,0)

0.3333333333333333

In [69]:
def wavefunc_cross_validation(subject, level, n_runs=16):
    """Return the mean leave-one-run-out accuracy for one subject and level.

    Calls ``wavefunc(subject, level, run)`` once for every run index in
    ``range(n_runs)`` and averages the resulting accuracies.

    Args:
        subject: Subject index passed through to ``wavefunc``.
        level: Level index passed through to ``wavefunc``.
        n_runs: Number of runs (folds) to hold out in turn; 16 by default.

    Returns:
        The mean accuracy across all folds.
    """
    fold_accuracies = [wavefunc(subject, level, run) for run in range(n_runs)]
    return np.asarray(fold_accuracies).mean()

In [70]:
# Index ranges to sweep over: 8 subjects (0-7) and 5 levels (0-4).
subjects, levels = np.arange(8), np.arange(5)

In [71]:
# Sweep every subject/level combination and collect one result record
# (subject, level, cross-validated accuracy) per combination.
from tqdm import tqdm

data_all = []

for subject in tqdm(subjects):
    for level in levels:
        acc = wavefunc_cross_validation(subject, level)
        result_dict = dict(subject=subject, level=level, accuracy=acc)
        data_all.append(result_dict)

100%|██████████| 8/8 [01:40<00:00, 12.62s/it]


In [72]:
# Collect the per-subject/per-level records into a table (one row per record)
# and write it to disk, keeping the row index as the first CSV column.
df = pd.DataFrame(data_all)
df.to_csv("classification_vt_16folds_0728.csv", index=True) 