In [1]:
import pandas as pd
import numpy as np
from ggplot import *
import glob
import os
import re
import fnmatch

In [2]:
from IPython.display import display
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
%matplotlib inline
from statsmodels.formula.api import ols
import seaborn as sns
import string

In [3]:
# Collect the training and testing file names found in the working directory.
# The patterns are raw strings so that `\d` / `\w` reach the regex engine as
# regex escapes (in a plain string they are invalid string escapes), and the
# extension dot is escaped so it only matches a literal ".".
fnames = sorted(glob.glob('*.csv'))
TRAINING_FILE_RE = re.compile(r'(\d*)_main_exp_(\w*)_(\w*)_(\w*)_(\d*_\w*_\d*)_(\d*)_data\.csv')
TESTING_FILE_RE = re.compile(r'(\d*)_main_exp_(\w*)_(\w*)_(\w*)_(\d*_\w*_\d*)_(\d*)\.csv')
training_files = [fname for fname in fnames if TRAINING_FILE_RE.match(fname)]
testing_files = [fname for fname in fnames if TESTING_FILE_RE.match(fname)]

In [4]:
# Read each training file and tag its rows with a file_name column holding
# the name of the file they were read from.
list_of_dfs = []
for fname in training_files:
    dataframe = pd.read_csv(fname)
    dataframe['file_name'] = fname
    list_of_dfs.append(dataframe)

In [5]:
# Stack the per-file dataframes vertically into a single table; the original
# row labels are discarded in favour of a fresh 0..n-1 index.
training = pd.concat(objs=list_of_dfs, axis=0, ignore_index=True)

In [6]:
# Attach block_order, block_order_bytype and subj_id to every training row by
# joining on file_name, then order the rows by subject and block and renumber
# the index. The cell ends by previewing the first rows.
info = pd.read_csv("training_fnames_info.csv")
training_data = (
    training
    .merge(info[['file_name', 'block_order', 'block_order_bytype', 'subj_id']],
           on='file_name')
    .sort_values(by=['subj_id', 'block_order'])
    .reset_index(drop=True)
)
training_data.head()

Unnamed: 0,pic_index,trial_num,pic_name,rt,resp,feedback_code,feedback,segment,segment_index,pic_type,trial_target,file_name,block_order,block_order_bytype,subj_id
0,1,1,lion,3.41695,space,1,correct,target,,targetNaN,lion,01_main_exp_training_lion_wbuilder_2018_Oct_16...,1.0,1.0,1
1,2,1,coffeepot,4.30022,left,1,correct,path,1.0,path1,lion,01_main_exp_training_lion_wbuilder_2018_Oct_16...,1.0,1.0,1
2,3,1,mosquito,1.366912,left,1,correct,path,2.0,path2,lion,01_main_exp_training_lion_wbuilder_2018_Oct_16...,1.0,1.0,1
3,4,1,canoe,0.750253,right,1,correct,path,3.0,path3,lion,01_main_exp_training_lion_wbuilder_2018_Oct_16...,1.0,1.0,1
4,5,1,goose,7.78349,right,1,correct,decision,1.0,decision1,lion,01_main_exp_training_lion_wbuilder_2018_Oct_16...,1.0,1.0,1


In [7]:
#learning criterion for each subject each block
def trial_criteria (subj_id,block,criteria,*,data=None,items_per_trial=12):
    """Sliding-window learning criterion for one subject in one block.

    For every trial number from 1 up to the largest trial_num in the block,
    the feedback_code values of that trial are summed and divided by
    ``items_per_trial``; a trial counts as "perfect" when that ratio equals 1.
    A window of ``criteria`` consecutive trials is then slid over the block
    and, for each window position, the number and the fraction of perfect
    trials are recorded.

    Parameters
    ----------
    subj_id : value compared against the ``subj_id`` column.
    block : value compared against the ``block_order`` column.
    criteria : int
        Window length in trials; must be at least 1.
    data : pandas.DataFrame, optional (keyword-only)
        Table with columns subj_id, block_order, trial_num, feedback_code and
        trial_target. Defaults to the notebook-level ``training_data``.
    items_per_trial : number, optional (keyword-only)
        Divisor applied to a trial's summed feedback_code. The default of 12
        is the value that used to be hard-coded here (presumably the number
        of responses per trial -- TODO confirm against the experiment design).

    Returns
    -------
    pandas.DataFrame with one row per window position and the columns
        trial_criteria : 1-based window position
        overall_data   : fraction of perfect trials in the window (0..1)
        trial_target   : trial_target of the first row of the block
        count          : number of perfect trials in the window
        trial_sq       : trial_criteria squared
    The frame is empty when the block has fewer than ``criteria`` trials.

    Raises
    ------
    ValueError
        If ``criteria`` is smaller than 1 or no row matches subj_id/block.
    """
    if criteria < 1:
        raise ValueError("criteria must be a positive number of trials")
    if data is None:
        # fall back to the table built earlier in the notebook
        data = training_data

    # select specific subject and block data
    in_block = (data['subj_id'] == subj_id) & (data['block_order'] == block)
    block_data = data.loc[in_block].reset_index(drop = True)
    if block_data.empty:
        raise ValueError("no rows for subj_id=%s, block=%s" % (subj_id, block))

    # fraction of correct feedback per trial; trial numbers that do not occur
    # in the data get 0, exactly as an empty selection summed to 0 before
    last_trial = int(block_data['trial_num'].max())
    trial_accuracy = (
        block_data.groupby('trial_num')['feedback_code'].sum()
        .reindex(range(1, last_trial + 1), fill_value = 0)
        / items_per_trial
    )
    perfect = (trial_accuracy == 1).values

    # slide a window of `criteria` trials and count the perfect ones
    n_windows = max(len(perfect) - criteria + 1, 0)
    correct_count = [int(perfect[i:i + criteria].sum()) for i in range(n_windows)]
    correct_percent = [count / criteria for count in correct_count]

    overall_criteria = pd.DataFrame(
        {'trial_criteria': np.arange(n_windows) + 1,
         # the fraction goes here; the raw count is kept in 'count' below
         'overall_data': correct_percent},
        columns = ['trial_criteria','overall_data'])
    overall_criteria['trial_target'] = block_data['trial_target'].iloc[0]
    overall_criteria['count'] = correct_count
    overall_criteria['trial_sq'] = overall_criteria['trial_criteria']**2

    return overall_criteria

In [None]:
# Inspect the criterion table for subject 4, block 4 with a 10-trial window.
trial_criteria(subj_id=4, block=4, criteria=10)

In [9]:
def trial_criteria_graph(subj_id,criteria):
    """Plot windowed accuracy for blocks 1-4 of one subject, one panel each.

    Each panel shows, per window position returned by ``trial_criteria``, the
    fraction of perfect trials in the window as a scatter, plus the fitted
    values of an ordinary-least-squares model that is quadratic in the window
    position. Nothing is returned; the figure is left for the notebook
    backend to display.

    Parameters
    ----------
    subj_id : subject to plot.
    criteria : int, window length in trials passed on to ``trial_criteria``.
    """
    # display names for the targets stored under a different code in the data
    target_labels = {"arc": "beaver", "bolt": "lion"}

    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(20, 5))
    # suptitle titles the whole figure; plt.title would only title the
    # current (last) panel
    fig.suptitle('trial accuracy criteria = ' + str(criteria))
    for j in range(1,5):
        ax = axes[j-1]
        overall_criteria = trial_criteria (subj_id,j,criteria)
        # derive the fraction from the 'count' column so the plotted values
        # always match the fixed 0-1 axis and its label
        overall_criteria = overall_criteria.assign(
            accuracy = overall_criteria['count'] / criteria)

        #get regression line for learning criteria
        #change sigmoid
        formula_criteria = 'accuracy ~ trial_sq + trial_criteria'
        results = ols(formula=formula_criteria, data=overall_criteria).fit()
        predicted = results.predict()

        # scatter plot of learning criteria
        target = overall_criteria['trial_target'][0]
        target = target_labels.get(target, target)
        ax.scatter(overall_criteria['trial_criteria'],overall_criteria['accuracy'],label = 'subj_' + str(subj_id) \
                + ', block ' + str(j) + ', ' + 'criteria = ' + str(criteria) + ', '+ target )
        ax.plot(overall_criteria['trial_criteria'], predicted, linewidth = 3) # fitted values
        ax.set_xlabel('trial bins')
        ax.legend(loc = "lower right")
        # set ticks on this panel; plt.yticks would only touch the last one
        ax.set_yticks(np.arange(0, 1.2, step=0.2))
        ax.set_ylim(0, 1)
        ax.set_ylabel('Accuracy(percentage)')

    #save subplots for each subject
    #plt.savefig('subj'+str(subj_id)+ '_criteria' + str(criteria) + '_trial_criteria.png')
    #plt.close()


In [None]:
# Draw the four accuracy panels for subject 1 using a 5-trial window.
trial_criteria_graph(subj_id=1, criteria=5)

In [8]:
def trial_response_time(subj_id,block,criteria,*,data=None):
    """Sliding-window response time for one subject in one block.

    The ``rt`` values of each trial are summed (trial numbers 1 up to the
    largest trial_num in the block; trial numbers that do not occur
    contribute 0). A window of ``criteria`` consecutive trials is then slid
    over those per-trial totals and the window mean is recorded for each
    window position.

    Parameters
    ----------
    subj_id : value compared against the ``subj_id`` column.
    block : value compared against the ``block_order`` column.
    criteria : int
        Window length in trials; must be at least 1.
    data : pandas.DataFrame, optional (keyword-only)
        Table with columns subj_id, block_order, trial_num and rt.
        Defaults to the notebook-level ``training_data``.

    Returns
    -------
    pandas.DataFrame with one row per window position and the columns
        trial_criteria : 1-based window position
        rt_mean        : mean of the per-trial rt totals inside the window
        trial_sq       : trial_criteria squared
    The frame is empty when the block has fewer than ``criteria`` trials.

    Raises
    ------
    ValueError
        If ``criteria`` is smaller than 1 or no row matches subj_id/block.
    """
    if criteria < 1:
        raise ValueError("criteria must be a positive number of trials")
    if data is None:
        # fall back to the table built earlier in the notebook
        data = training_data

    # select specific subject and block data
    in_block = (data['subj_id'] == subj_id) & (data['block_order'] == block)
    block_data = data.loc[in_block].reset_index(drop = True)
    if block_data.empty:
        raise ValueError("no rows for subj_id=%s, block=%s" % (subj_id, block))

    # total response time of every trial
    last_trial = int(block_data['trial_num'].max())
    trial_rt = (
        block_data.groupby('trial_num')['rt'].sum()
        .reindex(range(1, last_trial + 1), fill_value = 0)
        .values
    )

    # mean of the per-trial totals over each window of `criteria` trials
    n_windows = max(len(trial_rt) - criteria + 1, 0)
    rt_criteria = [trial_rt[i:i + criteria].sum() / criteria for i in range(n_windows)]

    overall_rt = pd.DataFrame(
        {'trial_criteria': np.arange(n_windows) + 1,
         'rt_mean': rt_criteria},
        columns = ['trial_criteria','rt_mean'])
    # squared from this frame's own column rather than relying on index
    # alignment with a differently sized frame
    overall_rt['trial_sq'] = overall_rt['trial_criteria'] ** 2
    return overall_rt
#trial_response_time(subj_id = 5,block=1,criteria = 5)

In [None]:
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit 
import scipy.optimize as optimize
def trial_rt_criteria_graph(subj_id,criteria):
    """Plot windowed response time for blocks 1-4 of one subject, one panel each.

    Each panel shows the window means returned by ``trial_response_time`` as
    a scatter together with a least-squares fit of ``a + b * log(t)``, where
    ``t`` is the 1-based window position. Blocks 1 and 2 share one y-axis
    range and blocks 3 and 4 share another, each set to the larger of the two
    blocks' maximum window mean. Nothing is returned; the figure is left for
    the notebook backend to display.

    Parameters
    ----------
    subj_id : subject to plot.
    criteria : int, window length in trials.
    """
    # log formula
    def func(t, a, b):
        return a + b * np.log(t)

    # display names for the targets stored under a different code in the data
    target_labels = {"arc": "beaver", "bolt": "lion"}

    # compute every block once instead of re-deriving the pair maxima on
    # each loop iteration
    rt_by_block = {block: trial_response_time(subj_id = subj_id,block = block,criteria = criteria)
                   for block in range(1,5)}
    pair_max = {pair: max(rt_by_block[block]['rt_mean'].max() for block in pair)
                for pair in ((1, 2), (3, 4))}

    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(20, 5))
    # suptitle titles the whole figure; plt.title would only title the
    # current (last) panel
    fig.suptitle('all trials rt criteria = ' + str(criteria))
    for j in range(1,5):
        ax = axes[j-1]
        rt_criteria = rt_by_block[j]

        # only the target name is needed from the accuracy table;
        # trial_criteria takes (subj_id, block, criteria)
        target = trial_criteria (subj_id,j,criteria)['trial_target'][0]
        target = target_labels.get(target, target)

        popt, _ = optimize.curve_fit(func,rt_criteria['trial_criteria'] , rt_criteria['rt_mean'], maxfev=5000)

        #set up x_lim & y_lim
        max_rt = pair_max[(1, 2) if j <= 2 else (3, 4)]
        if max_rt > 0:
            # a zero maximum would give np.arange a step of 0
            ax.set_yticks(np.arange(0,(max_rt + 1),step=(max_rt / 5)))
        ax.set_xlabel('trial bins')
        if max_rt > 0:
            ax.set_ylim(0, max_rt)
        ax.set_ylabel('response time(average)')
        ax.scatter(rt_criteria['trial_criteria'],rt_criteria['rt_mean'],label = 'subj_' + str(subj_id) \
                + ', block ' + str(j) + ', ' + 'criteria = ' + str(criteria) + ', ' + target + ', rt' ,color = 'green')
        ax.plot(rt_criteria['trial_criteria'], func(rt_criteria['trial_criteria'],*popt),
                linewidth = 3,color = 'green',label = 'best fit') # fitted curve
        ax.legend(loc = "lower right")

    #save subplots for each subject
    #plt.savefig('subj'+str(subj_id)+ '_criteria' + str(criteria) + '_trial_rt.png')
    #plt.close()


In [None]:
# Draw the four response-time panels for subject 6 using a 10-trial window.
trial_rt_criteria_graph(subj_id=6, criteria=10)

In [None]:
def rt_trials_graph(subj_id,criteria):
    """Draw the accuracy figure and then the response-time figure for one subject."""
    for draw in (trial_criteria_graph, trial_rt_criteria_graph):
        draw(subj_id = subj_id,criteria = criteria)

In [None]:
def rt_trials_subjects(subject, criteria=10):
    """Draw the accuracy and response-time figures for subjects 1..subject.

    Parameters
    ----------
    subject : int
        Highest subject id to plot; ids are assumed to run from 1 upwards.
    criteria : int, optional
        Window length in trials handed to ``rt_trials_graph``. Defaults to
        10, the value that used to be fixed inside this function.
    """
    for subj_id in range(1,subject+1):
        rt_trials_graph(subj_id = subj_id,criteria = criteria)
#rt_trials_subjects(30)