## combine 1st cohort and 2nd cohort S3 baseline data

In [40]:
import pandas as pd
from datetime import datetime
import numpy as np
import math
from scipy.stats import norm
from scipy.stats import sem

In [41]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style(style="white")
from openpyxl import load_workbook

In [42]:
import statsmodels.formula.api as smf
import scipy.stats as stats
import statsmodels.api as sm
import researchpy as rp

In [43]:
from statsmodels.stats.anova import AnovaRM
import scikit_posthocs as sp

### load 1st cohort s3 baseline data

In [44]:
import glob
import os

In [45]:
# Collect the ABET II export files (CSV chunks) of the first cohort.
# Replace the "......" with the full directory of the ABET II exported raw data.
# The backslash before "*" is escaped explicitly: "\*" is an invalid escape
# sequence (same runtime string, but newer Python versions warn about it).
# sorted() makes the load order reproducible, because glob returns files in
# an unspecified order.
df_list=sorted(glob.glob("......\\raw data\\*.csv"))

In [46]:
# Stack the ABET II export chunks into one frame.
# For every file, print the chunk's shape and then the running combined shape.
df=pd.DataFrame()
for csv_path in df_list:
    chunk=pd.read_csv(csv_path)
    print(chunk.shape)
    df=pd.concat(objs=[df, chunk], ignore_index=True)
    print(df.shape)

(114, 133)
(114, 133)
(131, 144)
(245, 144)
(101, 133)
(346, 144)


In [47]:
# Label every row as first cohort, then keep an independent snapshot
df=df.assign(cohort="coh_1")
s3_1=df.copy()

### load 2nd cohort s3 baseline data

In [48]:
# Collect the ABET II export files (CSV chunks) of the second cohort.
# Replace the "......" with the full directory of the ABET II exported raw data.
# The backslash before "*" is escaped explicitly: "\*" is an invalid escape
# sequence (same runtime string, but newer Python versions warn about it).
# sorted() makes the load order reproducible, because glob returns files in
# an unspecified order.
df_list=sorted(glob.glob("......\\raw data\\*.csv"))

In [49]:
# Stack the ABET II export chunks into one frame.
# For every file, print the chunk's shape and then the running combined shape.
df=pd.DataFrame()
for csv_path in df_list:
    chunk=pd.read_csv(csv_path)
    print(chunk.shape)
    df=pd.concat(objs=[df, chunk], ignore_index=True)
    print(df.shape)

(144, 100)
(144, 100)
(136, 111)
(280, 111)
(1, 100)
(281, 111)


In [50]:
# Label every row as second cohort, then keep an independent snapshot
df=df.assign(cohort="coh_2")
s3_2=df.copy()

In [51]:
# Stack both cohorts into a single frame with a fresh 0..n-1 index
s3=pd.concat(objs=[s3_1, s3_2], ignore_index=True)

In [52]:
# Sex label, stored in the "gander" column (spelling kept: later cells use this name).
# Groups W, X, Y and Z are labelled male; every other group is labelled female.
male_groups=["W", "X", "Y", "Z"]
is_male=s3["Group ID"].isin(male_groups)
s3.loc[:, "gander"]=np.where(is_male, "male", "female")

In [53]:
# Calendar date (no time of day) of each schedule run
run_day=pd.to_datetime(s3["Schedule run date"]).dt.date
s3.loc[:, "Schedule run date short"]=run_day

# key identifies one session (animal + run date + cohort); id identifies one animal
group_animal=s3['Group ID'] + "-" + s3["Animal ID"]
s3.loc[:, "key"]=group_animal + "-" + s3["Schedule run date short"].astype('str') + "-" + s3['cohort']
s3.loc[:, "id"]=group_animal + "-" + s3['cohort']

# store the short date as a datetime column instead of date objects
s3["Schedule run date short"]=pd.to_datetime(s3["Schedule run date short"], format="%Y-%m-%d")

##### The mouse ID 'W-N-5-coh_1' is a typo in the ABET II database; the correct ID is W-N-4-coh_1. The error is corrected in the loaded raw data here.

In [54]:
# Correct the ABET II typo: W-N-5-coh_1 is really W-N-4-coh_1
s3.loc[s3["id"]=="W-N-5-coh_1", "Animal ID"] = "N-4"

# Rebuild key and id once so they reflect the corrected Animal ID.
# (The cell used to recompute both columns twice, with a no-op
# s3["id"].unique() expression in between.)
s3.loc[:, "key"]=s3['Group ID'] + "-" + s3["Animal ID"] + "-" + s3["Schedule run date short"].astype('str') + "-" + s3['cohort']
s3.loc[:, "id"]=s3['Group ID'] + "-" + s3["Animal ID"]+ "-" + s3['cohort']

In [55]:
# Number each animal's sessions by run date (rank within id; 1 = earliest date)
run_dates_by_animal=s3.groupby('id')['Schedule run date short']
s3['ses_order']=run_dates_by_animal.rank()

# Text label of the session number, e.g. "session_1"
session_number=s3['ses_order'].astype('int64').astype('str')
s3['session_order']="session_"+session_number

In [56]:
# Show all rows of the corrected animal (cohort 1, group W, animal N-4)
s3.loc[(s3["cohort"]=='coh_1') & (s3["Group ID"]=='W') & (s3["Animal ID"]=='N-4')]

Unnamed: 0,Schedule run date,Schedule name,Environment name,Group ID,Animal ID,Correct Choice Latency Bin last 45min,Incorrect Choice Latency Bin last 45min,Reward Retrieval Latency Bin last 45min,Correct Choice Latency Bin first 45min,Incorrect Choice Latency Bin first 45min,...,15 min bin - Correct Rejections (7),15 min bin - Correction Trial Correct Rejections (7),15 min bin - Correction Trial Mistakes (7),cohort,gander,Schedule run date short,key,id,ses_order,session_order
119,10/26/2021 10:45,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL house...,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.64,1.486,...,,,,coh_1,male,2021-10-26,W-N-4-2021-10-26-coh_1,W-N-4-coh_1,1.0,session_1
123,10/27/2021 11:21,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL house...,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.661,1.384,...,,,,coh_1,male,2021-10-27,W-N-4-2021-10-27-coh_1,W-N-4-coh_1,2.0,session_2
127,10/28/2021 11:19,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL house...,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.593,1.641,...,,,,coh_1,male,2021-10-28,W-N-4-2021-10-28-coh_1,W-N-4-coh_1,3.0,session_3
131,11/4/2021 11:10,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.938,1.759,...,,,,coh_1,male,2021-11-04,W-N-4-2021-11-04-coh_1,W-N-4-coh_1,4.0,session_4
135,11/5/2021 10:42,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.743,2.008,...,,,,coh_1,male,2021-11-05,W-N-4-2021-11-05-coh_1,W-N-4-coh_1,5.0,session_5
139,11/10/2021 11:01,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.542,1.801,...,,,,coh_1,male,2021-11-10,W-N-4-2021-11-10-coh_1,W-N-4-coh_1,6.0,session_6
143,11/11/2021 10:29,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.682,1.465,...,,,,coh_1,male,2021-11-11,W-N-4-2021-11-11-coh_1,W-N-4-coh_1,7.0,session_7
147,11/12/2021 10:40,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.758,1.674,...,,,,coh_1,male,2021-11-12,W-N-4-2021-11-12-coh_1,W-N-4-coh_1,8.0,session_8
151,11/15/2021 11:03,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.448,1.613,...,,,,coh_1,male,2021-11-15,W-N-4-2021-11-15-coh_1,W-N-4-coh_1,9.0,session_9
155,11/16/2021 11:06,Mouse ICPT Stage 3 Var ITI v1 HORIZONTAL,Chamber4 [4],W,N-4,0.0,0.0,0.0,1.764,1.737,...,,,,coh_1,male,2021-11-16,W-N-4-2021-11-16-coh_1,W-N-4-coh_1,10.0,session_10


In [57]:
# Show sessions whose end-summary hit count is 0 (an empty result means there are none)
zero_hits=s3['End Summary - Hits (1)'].eq(0)
s3.loc[zero_hits]

Unnamed: 0,Schedule run date,Schedule name,Environment name,Group ID,Animal ID,Correct Choice Latency Bin last 45min,Incorrect Choice Latency Bin last 45min,Reward Retrieval Latency Bin last 45min,Correct Choice Latency Bin first 45min,Incorrect Choice Latency Bin first 45min,...,15 min bin - Correct Rejections (7),15 min bin - Correction Trial Correct Rejections (7),15 min bin - Correction Trial Mistakes (7),cohort,gander,Schedule run date short,key,id,ses_order,session_order


In [58]:
# List session keys that occur more than once (an empty array means no duplicates)
has_duplicate_key=s3.duplicated(subset='key', keep=False)
t=s3.loc[has_duplicate_key].sort_values(by='key')
t["key"].unique()

array([], dtype=object)

In [60]:
# Distinct animal ids present after combining both cohorts
s3.loc[:, "id"].unique()

array(['A-RL-3-coh_1', 'A-R-1-coh_1', 'A-N-4-coh_1', 'A-L-2-coh_1',
       'W-R-1-coh_1', 'W-L-2-coh_1', 'W-RL-3-coh_1', 'W-N-4-coh_1',
       'X-N-5-coh_1', 'X-RL-6-coh_1', 'X-L-7-coh_1', 'X-R-8-coh_1',
       'A-L-2-coh_2', 'A-N-4-coh_2', 'B-L-7-coh_2', 'B-N-5-coh_2',
       'W-L-2-coh_2', 'W-N-4-coh_2', 'X-L-7-coh_2', 'X-N-5-coh_2',
       'Y-R-9-coh_2', 'Y-RL-11-coh_2', 'Z-R-16-coh_2', 'Z-RL-14-coh_2',
       'A-R-1-coh_2', 'A-RL-3-coh_2', 'B-R-8-coh_2', 'B-RL-6-coh_2',
       'W-R-1-coh_2', 'W-RL-3-coh_2', 'X-R-8-coh_2', 'X-RL-6-coh_2',
       'Y-L-10-coh_2', 'Y-N-12-coh_2', 'Z-L-15-coh_2', 'Z-N-13-coh_2'],
      dtype=object)

In [61]:
# Number of distinct animal ids
animal_ids=s3["id"].unique()
print(len(animal_ids))

36


In [62]:
# Work on an independent (deep) copy so that s3 itself stays untouched
df_clean=s3.copy(deep=True)

### calculate d' and impulsivity parameters

In [63]:
def hr_calcu(hit, miss):
    """Return the hit rate: hit / (hit + miss)."""
    return hit/(hit+miss)

def fr_calcu(mistake, correct_rejection):
    """Return the false alarm rate: mistake / (mistake + correct_rejection)."""
    return mistake/(mistake+correct_rejection)

def d_calcu(hit, miss, mistake, correct_rejection):
    """Return d': norm.ppf(hit rate) minus norm.ppf(false alarm rate).

    The rates come from hr_calcu(hit, miss) and
    fr_calcu(mistake, correct_rejection).
    """
    z_hit_rate=norm.ppf(hr_calcu(hit, miss))
    z_false_alarm_rate=norm.ppf(fr_calcu(mistake, correct_rejection))
    return z_hit_rate-z_false_alarm_rate

def si_calcu(hit, miss, mistake, correct_rejection):
    """Return si = (HR - FR) / (2*(HR + FR) - (HR + FR)**2).

    HR is hr_calcu(hit, miss) and FR is fr_calcu(mistake, correct_rejection).
    NOTE(review): "si" presumably stands for sensitivity index - confirm.
    """
    hit_rate=hr_calcu(hit, miss)
    false_alarm_rate=fr_calcu(mistake, correct_rejection)
    rate_sum=hit_rate+false_alarm_rate
    return (hit_rate-false_alarm_rate)/(2*rate_sum-pow(rate_sum, 2))

def c_calcu(hit, miss, mistake, correct_rejection):
    """Return c = -(norm.ppf(HR) + norm.ppf(FR)) / 2.

    HR is hr_calcu(hit, miss) and FR is fr_calcu(mistake, correct_rejection).
    """
    z_hit_rate=norm.ppf(hr_calcu(hit, miss))
    z_false_alarm_rate=norm.ppf(fr_calcu(mistake, correct_rejection))
    return -(z_hit_rate+z_false_alarm_rate)/2

def ri_calcu(hit, miss, mistake, correct_rejection):
    """Return ri = (HR + FR - 1) / (1 - (HR - FR)**2).

    HR is hr_calcu(hit, miss) and FR is fr_calcu(mistake, correct_rejection).
    NOTE(review): "ri" presumably stands for responsivity index - confirm.
    """
    hit_rate=hr_calcu(hit, miss)
    false_alarm_rate=fr_calcu(mistake, correct_rejection)
    rate_difference=hit_rate-false_alarm_rate
    return (hit_rate+false_alarm_rate-1)/(1-pow(rate_difference, 2))

def rp_calcu(centre_ITI_Touches, hit, miss, mistake, correct_rejection, Correction_Trial_Correct_Rejection, Correction_Trial_Mistakes):
    """Return 100 * centre ITI touches divided by the sum of all six trial counts.

    The divisor adds hit, miss, mistake, correct_rejection and the two
    correction-trial counts.
    """
    all_trials=(hit+miss+mistake+correct_rejection
                +Correction_Trial_Correct_Rejection+Correction_Trial_Mistakes)
    return 100*(centre_ITI_Touches)/all_trials

In [64]:
# Replace every space in the column names with an underscore
df_clean.columns=[name.replace(' ','_') for name in df_clean.columns.tolist()]

In [65]:
# Short names for the per-bin latency columns (values and their _sd columns).
# rename() leaves untouched any name in the mapping that is not a column of the frame.
latency_bin_renames={
 'Correct_Choice_Latency_Bin_1_(1-15min)': 'corr_latency_bin_1',
 'Correct_Choice_Latency_Bin_2_(15-30min)': 'corr_latency_bin_2',
 'Correct_Choice_Latency_Bin_3_(30-45min)': 'corr_latency_bin_3',
 'Correct_Choice_Latency_Bin_4_(45-60min)': 'corr_latency_bin_4',
 'Correct_Choice_Latency_Bin_5_(60-75min)': 'corr_latency_bin_5',
 'Correct_Choice_Latency_Bin_6_(75-90min)': 'corr_latency_bin_6',
 'Incorrect_Choice_Latency_Bin_1_(1-15min)': 'incorr_latency_bin_1',
 'Incorrect_Choice_Latency_Bin_2_(15-30min)': 'incorr_latency_bin_2',
 'Incorrect_Choice_Latency_Bin_3_(30-45min)': 'incorr_latency_bin_3',
 'Incorrect_Choice_Latency_Bin_4_(45-60min)': 'incorr_latency_bin_4',
 'Incorrect_Choice_Latency_Bin_5_(60-75min)': 'incorr_latency_bin_5',
 'Incorrect_Choice_Latency_Bin_6_(75-90min)': 'incorr_latency_bin_6',
 'Reward_Retrieval_Latency_Bin_1_(0-15min)':'reward_latency_bin_1',
 'Reward_Retrieval_Latency_Bin_2_(15-30min)':'reward_latency_bin_2',
 'Reward_Retrieval_Latency_Bin_3_(30-45min)':'reward_latency_bin_3',
 'Reward_Retrieval_Latency_Bin_4_(45-60min)':'reward_latency_bin_4',
 'Reward_Retrieval_Latency_Bin_5_(60-75min)':'reward_latency_bin_5',
 'Reward_Retrieval_Latency_Bin_6_(75-90min)':'reward_latency_bin_6',
 'Correct_Choice_Latency_Bin_1_(1-15min)_sd': 'corr_latency_bin_1_sd',
 'Correct_Choice_Latency_Bin_2_(15-30min)_sd': 'corr_latency_bin_2_sd',
 'Correct_Choice_Latency_Bin_3_(30-45min)_sd': 'corr_latency_bin_3_sd',
 'Correct_Choice_Latency_Bin_4_(45-60min)_sd': 'corr_latency_bin_4_sd',
 'Correct_Choice_Latency_Bin_5_(60-75min)_sd': 'corr_latency_bin_5_sd',
 'Correct_Choice_Latency_Bin_6_(75-90min)_sd': 'corr_latency_bin_6_sd',
 'Incorrect_Choice_Latency_Bin_1_(1-15min)_sd': 'incorr_latency_bin_1_sd',
 'Incorrect_Choice_Latency_Bin_2_(15-30min)_sd': 'incorr_latency_bin_2_sd',
 'Incorrect_Choice_Latency_Bin_3_(30-45min)_sd': 'incorr_latency_bin_3_sd',
 'Incorrect_Choice_Latency_Bin_4_(45-60min)_sd': 'incorr_latency_bin_4_sd',
 'Incorrect_Choice_Latency_Bin_5_(60-75min)_sd': 'incorr_latency_bin_5_sd',
 'Incorrect_Choice_Latency_Bin_6_(75-90min)_sd': 'incorr_latency_bin_6_sd',
 'Reward_Retrieval_Latency_Bin_1_(0-15min)_sd': 'reward_latency_bin_1_sd',
 'Reward_Retrieval_Latency_Bin_2_(15-30min)_sd': 'reward_latency_bin_2_sd',
 'Reward_Retrieval_Latency_Bin_3_(30-45min)_sd': 'reward_latency_bin_3_sd',
 'Reward_Retrieval_Latency_Bin_4_(45-60min)_sd': 'reward_latency_bin_4_sd',
 'Reward_Retrieval_Latency_Bin_5_(60-75min)_sd': 'reward_latency_bin_5_sd',
 'Reward_Retrieval_Latency_Bin_6_(75-90min)_sd': 'reward_latency_bin_6_sd',
}
df_clean=df_clean.rename(columns=latency_bin_renames)

In [66]:
# Show the column names after the renaming steps
list(df_clean.columns)

['Schedule_run_date',
 'Schedule_name',
 'Environment_name',
 'Group_ID',
 'Animal_ID',
 'Correct_Choice_Latency_Bin_last_45min',
 'Incorrect_Choice_Latency_Bin_last_45min',
 'Reward_Retrieval_Latency_Bin_last_45min',
 'Correct_Choice_Latency_Bin_first_45min',
 'Incorrect_Choice_Latency_Bin_first_45min',
 'Reward_Retrieval_Latency_Bin_first_45min',
 'Correct_Choice_Latency_Bin_first_45min_sd',
 'Incorrect_Choice_Latency_Bin_first_45min_sd',
 'Reward_Retrieval_Latency_Bin_first_45min_sd',
 'Correct_Choice_Latency_Bin_last_45min_sd',
 'Incorrect_Choice_Latency_Bin_last_45min_sd',
 'Reward_Retrieval_Latency_Bin_last_45min_sd',
 '15_min_bin_-_No_of_non_correction_trials_(1)',
 '15_min_bin_-_No_of_non_correction_trials_(2)',
 '15_min_bin_-_No_of_non_correction_trials_(3)',
 '15_min_bin_-_No_of_non_correction_trials_(4)',
 '15_min_bin_-_No_of_non_correction_trials_(5)',
 '15_min_bin_-_No_of_non_correction_trials_(6)',
 '15_min_bin_-_Left_ITI_Touches_(1)',
 '15_min_bin_-_Left_ITI_Touches_(2)'

In [67]:
# Add the derived columns hr, fr, d, si, c, ri and rp for each 15-minute bin,
# followed by the whole-session columns computed from the End_Summary counts.
from string import Template

df_list=[df_clean]

# Name templates of the per-bin count columns; %s is filled with the bin number
bin_column_templates={
    'hit': '15_min_bin_-_Hits_(%s)',
    'miss': '15_min_bin_-_Misses_(%s)',
    'mistake': '15_min_bin_-_Mistakes_(%s)',
    'correct_rejection': '15_min_bin_-_Correct_Rejections_(%s)',
    'centre_iti_touches': '15_min_bin_-_Centre_ITI_Touches_(%s)',
    'correction_correct_rejection': '15_min_bin_-_Correction_Trial_Correct_Rejections_(%s)',
    'correction_mistake': '15_min_bin_-_Correction_Trial_Mistakes_(%s)',
}

for df in df_list:
    # only bins 1-3 are processed; bins 4, 5 and 6 are left out
    bin_number=[1, 2, 3]
    for c in bin_number:
        # look up this bin's count columns
        counts={label: df.loc[:, template%(c)] for label, template in bin_column_templates.items()}
        hit_miss=(counts['hit'], counts['miss'])
        mistake_rejection=(counts['mistake'], counts['correct_rejection'])

        # columns are created in the order hr, fr, d, si, c, ri, rp
        df.loc[:, 'hr_15min_bin_'+str(c)]=hr_calcu(*hit_miss)
        df.loc[:, 'fr_15min_bin_'+str(c)]=fr_calcu(*mistake_rejection)
        df.loc[:, 'd_15min_bin_'+str(c)]=d_calcu(*hit_miss, *mistake_rejection)
        df.loc[:, 'si_15min_bin_'+str(c)]=si_calcu(*hit_miss, *mistake_rejection)
        df.loc[:, 'c_15min_bin_'+str(c)]=c_calcu(*hit_miss, *mistake_rejection)
        df.loc[:, 'ri_15min_bin_'+str(c)]=ri_calcu(*hit_miss, *mistake_rejection)
        df.loc[:, 'rp_15min_bin_'+str(c)]=rp_calcu(counts['centre_iti_touches'], *hit_miss, *mistake_rejection,
                                                  counts['correction_correct_rejection'], counts['correction_mistake'])

    # whole-session metrics from the End_Summary counts
    end_hit=df.loc[:, 'End_Summary_-_Hits_(1)']
    end_miss=df.loc[:, 'End_Summary_-_Misses_(1)']
    end_mistake=df.loc[:, 'End_Summary_-_Mistakes_(1)']
    end_correct_rejection=df.loc[:, 'End_Summary_-_Correct_Rejections_(1)']

    df.loc[:, 'Hit_Rate']=hr_calcu(end_hit, end_miss)
    df.loc[:, 'False_Alarm_Rate']=fr_calcu(end_mistake, end_correct_rejection)
    df.loc[:, 'c']=c_calcu(end_hit, end_miss, end_mistake, end_correct_rejection)
    df.loc[:, 'd']=d_calcu(end_hit, end_miss, end_mistake, end_correct_rejection)
    df.loc[:, 'si']=si_calcu(end_hit, end_miss, end_mistake, end_correct_rejection)
    df.loc[:, 'ri']=ri_calcu(end_hit, end_miss, end_mistake, end_correct_rejection)
    # impuls = 100 * centre ITI touches / value of the trial_by_trial_anal_-_ITI column
    df.loc[:, 'impuls']=100*df.loc[:, 'End_Summary_-_Centre_ITI_Touches_(1)']/ df.loc[:, 'trial_by_trial_anal_-_ITI']
    


  # Remove the CWD from sys.path while we load stuff.


## plot s3 baseline

In [68]:
# Inspect the ITI column used as the divisor of 'impuls'.
# df_clean is read directly: `df` was only the loop variable left over from
# the previous cell (it pointed at this same frame), which made the cell
# depend on hidden state.
df_clean['trial_by_trial_anal_-_ITI']

0      455.0
1      440.0
2      445.0
3      449.0
4      440.0
       ...  
622    412.0
623    417.0
624    410.0
625    426.0
626    426.0
Name: trial_by_trial_anal_-_ITI, Length: 627, dtype: float64

In [69]:
# Parameters that are plotted and exported, one figure per entry
feature_list=[
    # end-of-session counts
    'End_Summary_-_Hits_(1)',
    'End_Summary_-_Misses_(1)',
    'End_Summary_-_Mistakes_(1)',
    'End_Summary_-_Correct_Rejections_(1)',
    # derived whole-session metrics
    'Hit_Rate',
    'False_Alarm_Rate',
    'c',
    'd',
    'si',
    'ri',
    'impuls',
    # latencies and their sd columns
    'Correct_Choice_Latency',
    'Incorrect_Choice_Latency',
    'Reward_Retrieval_Latency',
    'Correct_Choice_Latency_sd',
    'Incorrect_Choice_Latency_sd',
    'Reward_Retrieval_Latency_sd',
]

In [70]:
# Reshape to long format: one row per session and parameter.
# The identifier columns are kept; every other column becomes a (param, value) pair.
session_id_columns=['Schedule_run_date','Schedule_name','Environment_name','Group_ID','Animal_ID',
                    'Schedule_run_date_short','cohort','key','id','session_order','ses_order', 'gander']
df_clean_melt=df_clean.melt(id_vars=session_id_columns, var_name="param", value_name="value").copy()

In [71]:
# Distinct animal ids in the long-format table
df_clean_melt.loc[:, 'id'].unique()

array(['A-RL-3-coh_1', 'A-R-1-coh_1', 'A-N-4-coh_1', 'A-L-2-coh_1',
       'W-R-1-coh_1', 'W-L-2-coh_1', 'W-RL-3-coh_1', 'W-N-4-coh_1',
       'X-N-5-coh_1', 'X-RL-6-coh_1', 'X-L-7-coh_1', 'X-R-8-coh_1',
       'A-L-2-coh_2', 'A-N-4-coh_2', 'B-L-7-coh_2', 'B-N-5-coh_2',
       'W-L-2-coh_2', 'W-N-4-coh_2', 'X-L-7-coh_2', 'X-N-5-coh_2',
       'Y-R-9-coh_2', 'Y-RL-11-coh_2', 'Z-R-16-coh_2', 'Z-RL-14-coh_2',
       'A-R-1-coh_2', 'A-RL-3-coh_2', 'B-R-8-coh_2', 'B-RL-6-coh_2',
       'W-R-1-coh_2', 'W-RL-3-coh_2', 'X-R-8-coh_2', 'X-RL-6-coh_2',
       'Y-L-10-coh_2', 'Y-N-12-coh_2', 'Z-L-15-coh_2', 'Z-N-13-coh_2'],
      dtype=object)

In [72]:
# Number of distinct animal ids before the exclusions
melt_ids=df_clean_melt["id"].unique()
print(len(melt_ids))

36


#### Filter out X-L-7-coh_1, W-N-4-coh_1 and W-N-4-coh_2 (males)
#### Filter out A-RL-3-coh_2 (female)

###### 1st cohort (male n=6, female n=4): mouse X-L-7 was excluded because it could not pass stage 3, and mouse W-N-4 was excluded because it only finished stage 3 when the other mice had already finished the task; both had difficulty learning the CPT task.
###### 2nd cohort (male n=15, female n=7): mice W-N-4 and A-RL-3 (a jumper) were excluded from this cohort.

In [73]:
# Drop every row that belongs to one of the excluded animals
excluded_ids=["X-L-7-coh_1", "W-N-4-coh_1", "W-N-4-coh_2", "A-RL-3-coh_2"]
is_excluded=df_clean_melt["id"].isin(excluded_ids)
df_clean_melt=df_clean_melt[~is_excluded]

In [74]:
# Distinct animal ids that remain after the exclusions
df_clean_melt.loc[:, 'id'].unique()

array(['A-RL-3-coh_1', 'A-R-1-coh_1', 'A-N-4-coh_1', 'A-L-2-coh_1',
       'W-R-1-coh_1', 'W-L-2-coh_1', 'W-RL-3-coh_1', 'X-N-5-coh_1',
       'X-RL-6-coh_1', 'X-R-8-coh_1', 'A-L-2-coh_2', 'A-N-4-coh_2',
       'B-L-7-coh_2', 'B-N-5-coh_2', 'W-L-2-coh_2', 'X-L-7-coh_2',
       'X-N-5-coh_2', 'Y-R-9-coh_2', 'Y-RL-11-coh_2', 'Z-R-16-coh_2',
       'Z-RL-14-coh_2', 'A-R-1-coh_2', 'B-R-8-coh_2', 'B-RL-6-coh_2',
       'W-R-1-coh_2', 'W-RL-3-coh_2', 'X-R-8-coh_2', 'X-RL-6-coh_2',
       'Y-L-10-coh_2', 'Y-N-12-coh_2', 'Z-L-15-coh_2', 'Z-N-13-coh_2'],
      dtype=object)

In [75]:
# Number of distinct animal ids after the exclusions
kept_ids=df_clean_melt["id"].unique()
print(len(kept_ids))

32


In [76]:
def normal(mean, std, histmax=False, color="black"):
    """Draw a normal density curve on the current matplotlib axes.

    The curve is evaluated at 200 evenly spaced points between mean-4*std and
    mean+4*std. When histmax is truthy the curve is rescaled so that its
    largest value equals histmax. `color` is passed to plt.plot as the
    positional format argument. Nothing is returned.
    """
    grid = np.linspace(mean-4*std, mean+4*std, 200)
    density = stats.norm.pdf(grid, mean, std)
    if histmax:
        # rescale so that the peak of the curve equals histmax
        density = density*histmax/max(density)
    plt.plot(grid, density, color, linewidth=2)

In [1]:
# For every feature, plot the first seven sessions (male vs. female) and save
# the figure and the plotted data as <feature>.png / <feature>.csv.
df_list=[df_clean_melt]
folder_list=["stage 3 training"]

# sessions shown on the x axis, in chronological order
session_labels=["session_%d"%n for n in range(1, 8)]

for a, b in zip(df_list, folder_list):
    # replace the "......" with the full directory of the folder where you want to save it
    save_path = "......\\fig\\%s"%(b)
    # creates missing parent folders too, and does nothing if the folder already exists
    os.makedirs(save_path, exist_ok=True)

    for i in feature_list:
        # `title` was never assigned in this cell (NameError); the feature name is
        # used as the figure title and as the file name.
        # NOTE(review): confirm this is the intended title.
        title = i

        df=a[(a["param"].isin([i])) & (a["session_order"].isin(session_labels))]
        # order= fixes the x-axis order instead of relying on the row order of the data
        plot=sns.pointplot(x='session_order', y='value', data=df, errorbar=('ci', 68),
                           order=session_labels,
                           hue='gander', hue_order=['male', 'female'], palette=['royalblue', 'coral'])
        print(title)
        plt.title(title)
        plt.legend(bbox_to_anchor=(1.4, 1),borderaxespad=0)
        plot.set(xlabel="")
        plot.set(ylabel="")
        plt.xticks(rotation = 45, ha='right')

        # lower y-axis limit: 0 for these features, -0.2 for d
        if i in ['Hit_Rate','False_Alarm_Rate', 'c', 'impuls']:
            plt.ylim(0)

        if i in ['d']:
            plt.ylim(-0.2)

        # os.path.join replaces the '\%s' concatenation, which contained an invalid escape sequence
        plt.savefig(os.path.join(save_path, '%s.png'%(title)), dpi=800, bbox_inches="tight")
        plt.close()
        df.to_csv(os.path.join(save_path, '%s.csv'%title), index=False)

In [78]:
# Save the per-session table with all derived columns, without the index column.
# Replace the "......" with the full directory of the folder where you want to save it.
df_clean.to_csv("......\\s3_baseline.csv", index=False)