<a href="https://colab.research.google.com/github/vekteo/ASRT_ultra_fast_consolidation/blob/main/ASRT_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **ASRT analysis script for the rapid consolidation online project**


---
1. Upload the merged output files (*ASRT.csv* on OSF)
2. Hit *Run all*


Code by Teodóra Vékony https://github.com/vekteo

Lyon Neuroscience Research Center (CRNL), Université Claude Bernard Lyon 1


# Import Python packages

In [None]:
import pandas as pd
import numpy as np
from google.colab import files
import glob
import seaborn as sns
import matplotlib as plt

# Read datafiles

In [None]:
# CSV files written by later cells of this notebook. They land in the same
# working directory as the inputs, so they must be skipped here; otherwise a
# second "Run all" would read the notebook's own outputs back in as raw data.
generated_outputs = {
    "ASRT_sequences.csv",
    "all_trials.csv",
    "all_trials_last_response.csv",
    "all_trials_last_response_without_1000ms.csv",
    "ASRT_only_correct_trials.csv",
    "TL_RT_wide.csv",
    "GS_RT_wide.csv",
}

# get data file names (sorted so the concatenation order is reproducible)
filenames = sorted(name for name in glob.glob("*.csv") if name not in generated_outputs)
if not filenames:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate"
    raise FileNotFoundError("No input .csv files found in the working directory - upload the merged output file(s) first.")

dfs = [pd.read_csv(filename, encoding="utf8", decimal='.') for filename in filenames]

# Concatenate all data into one DataFrame (fresh 0..n-1 index)
df = pd.concat(dfs, ignore_index=True)


# Drop participants

***Write the public IDs of the participants to drop into the list below***

In [None]:
# Public IDs of the participants whose rows are removed from the dataset
participants_to_drop = ['wxxo75wg','9f29r2qv','7dh53ycb','86pcb2mb','esmmznl1','npsaibu7','qrzq7ts2','sbkvhjhy','trkopxt7','ql49dn7x','c6yyouxw','297gibh5','wunf7i3o','7m76soye','cayu2l31','uqgl5lcz','bxb8djmc','w6vcdqme','p3fyit39','30mhiihl','01hhoswd','j0g4kqyn','bg1yka4d','2j62a51o','9hy16b49','o9aanl9z','t1t2dyid','3pw99p9u','4oltkref','p4f8l137','q965bort','smvr23e8','95f4z9l1','cv841tyx','qmcdul7z','79byw8v4','4xnyxjl0','139lnsjl','zxyx8jio','dvs1rx99','j1mttxb7','ye3yxiiu','02lil6a8','clkmmxkg','rra05osw','32zod29m','l452387h','ne1zmdp0','olyffqcf','bj8l5rz6','ic7gxv2p','k4vkynk3','qc8kj5y4','w5xvw32d','2fjf1gdf','993mmbxg','da8q2m1i','fehyc211','m5zau3bf','54pj2wer','fdvkiue5','fv4eqjr5','3wif8txo','ymmro5ep','sn22rw6s','ia5zwn5m','4yiwcr2r','858nish3','ofenlmw3','ovmb6sgq','wcsfo7p6','dor4944c','hk8jwhaz','bsh7t661','536ektl5','gr7qbqqj','my49x5bc','z63ep3ee','2uaif6hy','ran6tboj']

# Keep only the rows whose participant ID is not in the exclusion list
is_excluded = df['Participant Public ID'].isin(participants_to_drop)
df = df.loc[~is_excluded]
df.head()

# Preprocessing

**1. Make string values numeric**

In [None]:
# Convert these columns with pd.to_numeric; any value that cannot be parsed
# as a number becomes NaN (errors='coerce').
numeric_columns = ['cumulative_RT', 'block', 'trial_number', 'is_practice', 'first_response']
for column_name in numeric_columns:
    df[column_name] = pd.to_numeric(df[column_name], errors='coerce')

**2. Drop the unnecessary columns from the dataframe**

In [None]:
# Columns used by the rest of the analysis; every other column is discarded
columns_to_keep = [
    'Participant Public ID', 'time_elapsed', 'group', 'rt', 'correct',
    'triplet_type', 'p_or_r', 'block', 'sequence', 'is_practice',
    'first_response', 'trial_number', 'correct_pos', 'correct_resp_button',
    'resp_button', 'cumulative_RT', 'actual_triplet',
]
df = df[columns_to_keep]
df.head()

**3. Drop unnecessary rows from the dataframe**

In [None]:
# Keep only rows that have a trial number (rows without one do not contain ASRT trials)
has_trial_number = df['trial_number'].notna()
df = df.loc[has_trial_number]
df.head()

**4. Define epochs in a new column "epoch"**

In [None]:
# Group blocks into epochs of five blocks each.
# Epoch 1 has no lower bound: every block number up to 5 belongs to it.
df.loc[df['block'] <= 5, 'epoch'] = 1
# Epochs 2-5 cover blocks 6-10, 11-15, 16-20 and 21-25 (both ends inclusive).
for epoch_number in range(2, 6):
    last_block = epoch_number * 5
    first_block = last_block - 4
    in_epoch = df['block'].between(first_block, last_block)
    df.loc[in_epoch, 'epoch'] = epoch_number
df.head()

**5. Define accuracy in binary format in a new column "ACC"**

In [None]:
# Binary accuracy column:
#   ACC = 1 where 'correct' holds the string "1"
#   ACC = 0 where 'correct' is missing
# Rows with any other value in 'correct' get no ACC value.
answered_correctly = df['correct'].eq("1")
# a value that is not equal to itself is a missing value (NaN)
answer_missing = df['correct'].ne(df['correct'])
df.loc[answered_correctly, 'ACC'] = 1
df.loc[answer_missing, 'ACC'] = 0
df.head(200)

**6. Drop trials that were not first responses to a given stimulus**

In [None]:
# Snapshot of the data before any trial-level exclusion
all_trials = df.copy()

# Remove, in place, every row flagged with first_response == 0
repeated_response_rows = df.index[df['first_response'] == 0]
df.drop(index=repeated_response_rows, inplace=True)

# Snapshot of the data after that removal
all_trials_last_response = df.copy()
df.head(5000)

**7. Drop trials with more than 1000 ms response time**

(this requirement is specific to **ONLINE** data acquisitions)

In [None]:
# Remove, in place, every trial whose cumulative RT exceeds 1000 ms
too_slow_rows = df.index[df['cumulative_RT'] > 1000]
df.drop(index=too_slow_rows, inplace=True)

# Snapshot of the data after the RT cut-off
all_trials_last_response_without_1000ms = df.copy()
df.head()

# Create and save ASRT dataframes

**1. Save ASRT data into a new dataframe**

In [None]:
# Independent (deep) copy of the preprocessed data, so that the exclusions
# applied to ASRT_df below leave df untouched
ASRT_df = df.copy(deep=True)

In [None]:
# One row per (participant, group) with the median of the 'sequence' column.
# NOTE(review): presumably 'sequence' is constant within a participant, so the
# median simply recovers that value - confirm against the task output.
# The string alias 'median' is used instead of np.median: pandas >= 2.1
# deprecates passing the NumPy callable and will change how it is applied.
ASRT_sequences = ASRT_df.pivot_table(index=['Participant Public ID','group'], values=['sequence'], aggfunc='median')

# The participant ID and group live in the index, so the index must be written;
# with index=False the file would contain only an unlabeled 'sequence' column.
ASRT_sequences.to_csv("ASRT_sequences.csv", index=True)
files.download("ASRT_sequences.csv")

**2. Drop the first two trials (X), trills (T), and repetitions (R)**



In [None]:
# Remove, in place, all trials whose triplet type is X, T or R
excluded_triplet_types = ["X", "T", "R"]
rows_to_remove = ASRT_df.index[ASRT_df["triplet_type"].isin(excluded_triplet_types)]
ASRT_df.drop(index=rows_to_remove, inplace=True)
ASRT_df.head()

**3. Drop practice blocks from ASRT dataframes**

In [None]:
# Remove, in place, every row flagged as practice (is_practice == 1)
practice_rows = ASRT_df.index[ASRT_df['is_practice'] == 1]
ASRT_df.drop(index=practice_rows, inplace=True)

**4. Drop incorrect trials**

In [None]:
# Remove, in place, every trial coded as incorrect (ACC == 0);
# rows without an ACC value are not affected by this filter
incorrect_rows = ASRT_df.index[ASRT_df['ACC'] == 0]
ASRT_df.drop(index=incorrect_rows, inplace=True)

**5. Download full dataframes**

In [None]:
# Each dataframe is written to a CSV file (without the row index) and then
# offered for download, in this order:
#   - all trials
#   - the snapshot taken after dropping non-first responses
#   - all trials without responses slower than 1000 ms
#   - ASRT dataset with only correct trials
exports = [
    (all_trials, "all_trials.csv"),
    (all_trials_last_response, "all_trials_last_response.csv"),
    (df, "all_trials_last_response_without_1000ms.csv"),
    (ASRT_df, "ASRT_only_correct_trials.csv"),
]
for frame, csv_name in exports:
    frame.to_csv(csv_name, index=False)
    files.download(csv_name)

# Statistical learning scores

**1. Calculate median RT and save it to dataframes**

In [None]:
#Triplet learning by epoch: calculate median RT - wide format
# One row per (participant, group); one column per (epoch, triplet type).
# The string alias 'median' replaces np.median: pandas >= 2.1 deprecates
# passing the NumPy callable and will change how it is applied.
TL_RT_wide = ASRT_df.pivot_table(index=['Participant Public ID','group'], columns=['epoch','triplet_type'], values='cumulative_RT', aggfunc='median')
# Epoch labels are floats (1.0 ... 5.0); turn them into the prefixes e1 ... e5
TL_RT_wide = TL_RT_wide.rename(columns={1.0: 'e1', 2.0: 'e2', 3.0:'e3', 4.0:'e4', 5.0: 'e5'}, level=0)
# Flatten the two column levels into single names such as 'e1_H' and 'e1_L'
TL_RT_wide.columns = TL_RT_wide.columns.map('_'.join).str.strip('_')

# Learning score per epoch: median RT of the 'L' triplets minus that of the 'H' triplets
for x in [1,2,3,4,5]:
  TL_RT_wide["e"+str(x)+"_TL"] = TL_RT_wide["e"+str(x)+"_L"] - TL_RT_wide["e"+str(x)+"_H"]

**2. Calculate mean RTs and divide the statistical learning scores by them to get the standardized scores**

In [None]:
#Calculate mean RT - wide format
# One row per (participant, group); one column per epoch.
# The string alias 'mean' replaces np.mean: pandas >= 2.1 deprecates passing
# the NumPy callable and will change how it is applied.
mean_RT_wide = ASRT_df.pivot_table(index=['Participant Public ID','group'], columns=['epoch'], values='cumulative_RT', aggfunc='mean')
mean_RT_wide = mean_RT_wide.rename(columns={1.0: 'e1_mean', 2.0: 'e2_mean', 3.0:'e3_mean', 4.0:'e4_mean', 5.0: 'e5_mean'}, level=0)

# Both tables are indexed by (Participant Public ID, group). Joining on the
# full index keeps both levels; merging on the participant ID alone drops the
# 'group' level from the result, and with it from the exported CSV.
TL_RT_wide = TL_RT_wide.join(mean_RT_wide, how='outer')

# Standardized learning score per epoch: learning score divided by the mean RT of that epoch
for x in [1,2,3,4,5]:
    TL_RT_wide["e"+str(x)+"_TL_stand"] = TL_RT_wide["e"+str(x)+"_TL"] / TL_RT_wide["e"+str(x)+"_mean"]

# The index carries the participant ID and group, so it is written to the file
TL_RT_wide.to_csv("TL_RT_wide.csv",index=True)
files.download("TL_RT_wide.csv")

# General skill learning scores

**1. Calculate median RT and save it to dataframes**

In [None]:
#General skill learning by epoch: calculate median RT - wide format
# One row per (participant, group); one column per epoch, holding the median
# cumulative RT over all trials left in ASRT_df for that epoch.
# The string alias 'median' replaces np.median: pandas >= 2.1 deprecates
# passing the NumPy callable and will change how it is applied.
GS_RT_wide = ASRT_df.pivot_table(index=['Participant Public ID','group'], columns=['epoch'], values='cumulative_RT', aggfunc='median')
# Epoch labels are floats (1.0 ... 5.0); rename them to e1 ... e5
GS_RT_wide = GS_RT_wide.rename(columns={1.0: 'e1', 2.0: 'e2', 3.0:'e3', 4.0:'e4', 5.0: 'e5'}, level=0)

# The index carries the participant ID and group, so it is written to the file
GS_RT_wide.to_csv("GS_RT_wide.csv",index=True)
files.download("GS_RT_wide.csv")