# Organize_Decision_Making_Data

### This script takes raw JSON data from the decision-making phase of the paradigm and organizes it for all participants and all trials.

#### Created by: Nachshon Korem & Ziv Ben-Zion 
#### Last Updated:May 2023


In [1]:
# Importing Relevant Packages

# Pandas is a package for data manipulation, analysis, and visualization (pd is a short name)
import pandas as pd 

# NumPy (Numerical Python) is a package for scientific computing, data analysis, and numerical computations (np is a short name)
import numpy as np

# JSON (JavaScript Object Notation) is a package for encoding and decoding data in JSON format
import json

# Glob is a package which takes a string pattern and returns a list of filenames that match that pattern
from glob import glob

# OS (Operating System) is a package providing a way to interact with the operating system that Python is running on
import os


In [2]:
#finding the data all of the subjects
sublist = glob('C:\\Users\\zhb4\\Box Sync\\Neural Computations\\Online Pilot\\Raw Data\\study_result_*')

In [3]:
#This is a function used to extract a single subject data based on its JSON file and subject number
 
def extract_sub_data(res, sub): 
    '''
    Input: 
    res- data extracted from JSON for a single subject
    sub- string representing the subject number
    Outupt
    df- pandas DataFrame containing the extracted data.
    '''   
    # creating an empty dataframe
    df = pd.DataFrame()
    
# The function loops through each trial in the res data and extracts relevant pieces of information, including the mean reward
#  for each of the two stimuli (stim1 and stim2), the participant's decision (decision) and its reaction time (RT_decision). 
    for i in range(len(res)):
        time = res[i]['time'] #time in Unix timestamp
        stim1 = res[i]['cues'][0]['reward_mean'] #learned mean value of stimulus #1
        stim2 = res[i]['cues'][1]['reward_mean'] #learned mean value of stimulus #2
        decision = res[i]['cues'][2]['chosen_i']+1 # choosing stimulus #1 is "1" and choosing stimulus #2 is "2"
        RT_decision =res[i]['cues'][2]['rt'] #time it took to make a decision (sec)
    
#  In trials without input we define the rating and their reaction times as NaN (Not a Number)        
        if res[i]['cues'][0]['takingInput']==False:
            val1, conf1, val2, conf2, RT_val1, RT_conf1, RT_val2, RT_conf2 = np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN
            conf_decision, RT_conf_decision = np.NaN, np.NaN
            
#  In trials with input, we also extract participants' value & confidence ratings for each stimulus (val1, val2, conf1, conf2) 
#  and their reaction times (RT_val1, RT_val2, RT_conf1, RT_conf2), and the confidence in the choice plus the reaction time
# (conf_decision), RT_onf_decision)
        else:
            val1 = res[i]['cues'][0]['input']['rewardLikely_UI']
            conf1 = res[i]['cues'][0]['input']['confidence_UI']
            RT_val1 = res[i]['cues'][0]['input']['rewardLikely_UI_rt']
            RT_conf1 = res[i]['cues'][0]['input']['confidence_UI_rt']

            val2 = res[i]['cues'][1]['input']['rewardLikely_UI']
            conf2 = res[i]['cues'][1]['input']['confidence_UI']
            RT_val2 = res[i]['cues'][1]['input']['rewardLikely_UI_rt']
            RT_conf2 = res[i]['cues'][1]['input']['confidence_UI_rt']

            conf_decision = res[i]['cues'][2]['input']['confidence_UI']
            RT_conf_decision = res[i]['cues'][2]['input']['confidence_UI_rt']

# For each trial, the function creates a dictionary with the extracted information and creates a temporary DataFrame. 
# These temporary DataFrames are concatenated together to create the final DataFrame with all the trials of a single subject.
        trial = {'sub': sub, 'time':time, 'trial': i+1, 'stim1': stim1, 'stim2':stim2, 'decision': decision, 'RT_decision': RT_decision,
             'val1': val1, 'RT_val1': RT_val1, 'conf1': conf1, 'RT_conf1': RT_conf1, 
             'val2': val2, 'RT_val2': RT_val2, 'conf2': conf2, 'RT_conf2': RT_conf2,
             'conf_decision': conf_decision, 'RT_conf_decision': RT_conf_decision}
        temp = pd.DataFrame(trial, index=[0])
        df = pd.concat([df, temp])
        df = df.reset_index(drop=True)
        
    df.head()
    return df


In [4]:
df = pd.DataFrame()

#This is used to extract a single subject dataframe based on its JSON file and subject number

for folder_path in sublist: 
    # Specify the subject number
    sub = folder_path.split("_")[-1]
    # Get the list of items in the subject folder
    items = os.listdir(folder_path)
    # Filter folders from the list
    folders = [item for item in items if os.path.isdir(os.path.join(folder_path, item))]
    # Check if there are exactly 13 folders
    if len(folders) == 13:
        # If so, sort folders alphabetically
        sorted_folders = sorted(folders, key=lambda x: int(x.split('_')[1]))
        # Retrieve the name of the 8th folder - always the decision-making phase
        decision_making_folder = sorted_folders[7]
        # Update the folder_path with the name of the 8th folder
        folder_path = os.path.join(folder_path, decision_making_folder)
        # Open the data.txt file inside the 8th folder and read its contents
        data_file_path = os.path.join(folder_path, "data.txt")
        with open(data_file_path, "r") as file:
            res = file.read()
            #open the JSON file
            res = json.loads(res)
            #Ignore the beginning and end to take only the data
            res = res[1]['data']
            #Use the function above "extract_sub_data"
            temp = extract_sub_data(res, sub)
        
        #Save the data for one subject and continue to the next 
        df = pd.concat([df,temp])
df = df.reset_index(drop=True)

In [5]:
#Obtaining Depression and Anxiety Scores 

df_PHQ = pd.DataFrame()
df_GAD = pd.DataFrame()

#This is used to extract a single subject dataframe based on its JSON file and subject number

for folder_path in sublist: 
    # Specify the subject number
    sub = folder_path.split("_")[-1]
    # Get the list of items in the subject folder
    items = os.listdir(folder_path)
    # Filter folders from the list
    folders = [item for item in items if os.path.isdir(os.path.join(folder_path, item))]
    # Check if there are exactly 13 folders
    if len(folders) == 13:
        # If so, sort folders alphabetically
        sorted_folders = sorted(folders, key=lambda x: int(x.split('_')[1]))
        # Retrieve the name of the 11th folder - always the PHQ-9 (depression) questionnaire 
        depression_folder = sorted_folders[10]
        # Update the folder_path with the name of the 11th folder
        depression_folder_path = os.path.join(folder_path, depression_folder)
        # Open the data.txt file inside the 11th folder and read its contents
        depression_data_file_path = os.path.join(depression_folder_path, "data.txt")
        
        with open(depression_data_file_path, "r") as file:
            res = file.read()
            #open the JSON file
            res = json.loads(res)
            #Ignore the beginning and end to take only the data
            res = res[0]['response']['P0_Q0']
            PHQ9_total = sum(res.values())
            #Use the function above "extract_sub_data"
            temp = {'sub':sub,'PHQ9_total': PHQ9_total}
            temp = pd.DataFrame(temp, index=[0])
            df_PHQ = pd.concat([df_PHQ, temp])
        
        # Retrieve the name of the 10th folder - always the GAD-7 (anxiety) questionnaire 
        anxiety_folder = sorted_folders[9]   
        # Update the folder_path with the name of the 10th folder
        anxiety_folder_path = os.path.join(folder_path, anxiety_folder)
        # Open the data.txt file inside the 10th folder and read its contents
        anxiety_data_file_path = os.path.join(anxiety_folder_path, "data.txt")
        
        with open(anxiety_data_file_path, "r") as file:
            res = file.read()
            #open the JSON file
            res = json.loads(res)
            #Ignore the beginning and end to take only the data
            res = res[0]['response']['P0_Q0']
            GAD7_total = sum(res.values())
            #Use the function above "extract_sub_data"
            temp = {'sub':sub, 'GAD7_total': GAD7_total}      
            temp = pd.DataFrame(temp, index=[0])
            df_GAD = pd.concat([df_GAD, temp])
#Save the data for one subject and continue to the next 
df_PHQ = df_PHQ.reset_index(drop=True)
df_GAD = df_GAD.reset_index(drop=True)

In [6]:
#merge all three dataframes
# Merge df and df_PHQ based on a common column
clinical_df = pd.merge(df_GAD, df_PHQ, left_on='sub', right_on='sub')

In [7]:
#what it the size of the task df? 
num_trials=df.shape[0]
num_variables=df.shape[1]
num_subjects=int(df.shape[0]/30)

print(f"The total number of trials is {num_trials}")
print(f"The total number of variables per trial is {num_variables}")
print(f"The total number of subjects is {num_subjects}")

The total number of trials is 1200
The total number of variables per trial is 17
The total number of subjects is 40


In [8]:
#what it the size of the clinical df? 
num_subjects=clinical_df.shape[0]
variables=clinical_df.shape[1]

print(f"The total number of subjects is {num_subjects}")
print(f"The total number of clinical variables per subject is {num_variables}")

The total number of subjects is 40
The total number of clinical variables per subject is 17


In [9]:
df.to_csv('Decision_Making_Data_40.csv', index=False)

In [67]:
clinical_df.to_csv('Clinical_Data.csv', index=False)