In [3]:
import pandas as pd
import numpy as np
import os, glob
import os.path, time
import zipfile as zp
import re
import random
from datetime import datetime, date
from datetime import datetime, timedelta
from scipy import optimize
import matplotlib.pyplot as plt
import matplotlib.axes as ax
from scipy import stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
from statsmodels.base.model import LikelihoodModel

from numpy.random import randn
from numpy.random import seed
from scipy.stats import pearsonr
import unicodedata
#import seaborn as sns; sns.set()

In [15]:
def subfile_df():
    path = "/Users/zyy219/Documents/Risk_project/Data/Survey/"
    all_files = glob.glob(os.path.join(path, "study_*_participant_responses.csv"))
    df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files)
    df_merged   = pd.concat(df_from_each_file, ignore_index=True)
    return df_merged

def subID_list():
    subID = ['797','806','809','826','843','845','855','856','857','1031','1153','1154','1157','1174','1262','1264','1266',
        '1364','1367','1379','1380','1381','1384','1435','1465','1467','1468','1489','1492','1493','1494','1495','1498','1519',
        '1520','1527','1532','1536','1541','1721','1722','1737','1741','1788','1791','1810','1812','1833','1934']
    return subID

### Uncertainty, Risk (DOSPERT, IUS, LOT-R)

In [16]:
def dospert_domain_label(subID,Question):    
    df = subfile_df()
    S = [[10,'01'],[10,'08'],[20,'03'],[20,'06'],[30,'07'],[30,'10'],[40,'07'],[40,'08']]
    R = [[10,'02'],[10,'04'],[20,'02'],[20,'04'],[20,'07'],[30,'04'],[40,'02'],[40,'03']]
    F = [[10,'03'],[10,'05'],[10,'09'],[20,'05'],[20,'08'],[20,'09'],[30,'03'],[30,'06']]
    H = [[10,'06'],[20,'10'],[30,'02'],[30,'05'],[40,'01'],[40,'04'],[40,'05'],[40,'06']]
    E = [[10,'07'],[10,'10'],[20,'01'],[30,'01'],[30,'08'],[30,'09'],[40,'09'],[40,'10']]
    if Question == 'Likelihood':
        survey = ['DOSPERT-10', 'DOSPERT-20','DOSPERT-30','DOSPERT-40']
    if Question == 'RP':
        survey = ['DOSPERT-10-RP', 'DOSPERT-20-RP','DOSPERT-30-RP','DOSPERT-40-PP']
    if Question == 'EB':
        survey = ['DOSPERT-10-EB', 'DOSPERT-20-EB','DOSPERT-30-EB','DOSPERT-40-EB']
    
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]
    ser_title = subsetDataFrame['survey_title'].to_list()
    q_title =  subsetDataFrame['question_title'].to_list()
    n = [] #question_title
    for s in range(len(q_title)):
        if q_title[s][1] == '.':
            n.append('0'+q_title[s][0])
        else:
            n.append(q_title[s][0:2])
    answer_title = subsetDataFrame['answer_value'].to_list()
    zip_title = []
    domain_label = []
    answer =[]
    for i in range(len(ser_title)):
        if Question == 'Likelihood':
            zip_title.append([int(ser_title[i][-2:]),n[i]])
        else:
            zip_title.append([int(ser_title[i][-5:-3]),n[i]])
        answer.append(int(answer_title[i][0]))
        if zip_title[i] in S:
            domain_label.append('S')
        if zip_title[i] in R:
            domain_label.append('R')
        if zip_title[i] in F:
            domain_label.append('F')
        if zip_title[i] in H:
            domain_label.append('H')
        if zip_title[i] in E:
            domain_label.append('E')
    return domain_label, answer

def dosert(subID,Question):

    domain_label, answer = dospert_domain_label(subID,Question)

    dospert = pd.DataFrame([domain_label,answer ]).T 
    dospert.columns = ["domain_label", "answer"]

    dospert_domain = dospert.groupby(dospert.domain_label).sum()
    dosert_score = dospert_domain.answer.to_list()
    domain = dospert_domain.index.to_list()
    return domain, dosert_score
#40 is the highest likelihood of each domain


def IUS_factor_label(subID):
    df = subfile_df()
    f1 =[[9,1], [9,2], [9,3], [9,9], [8,3], [8,4], [8,5], [8,6], [8,7], [8,8], [7,2], [7,4], [7,5], [7,6],[7,7]]
    f2 = [[9,4], [9,5], [9,6], [9,7], [9,8], [8,1], [8,2], [8,9], [7,1], [7,3], [7,8], [7,9]]
    survey = ['IUS-9', 'IUS-18','IUS-27']
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]
    ser_title = subsetDataFrame['survey_title'].to_list()
    q_title =  subsetDataFrame['question_title'].to_list()
    answer_title = subsetDataFrame['answer_value'].to_list()
    zip_title = []
    domain_label = []
    answer =[]
    for i in range(len(ser_title)):
        zip_title.append([int(ser_title[i][-1]),int(q_title[i][0])])
        answer.append(int(answer_title[i][0]))
        if zip_title[i] in f1:
            domain_label.append('f1')
        if zip_title[i] in f2:
            domain_label.append('f2')
       
    return domain_label, answer

def IUS_score(subID):
    
    domain_label, answer = IUS_factor_label(subID)
    ius = pd.DataFrame([domain_label,answer ]).T 
    ius.columns = ["domain_label", "answer"]

    ius_domain = ius.groupby(ius.domain_label).sum()
    ius_score = ius_domain.answer.to_list()
    domain = ius_domain.index.to_list()
    return domain, ius_score

def lot(subID):
    df = subfile_df()
    opt_des = ["In uncertain times, I usually expect the best.","I'm always optimistic about my future.",\
           "Overall, I expect more good things to happen to me than bad."]
    pes_des = ["If something can go wrong for me, it will.","I hardly ever expect things to go my way.",\
          "I rarely count on good things happening to me."]
    df_temp = df[df['survey_title'].isin(['LOT-R']) & df['participant_id'].isin([subID])]
    question = df_temp['question_title'].to_list()
    answer = df_temp['answer_value'].to_list()
    opt_score = 0
    pes_score = 0
    for i in range(len(question)):
        if question[i] in opt_des:
        #print(answer[i])
            if answer[i][0] == 'A':
                opt_score+= 5
            if answer[i][0] == 'B':
                opt_score+= 4
            if answer[i][0] == 'C':
                opt_score+= 3
            if answer[i][0] == 'D':
                opt_score+= 2
            if answer[i][0] == 'E':
                opt_score+= 1
        if question[i] in pes_des:
        #print(answer[i])
            if answer[i][0] == 'A':
                pes_score+= 5
            if answer[i][0] == 'B':
                pes_score+= 4
            if answer[i][0] == 'C':
                pes_score+= 3
            if answer[i][0] == 'D':
                pes_score+= 2
            if answer[i][0] == 'E':
                pes_score+= 1
    
    return ['opt','pes'], [opt_score,pes_score]

### Impulsivity

In [17]:
def bisbas_label(subID):
    BAS =[[12,'03'], [12,'09'], [12,'12'], [24,'09'], [12,'05'], [12,'10'], [24,'03'], [24,'08'], \
      [12,'04'], [12,'07'], [24,'02'], [24,'06'], [12,'11']]
    BIS = [[12,'02'], [12,'08'], [24,'01'], [24,'04'], [24,'07'], [24,'10'], [24,'12']]
    
    df = subfile_df()    
    survey = ['BIS/BAS-12', 'BIS/BAS-24']
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]
    ser_title = subsetDataFrame['survey_title'].to_list()
    q_title =  subsetDataFrame['question_title'].to_list()
    n = [] #question_title
    for s in range(len(q_title)):
        if q_title[s][1] == '.':
            n.append('0'+q_title[s][0])
        else:
            n.append(q_title[s][0:2])
            
    answer_value = subsetDataFrame['answer_value'].to_list()
    #print(answer_value)
    zip_title = []
    domain_label = []
    answer =[]
    for i in range(len(ser_title)):
        zip_title.append([int(ser_title[i][-2:]),n[i]])
        #print(zip_title[i])
        if zip_title[i] == [12,'02']:
            #print(i)
            #print(answer_value[i][0])
            if int(answer_value[i][0]) == 1:
                answer.append(4)
            if int(answer_value[i][0]) == 2:
                answer.append(3)
            if int(answer_value[i][0]) == 3:
                answer.append(2)
            if int(answer_value[i][0]) == 4:
                answer.append(1)
        elif zip_title[i] == [12,'08']:
            #print(i)
            #print(answer_value[i][0])
            if int(answer_value[i][0]) == 1:
                answer.append(4)
            if int(answer_value[i][0]) == 2:
                answer.append(3)
            if int(answer_value[i][0]) == 3:
                answer.append(2)
            if int(answer_value[i][0]) == 4:
                answer.append(1)
        else:
            answer.append(int(answer_value[i][0]))
            #print(answer)
        if zip_title[i] in BAS:
            domain_label.append('bas')
        if zip_title[i] in BIS:
            domain_label.append('bis')
        
    return domain_label, answer

def bisbas(subID):

    domain_label, answer = bisbas_label(subID)

    bisbas = pd.DataFrame([domain_label,answer ]).T 
    bisbas.columns = ["domain_label", "answer"]

    bisbas_domain = bisbas.groupby(bisbas.domain_label).sum()
    bisbas_score = bisbas_domain.answer.to_list()
    domain = bisbas_domain.index.to_list()
    return domain, bisbas_score

def UPPS(subID):
    
    reverse = [[12,'02'],[12,'03'],[12,'05'],[12,'07'],[12,'08'],[12,'09'],[12,'10'],[12,'12'],
          [24,'01'],[24,'03'],[24,'05'],[24,'06'],[24,'08'],[24,'10'],[24,'11'],
          [36,'01'],[36,'02'],[36,'05'],[36,'06'],[36,'07'],[36,'10'],[36,'11'],[36,'12'],
          [48,'03'],[48,'04'],[48,'05'],[48,'08'],[48,'09'],[48,'10'],[48,'11'],
          [59,'01'],[59,'02'],[59,'03'],[59,'04'],[59,'06'],[59,'07'],[59,'08'],[59,'09'],[59,'10'],[59,'11']]
     
    df = subfile_df()
    survey = ['UPPS-36', 'UPPS-48', 'UPPS-59', 'UPPS-24', 'UPPS-12']
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]
    survey_title = subsetDataFrame['survey_title'].to_list()
    question_title =  subsetDataFrame['question_title'].to_list()
    n = [] #question_title
    for s in range(len(question_title)):
        if question_title[s][1] == '.':
            n.append('0'+question_title[s][0])
        else:
            n.append(question_title[s][0:2])
    answer_value = subsetDataFrame['answer_value'].to_list()
    answer =[]
    for i in range(len(survey_title)):
    
        zip_title = [int(survey_title[i][-2:]),n[i]]
        if zip_title in reverse:
        
            if int(answer_value[i][0]) == 1:
                answer.append(4)
            
            if int(answer_value[i][0]) == 2:
                answer.append(3)
            
            if int(answer_value[i][0]) == 3:
                answer.append(2)
            
            if int(answer_value[i][0]) == 4:
                answer.append(1)       
        else:
            answer.append(int(answer_value[i][0]))
    
    result = sum(answer)
    return result

def bis11(subID):
    df = sub_file()
    survey = ['BIS11-15', 'BIS11-30']
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]
    question_title = subsetDataFrame['question_title'].to_list()
    answer_value = subsetDataFrame['answer_value'].to_list()
    reverse = ['12. I am a careful thinker.','1. I plan tasks carefully.','7. I plan trips well ahead of time.',
              'I save regularly.','8.  I am self controlled.','15. I like to think about complex problems.',
               '13. I plan for job security.','I concentrate easily.','15.  I am future oriented.',
               '5. I am a steady thinker.','14. I like puzzles.','8. I can only think about one thing at a time.']
    answer = []
    for i in range(len(question_title)):
        if  question_title[i] in reverse:

            if int(answer_value[i][0]) == 1:
                answer.append(4)

            if int(answer_value[i][0]) == 2:
                answer.append(3)

            if int(answer_value[i][0]) == 3:
                answer.append(2)

            if int(answer_value[i][0]) == 4:
                answer.append(1)       
        else:
            answer.append(int(answer_value[i][0]))
    return sum(answer)

### Anxiety

In [18]:
def phq(subID):
    
    df = subfile_df()
    survey = ['PHQ8']
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]

    answer_value = subsetDataFrame['answer_value'].to_list()
    answer = []
    for i in range(len(answer_value)):
        answer.append(int(answer_value[i][0]))
    #print(answer)
    return sum(answer)

def stai_s(subID):
    df = subfile_df()
    reverse = ['I feel calm.', 'I feel secure.', 'I feel at ease.','I feel satisfied.','I feel self-confident.','I am relaxed.'
          'I feel content.','I feel steady','I feel pleasant.']
    survey = ['STAI-State']
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]

    question_title =  subsetDataFrame['question_title'].to_list()
    answer_value = subsetDataFrame['answer_value'].to_list()

    answer =[]
    for i in range(len(question_title)):

        if question_title[i] in reverse:
        
            if int(answer_value[i][0]) == 1:
                answer.append(4)
            
            if int(answer_value[i][0]) == 2:
                answer.append(3)

            if int(answer_value[i][0]) == 3:
                answer.append(2)

            if int(answer_value[i][0]) == 4:
                answer.append(1)       
        else:
            answer.append(int(answer_value[i][0]))

    result = sum(answer)
    return result

def stai_t(subID):
    df = subfile_df()
    survey = ['STAI-Trait']
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]
    reverse = ['I feel pleasant','I feel satisfied with myself','I feel rested','I am "calm, cool, and collected"',
               'I am happy','I feel secure','I make decisions easily','I am content','I am a steady person']
    question_title =  subsetDataFrame['question_title'].to_list()
    answer_value = subsetDataFrame['answer_value'].to_list()

    answer =[]
    for i in range(len(question_title)):

        if question_title[i] in reverse:

            if int(answer_value[i][0]) == 1:
                answer.append(4)

            if int(answer_value[i][0]) == 2:
                answer.append(3)

            if int(answer_value[i][0]) == 3:
                answer.append(2)

            if int(answer_value[i][0]) == 4:
                answer.append(1)       
        else:
            answer.append(int(answer_value[i][0]))

    result = sum(answer)
    return result

def BSI(subID):
    df = subfile_df()

    somatization = ['Faintness or dizziness','Pains in the heart or chest','Nausea or upset stomach',
            'Trouble getting your breath','Hot or cold spells.','Numbness or tingling in parts of your body',
           'Feeling weak in parts of your body']
    obssession = ['Trouble remembering things','Feeling blocked in getting things done',
               'Having to check and double check what you do','Difficulty making decisions','Your mind going blank',
              'Trouble concertrating']
    interpersonal = ['Your feelings being easily hurt','Feeling that people are unfriendly or dislike you',
                    'Feeling inferior to others']

    anxiety = ['Nervousness or shakiness inside','Suddenly scared for no reason','Feeling fearful',
               'Feeling tense or keyed up','Spells of terror or panic','Feeling so restless you couldn’t sit still']

    hosbility = ['Feeling easily annoyed or irritated','Temper outbursts that you could not control',
                'Having urges to beat, injure, or harm someone','Having urges to break or smash things',
                'Getting into frequent arguments']

    phobic = ['Feeling afraid in open spaces','Feeling afraid to travel on buses, subways, or trains',
             'Having to avoid certain things, places, or activities because they frighten you','Feeling uneasy in crowds',
             'Feeling nervous when you are left alone']

    paranoid = ['Feeling others are to blame for most of your troubles','Feeling that most people cannot be trusted',
               'Feeling that you are watched or talked about by others',
                'Others not giving you proper credit for your achievements','Feeling that people will take advantage of you if you let them']

    psychoticism = ['The idea that someone else can control your thoughts','Feeling lonely even when you are with people',
                   'The idea that you should be punished for your sins','Never feeling close to another person',
                   'The idea that something is wrong with your mind']
    appetite = ['Poor appetite']
    sleep = ['Trouble falling asleep']
    #death = ['Thoughts of death or dying']
    guilt = ['Feeling of guilt']

    survey = ['BSI-9', 'BSI-20', 'BSI-36', 'BSI-45', 'BSI-53']
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]
    survey_title = subsetDataFrame['survey_title'].to_list()
    question_title =  subsetDataFrame['question_title'].to_list()
    answer_value = subsetDataFrame['answer_value'].to_list()
    for i in range(len(answer_value)):
        if answer_value[i] == 'Refused':
            answer_value[i] = '0'
    answer =[]
    question = []
    for i in range(len(question_title)):
        new_str = unicodedata.normalize("NFKD", question_title[i])
        question.append(new_str.strip())
    som = []
    obs = []
    inte = []
    anx = []
    hos = []
    pho = []
    par = []
    psy = []
    ape = []
    slp = []
    gui = []

    for i in range(len(question_title)):
        if question[i][3:] in somatization:
            som.append(int(answer_value[i][0]))
        if question[i][4:] in somatization:
            som.append(int(answer_value[i][0]))

        if question[i][3:] in obssession:
            obs.append(int(answer_value[i][0]))
        if question[i][4:] in obssession:
            obs.append(int(answer_value[i][0]))

        if question[i][3:] in interpersonal:
            inte.append(int(answer_value[i][0]))
        if question[i][4:] in interpersonal:
            inte.append(int(answer_value[i][0]))

        if question[i][3:] in anxiety:
            anx.append(int(answer_value[i][0]))
        if question[i][4:] in anxiety:
            anx.append(int(answer_value[i][0]))

        if question[i][3:] in hosbility:
            hos.append(int(answer_value[i][0]))
        if question[i][4:] in hosbility:
            hos.append(int(answer_value[i][0]))

        if question[i][3:] in phobic:
            pho.append(int(answer_value[i][0]))
        if question[i][4:] in phobic:
            pho.append(int(answer_value[i][0]))

        if question[i][3:] in paranoid:
            par.append(int(answer_value[i][0]))
        if question[i][4:] in paranoid:
            par.append(int(answer_value[i][0]))

        if question[i][3:] in psychoticism:
            psy.append(int(answer_value[i][0]))
        if question[i][4:] in psychoticism:
            psy.append(int(answer_value[i][0]))
            
        if question[i][3:] in appetite:
            ape.append(int(answer_value[i][0]))
        if question[i][4:] in appetite:
            ape.append(int(answer_value[i][0]))
            
        if question[i][3:] in sleep:
            slp.append(int(answer_value[i][0]))
        if question[i][4:] in sleep:
            slp.append(int(answer_value[i][0])) 
            
        if question[i][3:] in guilt:
            gui.append(int(answer_value[i][0]))
        if question[i][4:] in guilt:
            gui.append(int(answer_value[i][0]))  
            
            
    psycharity = [sum(som), sum(obs), sum(inte), sum(anx), sum(hos), sum(pho), sum(par), sum(psy), 
                  sum(ape), sum(slp), sum(gui)]
    return psycharity

def BAI(subID):
    df = subfile_df()
    survey = ['BAI']
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([subID])]
    
    question_title =  subsetDataFrame['question_title'].to_list()
    answer_value = subsetDataFrame['answer_value'].to_list()

    answer =[]
    for i in range(len(question_title)):
        answer.append(int(answer_value[i][0]))
    result = sum(answer)
    return result

### cognitvie task

In [19]:
def Raven_score(subID):
    sub = int(subID)
    Raven= pd.read_csv("/Users/zyy219/Documents/Risk_project/Data/Cognition/Raven.csv").set_index("subID").T.reset_index()
    Answer = pd.read_csv("/Users/zyy219/Documents/Risk_project/Data/Cognition/Raven_Answer.txt", sep=",", header=None)
    Answer = Answer.T
    Answer.columns = ["Answer"]
    Raven["Answer"] = Answer
    Raven['Answer_copy'] = Raven['Answer']
    Raven1 = Raven.apply(lambda x: np.where(x == Raven["Answer_copy"], 1, 0), axis = 0).reset_index().drop(["Answer_copy"], axis = 1)
    final = Raven1.drop(columns = ["level_0", "index","Answer"])
    namelist = final.columns
    if sub in namelist:
        iq = final[sub].sum()
        if np.shape(iq) != ():
            iq = max(iq)
    else:
        iq = float("NaN")
    return iq


def numeracy(subID):
    Numeracy = pd.read_csv("/Users/zyy219/Documents/Risk_project/Data/Cognition/Numeracy.csv")
    values = [0, 1]
    
    Numeracy["Q1"][1: ] = Numeracy["Q1"][1: ].apply(lambda x: re.sub(r'[a-z$,]', '', str(x)))
    correctQ1 = [(Numeracy["Q1"] != "0.05"),(Numeracy["Q1"] == "0.05")]
    Numeracy["Correct%Q1"] = np.select(correctQ1, values)

    Numeracy["Q2"][1: ] = Numeracy["Q2"][1: ].apply(lambda x: re.sub(r'[a-z,]', '', str(x)))
    correctQ2 = [(Numeracy["Q2"] != "5"),(Numeracy["Q2"] == "5")]
    Numeracy["Correct%Q2"] = np.select(correctQ2, values)

    Numeracy["Q3"][1: ] = Numeracy["Q3"][1: ].apply(lambda x: re.sub(r'[a-z,]', '', str(x)))
    correctQ3 = [(Numeracy["Q3"] != "47"), (Numeracy["Q3"] == "47")]
    Numeracy["Correct%Q3"] = np.select(correctQ3, values)

    Numeracy["Q4"][1: ] = Numeracy["Q4"][1: ].apply(lambda x: re.sub(r'[a-z$,]', '', str(x)))
    correctQ4 = [(Numeracy["Q4"] != "150"),(Numeracy["Q4"] == "150")]
    Numeracy["Correct%Q4"] = np.select(correctQ4, values)

    Numeracy["Q5"][1: ] = Numeracy["Q5"][1: ].apply(lambda x: re.sub(r'[a-z,%]', '', str(x)))
    correctQ5 = [(Numeracy["Q5"] != "1"),(Numeracy["Q5"] == "1")]
    Numeracy["Correct%Q5"] = np.select(correctQ5, values)
    
    Numeracy["Q6"][1: ] = Numeracy["Q6"][1: ].apply(lambda x: re.sub(r'[a-z,%]', '', str(x)))
    correctQ6 = [(Numeracy["Q6"] != "2. Ten percent (10%)"), (Numeracy["Q6"] == "2. Ten percent (10%)")]
    Numeracy["Correct%Q6"] = np.select(correctQ6, values)
    
    Numeracy["Q7"][1: ] = Numeracy["Q7"][1: ].apply(lambda x: re.sub(r'[a-z,%]', '', str(x)))
    correctQ7 = [(Numeracy["Q7"] != "3. Something that happens 1 in 10 times"),
            (Numeracy["Q7"] == "3. Something that happens 1 in 10 times")]
    Numeracy["Correct%Q7"] = np.select(correctQ7, values)

    Numeracy["Q8"][1: ] = Numeracy["Q8"][1: ].apply(lambda x: re.sub(r'[a-z$,]', '', str(x)))
    correctQ8 = [(Numeracy["Q8"] == "400000"), 
                 (Numeracy["Q8"] != "400000")]
    Numeracy["Correct%Q8"] = np.select(correctQ8, values)

    Numeracy["Q9"][1: ] = Numeracy["Q9"][1: ].apply(lambda x: re.sub(r'[a-z$,]', '', str(x)))
    correctQ9 = [(Numeracy["Q9"] == "242"), (Numeracy["Q9"] != "242")]
    Numeracy["Correct%Q9"] = np.select(correctQ9, values)
    
    Numeracy["NumeracyFinal"] = Numeracy["Correct%Q1"] + Numeracy["Correct%Q2"] + Numeracy["Correct%Q3"] + Numeracy["Correct%Q4"] + Numeracy["Correct%Q5"] + Numeracy["Correct%Q6"] + Numeracy["Correct%Q7"] + Numeracy["Correct%Q8"] + Numeracy["Correct%Q9"]
    df = Numeracy[['Q15', 'NumeracyFinal']][1:]
    sublist = Numeracy['Q15'][1:].tolist()
    if subID in sublist:
        score = df['NumeracyFinal'][df['Q15'] == subID].tolist()[0]
    else:
        score = float('NaN')
    return score

### Emotion Score

In [20]:
def emotion_score(sub_id):
    positive = ['Happy', 'Enjoying myself', 'Warm/ friendly']
    negative =['4. Depressed/ blue', 'Angry / hostile', 'Criticized/put down \xa0\xa0\xa0', 'Worried/ anxious\xa0', 'Frustrated/annoyed', \
           'Hassled / pushed around']
    survey = ['DRM']
    df = subfile_df()
    subsetDataFrame =  df[df['survey_title'].isin(survey) & df['participant_id'].isin([sub_id])]
    time = subsetDataFrame['start_by'].unique()
    
    p_score = []
    n_score = []

    for t in range(len(time)):
        df_temp = subsetDataFrame[subsetDataFrame['start_by'].isin([time[t]])]
        q_title =  df_temp['question_title'].to_list()
    
        positive_score = 0
        negative_score = 0
        answer_title = df_temp['answer_value'].to_list()
    
        for i in range(len(q_title)):
            
            if q_title[i] in positive:
                positive_score += int(answer_title[i][0]) 
            
            if q_title[i] in negative:
                negative_score += int(answer_title[i][0])   
        
        p_score.append(positive_score)
        n_score.append(negative_score)
        
    ps = np.round(np.array(p_score)/3,2)
    ns = np.round(np.array(n_score)/6,2)
    return ps, ns



In [24]:
subID = subID_list()
subNo = len(subID)

positive_score = np.empty([subNo, 11])
negative_score = np.empty([subNo, 11])

for i in range(len(subID)):
    ps, ns = emotion_score(subID[i])
    #print(subID[i])
    if subID[i] == '1262':
        print(ps)
        ps = np.delete(ps, [1])
        ns = np.delete(ns, [1])
        print(ns)
    positive_score[i][:] = ps
    negative_score[i][:] = ns


w1 = positive_score[:,0]
w2 = np.sum(positive_score[:,1:3],axis = 1)/2
w7 = np.sum(positive_score[:,7:9],axis = 1)/2
w8 = np.sum(positive_score[:,9:11],axis = 1)/2

positive = np.empty([subNo,8])

positive[:,0] = w1
positive[:,1] = w2
positive[:,2:6] = positive_score[:,2:6]
positive[:,6] = w7
positive[:,7] = w8

#negative score
nw1 = negative_score[:,0]
nw2 = np.sum(negative_score[:,1:3],axis = 1)/2
nw7 = np.sum(negative_score[:,7:9],axis = 1)/2
nw8 = np.sum(negative_score[:,9:11],axis = 1)/2

negative = np.empty([subNo,8])

negative[:,0] = nw1
negative[:,1] = nw2
negative[:,2:6] = negative_score[:,2:6]
negative[:,6] = nw7
negative[:,7] = nw8

[3.33 3.   3.   3.33 3.33 3.67 3.33 3.   3.   3.   2.67 2.33]
[2.5  1.83 1.67 1.83 1.17 0.83 2.   1.33 1.17 1.17 2.83]


In [26]:
mean_positive = np.mean(positive,axis = 1).reshape([subNo,1])
mean_negative = np.mean(negative, axis = 1).reshape([subNo,1])

### Uncertainty Scale - IUS, LOT

In [59]:
subID = subID_list()
variable_NO = 5
trait = np.empty([subNo,variable_NO])
for i in range(len(subID)):
    
    ius_label, ius_score = IUS_score(subID[i])
    #print(ius_score)
    lot_label, lot_score = lot(subID[i])
    
    
    trait[i][0] = np.array(np.sum(ius_score))
    trait[i][1:3] = np.array(lot_score)
    trait[i][3] = mean_positive[i]
    trait[i][4] = mean_negative[i]

array([[6.200000e+01, 9.000000e+00, 1.200000e+01, 3.063750e+00,
        1.823125e+00],
       [6.900000e+01, 7.000000e+00, 9.000000e+00, 3.916250e+00,
        1.135000e+00],
       [9.600000e+01, 9.000000e+00, 1.100000e+01, 4.333125e+00,
        1.406250e+00],
       [7.300000e+01, 1.000000e+01, 1.100000e+01, 2.687500e+00,
        2.093125e+00],
       [5.500000e+01, 1.400000e+01, 6.000000e+00, 4.896875e+00,
        2.918750e-01],
       [8.100000e+01, 1.200000e+01, 7.000000e+00, 5.250000e+00,
        4.268750e-01],
       [1.080000e+02, 1.000000e+01, 1.100000e+01, 3.291250e+00,
        7.293750e-01],
       [6.700000e+01, 1.400000e+01, 6.000000e+00, 4.623750e+00,
        0.000000e+00],
       [7.900000e+01, 1.200000e+01, 1.200000e+01, 3.665625e+00,
        2.188125e+00],
       [7.200000e+01, 1.100000e+01, 1.200000e+01, 3.355000e+00,
        1.780000e+00],
       [5.100000e+01, 1.000000e+01, 1.100000e+01, 3.500000e+00,
        1.365000e+00],
       [1.020000e+02, 8.000000e+00, 1.10000

## DOSPERT

In [42]:
eb = np.empty([subNo,5])
li = np.empty([subNo,5])
rp = np.empty([subNo,5])
for i in range(len(subID)):
    dosert_label, dosert_score_EB = dosert(subID[i],'EB')
    eb[i,:] = dosert_score_EB
    desert_label, dosert_score_LI = dosert(subID[i],'Likelihood')
    li[i,:] = dosert_score_LI
    desert_label, dosert_score_RP = dosert(subID[i],'RP')
    rp[i,:] = dosert_score_RP


### Impulsivity

In [47]:
trait_imp = np.empty([subNo,4])
for i in range(len(subID)):
    basbis_label, basbis_score = bisbas(subID[i])
    bis11_score = bis11(subID[i])
    upps_score = UPPS(subID[i])
    trait_imp[i][0:2] = np.array(basbis_score)
    trait_imp[i][2] = np.array(bis11_score)
    trait_imp[i][3] = np.array(upps_score)
    



### Mental Health

In [31]:
mental_variable_No = 5
subNo = 49
trait_mental = np.empty([subNo,mental_variable_No])
for i in range(len(subID)):
    phq_score = phq(subID[i])
    stais_score = stai_s(subID[i])
    stait_score = stai_t(subID[i])
    BAI_score = BAI(subID[i])
    BSI_score = BSI(subID[i])
    trait_mental[i][0] = np.array(phq_score)
    trait_mental[i][1] = np.array(stais_score)
    trait_mental[i][2] = np.array(stait_score)
    trait_mental[i][3] = np.array(BAI_score)
    #trait_mental[i][4:mental_variable_No] = np.array(BSI_score)
    trait_mental[i][4] = np.array(np.sum(BSI_score))
    



In [61]:
var = np.concatenate((eb,li,rp,trait,trait_imp, trait_mental), axis=1)
label = ['E_EB','F_EB','S_EB','R_EB','S_EB','E_LI','F_LI','S_LI','R_LI','S_LI','E_RP','F_RP','S_RP','R_RP','S_RP',
        'IUS','lot_pos','lot_neg','drm_pos','drm_neg','BAS','BIS','BIS11','UPPS',
         'phq','stais','stait','bai','bsi']
np.shape(var)

(49, 29)

In [62]:
variables = pd.DataFrame(data=var, index = subID_list(), columns=label)
variables

Unnamed: 0,E_EB,F_EB,S_EB,R_EB,S_EB.1,E_LI,F_LI,S_LI,R_LI,S_LI.1,...,drm_neg,BAS,BIS,BIS11,UPPS,phq,stais,stait,bai,bsi
797,16.0,20.0,12.0,13.0,22.0,14.0,15.0,14.0,10.0,24.0,...,1.823125,33.0,19.0,68.0,114.0,6.0,51.0,52.0,11.0,45.0
806,14.0,19.0,16.0,14.0,19.0,10.0,13.0,15.0,15.0,13.0,...,1.135,31.0,17.0,62.0,87.0,6.0,50.0,43.0,14.0,25.0
809,23.0,20.0,10.0,18.0,29.0,23.0,16.0,23.0,17.0,36.0,...,1.40625,20.0,16.0,79.0,104.0,7.0,54.0,48.0,14.0,63.0
826,19.0,23.0,12.0,15.0,27.0,18.0,20.0,22.0,26.0,33.0,...,2.093125,30.0,15.0,63.0,134.0,5.0,43.0,46.0,6.0,28.0
843,15.0,20.0,10.0,18.0,29.0,17.0,23.0,16.0,24.0,26.0,...,0.291875,26.0,14.0,60.0,122.0,5.0,36.0,26.0,0.0,7.0
845,19.0,17.0,11.0,15.0,24.0,18.0,17.0,23.0,12.0,30.0,...,0.426875,23.0,15.0,69.0,109.0,2.0,35.0,39.0,5.0,19.0
855,16.0,17.0,8.0,24.0,33.0,21.0,13.0,32.0,19.0,38.0,...,0.729375,24.0,10.0,74.0,148.0,12.0,51.0,59.0,20.0,29.0
856,16.0,13.0,10.0,13.0,22.0,20.0,17.0,11.0,12.0,27.0,...,0.0,22.0,13.0,60.0,101.0,1.0,33.0,29.0,2.0,9.0
857,22.0,26.0,19.0,24.0,29.0,19.0,26.0,25.0,21.0,34.0,...,2.188125,29.0,15.0,77.0,142.0,2.0,46.0,41.0,10.0,66.0
1031,20.0,27.0,22.0,24.0,20.0,34.0,40.0,23.0,38.0,34.0,...,1.78,23.0,11.0,74.0,158.0,8.0,53.0,49.0,17.0,96.0


## STRAIN

In [8]:
subfile = '/Users/zyy219/Documents/NYU_RiskPreferenceProject.csv'
df = pd.read_csv(subfile)
sub_id = subID_list()
df_sub = df[['ID','PHQ','K6','StressCT','StressTH','EvntCT','DiffCT','EvntTH','DiffTH']]
#df_sub = df[['ID','DHEvntCT','DHDiffCT','DHAllCT','DHEvntTH','DHDiffTH','DHAllTH']]
subsetDataFrame =  df_sub[df_sub['ID'].isin(sub_id)]
ddf = subsetDataFrame.sort_values(by=['ID'])
trait_strain = ddf[['PHQ','K6','StressCT','StressTH','EvntCT','DiffCT','EvntTH','DiffTH']].to_numpy()


NameError: name 'subID_list' is not defined