# Demographics

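This notebook compares the demographic makeup of the two experimental groups (Control and Stress): age, sex, and PSS scores. It also breaks the demographics down by scanner site and summarizes the SECPT water temperatures.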

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as st 
from matplotlib import pyplot as plt
from scipy import stats
import seaborn as sns
from plotnine import *
import statsmodels.formula.api as sm
from utils import *

%run behav_starter.py

warnings.filterwarnings('ignore', category=FutureWarning)

Controls:  47
Stress:  49


In [2]:
# Questionnaire exports. `logs_path` and `secpt_path` are reused by later cells.
logs_path = './data/questionnaires/StressLearn_logs.csv'
demos_path = './data/questionnaires/StressLearn_Qs_demographics.csv'
secpt_path = './data/questionnaires/StressLearn_secpt.csv'


def _print_sex_age_summary(df, show_total=True):
    """Print the sex and age summary for the participants in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "Sex" column (rows equal to "Male" are counted) and a
        numeric "Age" column.
    show_total : bool, default True
        When True, a ``Total: <n rows>`` line is printed first.

    Prints the male count, the male percentage (``nan`` when ``df`` has no
    rows), the age range and the age median.
    """
    n_total = len(df)
    if show_total:
        print(f'Total: {n_total}')

    n_male = int((df["Sex"] == "Male").sum())
    print(f'Male number: {n_male}')
    # Guard against an empty selection instead of raising ZeroDivisionError.
    male_pct = 100 * n_male / n_total if n_total else float("nan")
    print(f'Male percent: {male_pct:.02f}%')

    print(f'Age range: {np.min(df["Age"])} - {np.max(df["Age"])}')
    print(f'Age median: {np.median(df["Age"])}')


# Session log: keep the ID/group/sex/age columns, derive an integer `subid` by
# dropping the first character of the ID, and keep only IDs listed in `subIDs`.
logs_df = pd.read_csv(logs_path)[["StressLearn_ID", "Group", "Sex", "Age"]]
logs_df["subid"] = logs_df["StressLearn_ID"].apply(lambda x: int(x[1:]))
logs_df = logs_df[np.isin(logs_df["subid"], subIDs)].drop(columns="StressLearn_ID")

# Demographics questionnaire: same ID handling, then keep race/ethnicity columns.
demos_df = pd.read_csv(demos_path)
demos_df["subid"] = demos_df["subID"].apply(lambda x: int(x[1:]))
demos_df = demos_df[["subid", "Race_nu", "Hispanic"]]

# Left merge so every retained participant stays, even without a questionnaire row.
logs_df = pd.merge(logs_df, demos_df, on="subid", how="left").reset_index(drop=True)

groups = ["Control", "Stress"]

for group in groups:
    print(group)
    _print_sex_age_summary(logs_df[logs_df["Group"] == group])
    print()

# Pooled sample: same summary, without the "Total" line (matches the original report).
print("All")
_print_sex_age_summary(logs_df, show_total=False)


Control
Total: 47
Male number: 19
Male percent: 40.43%
Age range: 18.0 - 45.0
Age median: 24.0

Stress
Total: 49
Male number: 20
Male percent: 40.82%
Age range: 18.0 - 40.0
Age median: 23.0

All
Male number: 39
Male percent: 40.62%
Age range: 18.0 - 45.0
Age median: 24.0


## Compare age across groups

In [3]:
# Ages of each group as plain NumPy arrays.
ctrl_age = logs_df.loc[logs_df["Group"] == "Control", "Age"].to_numpy()
stress_age = logs_df.loc[logs_df["Group"] == "Stress", "Age"].to_numpy()

# Independent two-sample t-test on age (SciPy defaults). Left as the cell's
# last expression so the result object is displayed.
# NOTE(review): the bare `scipy` name is not imported in the import cell (only
# `st` and `stats` are) -- presumably supplied by a star-import or %run; confirm.
scipy.stats.ttest_ind(ctrl_age, stress_age)

TtestResult(statistic=1.3957733387428097, pvalue=0.16607093664788028, df=94.0)

## Compare sex across groups

In [4]:
# Build the Group x Sex contingency table in one step so both rows share the
# same, identically ordered sex categories. Counting each group separately and
# pairing the counts by position misaligns (or yields a ragged array) as soon
# as a category is missing from one group.
sex_table = (
    pd.crosstab(logs_df["Group"], logs_df["Sex"])
    .reindex(["Control", "Stress"], fill_value=0)
)

ctrl_sex = sex_table.loc["Control"].to_numpy()
stress_sex = sex_table.loc["Stress"].to_numpy()

# Chi-square test of independence. `correction=True` applies Yates' continuity
# correction, which SciPy uses only when the table has one degree of freedom.
scipy.stats.chi2_contingency(np.array([ctrl_sex, stress_sex]), correction=True)

Chi2ContingencyResult(statistic=0.0, pvalue=1.0, dof=1, expected_freq=array([[27.90625, 19.09375],
       [29.09375, 19.90625]]))

## Compare PSS score across groups

In [5]:
# Items scored in the forward direction ("Never" -> 0 ... "Very Often" -> 4).
_PSS_FORWARD_ITEMS = frozenset(['pss_2', 'pss_3', 'pss_4', 'pss_7', 'pss_10', 'pss_11'])
# Items scored in the reverse direction ("Never" -> 4 ... "Very Often" -> 0).
_PSS_REVERSE_ITEMS = frozenset(['pss_5', 'pss_6', 'pss_8', 'pss_9'])

_PSS_FORWARD_SCORES = {"Never": 0, "Almost Never": 1, "Sometimes": 2, "Fairly Often": 3, "Very Often": 4}
_PSS_REVERSE_SCORES = {"Never": 4, "Almost Never": 3, "Sometimes": 2, "Fairly Often": 1, "Very Often": 0}


def pss_recode(col, s):
    """Convert one PSS text response into its numeric score.

    Parameters
    ----------
    col : str
        Item column name (e.g. ``'pss_2'``); selects forward or reverse scoring.
    s : str
        Response label: "Never", "Almost Never", "Sometimes", "Fairly Often"
        or "Very Often".

    Returns
    -------
    int
        Score from 0 to 4.

    Raises
    ------
    ValueError
        If ``col`` is not one of the known PSS item columns. (Previously this
        surfaced as an UnboundLocalError from the lookup line.)
    KeyError
        If ``s`` is not one of the five response labels (e.g. a missing value).
    """
    if col in _PSS_FORWARD_ITEMS:
        pss_dic = _PSS_FORWARD_SCORES
    elif col in _PSS_REVERSE_ITEMS:
        pss_dic = _PSS_REVERSE_SCORES
    else:
        raise ValueError(f"Unknown PSS item column: {col!r}")

    return pss_dic[s]

In [6]:
pss_file = "./data/questionnaires/StressLearn_Qs_PSS.csv"
subid_col = "src_subject_id"
pss_cols = [
    "pss_2", "pss_3", "pss_4", "pss_5", "pss_6",
    "pss_7", "pss_8", "pss_9", "pss_10", "pss_11",
]

# Keep the ID column plus the item columns and skip the first two rows.
# NOTE(review): the two skipped rows are presumably extra header/metadata rows
# in the export -- confirm against the CSV.
pss_df = (
    pd.read_csv(pss_file)[[subid_col] + pss_cols]
    .iloc[2:]
    .reset_index(drop=True)
    .rename(columns={"src_subject_id": "subID"})
)

# Replace each text response with its numeric score, item by item.
for item in pss_cols:
    pss_df[item] = pss_df[item].apply(lambda answer, item=item: pss_recode(item, answer))

# Per-participant score = mean across numeric columns. `subID` is still a
# string here, so only the recoded items contribute.
pss_df["score"] = pss_df.mean(axis=1, numeric_only=True)
pss_df["subID"] = pss_df["subID"].apply(lambda sid: int(sid[1:]))

# Attach group labels, then keep only participants listed in `subIDs`.
pss_df = pss_df.merge(subid2group, on="subID", how="left")
pss_df = pss_df[np.isin(pss_df["subID"], subIDs)]

ctrl_pss = pss_df.loc[pss_df["group"] == "Control", "score"].to_numpy()
stress_pss = pss_df.loc[pss_df["group"] == "Stress", "score"].to_numpy()

# Independent two-sample t-test on the scores; last expression so it is displayed.
scipy.stats.ttest_ind(ctrl_pss, stress_pss)

TtestResult(statistic=0.6932851944916656, pvalue=0.4898400926250366, df=94.0)

## Demographics across scanners

In [24]:
%run fmri_starter.py
print()

logs_df = pd.read_csv(logs_path)[["StressLearn_ID", "Group", "Sex", "Age"]]
logs_df["subid"] = logs_df["StressLearn_ID"].apply(lambda x: int(x[1:]))
logs_df = logs_df[np.isin(logs_df["subid"], subIDs)].drop(columns="StressLearn_ID")

brainworks_first_sub = 56

bic_logs_df = logs_df[logs_df['subid'] < 56]
brainworks_logs_df = logs_df[logs_df['subid'] >= 56]

for site, df in zip(["BIC", "Brainworks"], [bic_logs_df, brainworks_logs_df]):
    print(site)
    for group in groups:
        grp_df = df[df["Group"] == group]
        print(group)

        n_total = len(grp_df)
        print(f'Total: {n_total}')
        n_male = len(grp_df[grp_df["Sex"] == "Male"])
        print(f'Male number: {n_male}')
        print(f'Male percent: {100*n_male/n_total:.02f}%')
        
        print(f'Age range: {np.min(grp_df["Age"])} - {np.max(grp_df["Age"])}')
        print(f'Age median: {np.median(grp_df["Age"])}')
    
        print()
    print()



Controls:  43
Stress:  45

BIC
Control
Total: 21
Male number: 5
Male percent: 23.81%
Age range: 18.0 - 35.0
Age median: 24.0

Stress
Total: 21
Male number: 6
Male percent: 28.57%
Age range: 18.0 - 28.0
Age median: 23.0


Brainworks
Control
Total: 22
Male number: 14
Male percent: 63.64%
Age range: 18.0 - 45.0
Age median: 25.0

Stress
Total: 24
Male number: 12
Male percent: 50.00%
Age range: 18.0 - 40.0
Age median: 22.0




## SECPT water temperature

In [18]:
# Load the SECPT sheet and derive an integer `subID` by dropping the first
# character of the ID string.
secpt_df = pd.read_csv(secpt_path)
secpt_df["subID"] = secpt_df["StressLearn ID"].apply(lambda sid: int(sid[1:]))

# Keep participants listed in `subIDs`, drop the raw ID column, discard rows
# with a missing value in any column, and renumber the rows (the previous
# index is kept as an "index" column).
# NOTE(review): `subIDs` is whatever the most recently run starter script left
# in the namespace -- confirm which participant set is intended here.
in_sample = np.isin(secpt_df["subID"], subIDs)
secpt_df = secpt_df[in_sample].drop(columns="StressLearn ID").dropna().reset_index()

# Water temperatures per group (group labels are lower-case in this file).
stress_temps = secpt_df.loc[secpt_df["Group"] == "stress", "SECPT water temperature"]
control_temps = secpt_df.loc[secpt_df["Group"] == "control", "SECPT water temperature"]

print("Mean stress secpt temp", np.mean(stress_temps))
print("SD stress secpt temp", np.std(stress_temps))
print("Mean control secpt temp", np.mean(control_temps))
print("SD control secpt temp", np.std(control_temps))

Mean stress secpt temp 2.7916666666666665
SD stress secpt temp 1.0197698542100346
Mean control secpt temp 36.276595744680854
SD control secpt temp 4.210938782780191
