In [1]:
# Importing modules
## helpful packages
import pandas as pd
import numpy as np
import re

## repeated printouts and wide-format text
from IPython.core.interactiveshell import InteractiveShell
# "all" makes IPython display the value of every expression statement in a
# cell, not just the last one (this is the "repeated printouts" above).
InteractiveShell.ast_node_interactivity = "all"
# A max_colwidth of None lifts pandas' column-width limit, so long free-text
# cells are shown in full instead of being truncated.
pd.set_option('display.max_colwidth', None)


In [2]:
# Reading the datasets
# The four Excel workbooks are read in the order listed and unpacked, in that
# same order, into the frame names used by the rest of the notebook.
demo_df, FEIS_df, time_df, dict_df = [
    pd.read_excel(workbook_path)
    for workbook_path in (
        r"../files/Dartmouth_Data_Set.xlsx",
        r"../files/START_FEIS_Data.xlsx",
        r"../files/Dartmouth_Time_Data.xlsx",
        r"../files/Final SIRS_Data_Dictionary_V13.1 October 2020.xlsx",
    )
]

In [3]:
# Cleaning the demographics dataset
# Columns retained from demo_df; every other column is dropped.
demographic_columns = [
    'Local ID',
    'Region',
    'Date Enrolled in START',
    'Gender',
    'Race',
    'Date of birth',
    'Ethnicity',
    'Level of Intellectual Disability',
    'Psychiatric diagnoses',
    'Medical diagnoses',
    'Other Disabilities',
    'Funding',
]
demographics = demo_df[demographic_columns]

In [4]:
# Merging datasets (FEIS and demographics)
# Inner join: only IDs present in both frames survive. The demographics
# 'Local ID' is matched against the FEIS respondent-ID column.
merged = pd.merge(
    left=demographics,
    right=FEIS_df,
    how='inner',
    left_on=['Local ID'],
    right_on=['Respondent ID #  (SIRS Local ID)'],
)

# Original column name -> short name for the ID and the three free-text /
# yes-no survey questions. Insertion order here is the output column order.
short_answer_names = {
    'Local ID': 'ID',
    'What\nadvice would you give to service planners regarding the mental health service\nneeds of persons with IDD and their families?': 'Advice',
    "Was there any particular service that your\nfamily member needed that was not available?": 'Missing Service',
    "If yes, please describe the service.": 'Service Needed',
}

# Gender and Race keep their names; the remaining columns are renamed by label
# rather than by position.
merged_short_answer = merged[['Gender', 'Race', *short_answer_names]].rename(
    columns=short_answer_names
)
merged_short_answer



Unnamed: 0,Gender,Race,ID,Advice,Missing Service,Service Needed
0,Male,Other: Mexican,8008815,,No,
1,Female,"Unknown, not collected",6570649,"â€œPlease be aware of her conditions and diagnosis, so many professionals are unfamiliar with the medical history of Citlalli. It is discouraging when professionals do not know Citlalli, but make recommendations for her. Also, it is discouraging when the professionals do not take the opinions of the family seriously.â€",Yes,A counselor was not and has not been made available for the last six months.
2,Female,White,434021,,Yes,In-home behavior support
3,Male,White,6580618,Declined to answer/did not know.,Yes,"""After Trevorâ€™s psychiatrist left the office, the office also stopped taking his insurance and as a result, Trevor went without a psychiatrist for a while. Trevorâ€™s family tried their best to get him in with other psychiatrists, but struggled to find one that would treat Trevor. Through SARC, Trevor was referred to Hope Services and will begin seeing a psychiatrist there on 1.27.21."""
4,Male,"Unknown, not collected",354280,"Listen to the parents, take what parents report seriously, and provide tips, not just call the cops, have options/walk parent through it.",Yes,"At home off hour support on phone or in person/respite, have removed for the night for safety reasons."
...,...,...,...,...,...,...
1092,Male,Black or African American,1013197,,No,
1093,Female,White,1100502,,No,
1094,Female,Black or African American,1132230,,Yes,Wraparound services and continuily of care
1095,Male,White,11128011,,No,


In [6]:
# Subsetting by gender
# Rows whose Gender is neither 'Male' nor 'Female' end up in neither frame.
demographics_male = merged_short_answer.loc[merged_short_answer['Gender'] == 'Male']
demographics_female = merged_short_answer.loc[merged_short_answer['Gender'] == 'Female']

# Subsetting by race
# NOTE(review): the non-white groups are defined as `Race != "White"`, so rows
# with a missing Race or with "Unknown, not collected" are counted as
# non-white. Confirm that this is the intended grouping.
male_white = demographics_male[demographics_male['Race'] == "White"]
male_nonwhite = demographics_male[demographics_male['Race'] != "White"]
female_white = demographics_female[demographics_female['Race'] == "White"]
female_nonwhite = demographics_female[demographics_female['Race'] != "White"]

# Creating CSV files
# `to_csv` returns None when it is given a path, so it is called only for its
# side effect; binding or displaying its result (as before) showed nothing.
# NOTE(review): the output paths carry no ".csv" extension. They are kept
# unchanged so anything that already reads these files keeps working.
for output_path, subset in (
    ("../output/male_white", male_white),
    ("../output/male_nonwhite", male_nonwhite),
    ("../output/female_white", female_white),
    ("../output/female_nonwhite", female_nonwhite),
):
    subset.to_csv(output_path, index=False)

In [7]:
# Counting how many entries of the dataframe did not report a race
unknown_race_count = merged_short_answer.Race.str.contains('Unknown, not collected').sum()
print(f"Number of entries where patient's race is unknown: {unknown_race_count}")

# Subsetting by gender
is_male = merged_short_answer['Gender'] == 'Male'
is_female = merged_short_answer['Gender'] == 'Female'
demographics_male = merged_short_answer.loc[is_male]
demographics_female = merged_short_answer.loc[is_female]
print(f"Number of males overall in the dataset: {demographics_male.shape[0]}")
print(f"Number of females overall in the dataset: {demographics_female.shape[0]}")

# Subsetting by race ("non-white" here means every row whose Race is not
# exactly "White")
male_white = demographics_male[demographics_male['Race'] == "White"]
male_nonwhite = demographics_male[demographics_male['Race'] != "White"]
female_white = demographics_female[demographics_female['Race'] == "White"]
female_nonwhite = demographics_female[demographics_female['Race'] != "White"]

print(f"Number of white males in the dataset: {male_white.shape[0]}")
print(f"Number of non-white males in the dataset: {male_nonwhite.shape[0]}")
print(f"Number of white females in the dataset: {female_white.shape[0]}")
print(f"Number of non-white females in the dataset: {female_nonwhite.shape[0]}")

# Missingness in the Advice column: how many rows have a non-null Advice value,
# overall and as a percentage of each gender/race group.
advice_answered_count = merged_short_answer.Advice.notna().sum()
print(f"Number of respondents who filled out Advice column: {advice_answered_count}")


def _advice_response_pct(group):
    """Return the percentage of rows in `group` whose Advice value is not null."""
    return (group.Advice.notna().sum() / group.shape[0]) * 100


perc_responses_white_males = _advice_response_pct(male_white)
perc_responses_nonwhite_males = _advice_response_pct(male_nonwhite)
perc_responses_white_females = _advice_response_pct(female_white)
perc_responses_nonwhite_females = _advice_response_pct(female_nonwhite)

print(f"Percentage of white male respondents who filled out Advice column: {perc_responses_white_males}")
print(f"Percentage of nonwhite male respondents who filled out Advice column: {perc_responses_nonwhite_males}")
print(f"Percentage of white female respondents who filled out Advice column: {perc_responses_white_females}")
print(f"Percentage of nonwhite female respondents who filled out Advice column: {perc_responses_nonwhite_females}")

Number of entries where patient's race is unknown: 53
Number of males overall in the dataset: 802
Number of females overall in the dataset: 292
Number of white males in the dataset: 468
Number of non-white males in the dataset: 334
Number of white females in the dataset: 178
Number of non-white females in the dataset: 114
Number of respondents who filled out Advice column: 544
Percentage of white male respondents who filled out Advice column: 50.641025641025635
Percentage of nonwhite male respondents who filled out Advice column: 50.0
Percentage of white female respondents who filled out Advice column: 41.01123595505618
Percentage of nonwhite female respondents who filled out Advice column: 56.14035087719298
