In [1]:
# Import dependencies
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sqlalchemy import create_engine

In [2]:
# Show every column whenever a DataFrame is displayed.
pd.options.display.max_columns = None

# Read the cleaned survey responses (a comma-separated file) from the Resources folder.
mental_df = pd.read_csv('./Resources/cleaned_data.csv')
mental_df

Unnamed: 0,Does your employer provide mental health benefits as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided coverage?,Does your employer offer resources to learn more about mental health concerns and options for seeking help?,"If a mental health issue prompted you to request a medical leave from work, asking for that leave would be:",Do you think that discussing a mental health disorder with your employer would have negative consequences?,Do you think that discussing a physical health issue with your employer would have negative consequences?,Do you feel that your employer takes mental health as seriously as physical health?,Have you heard of or observed negative consequences for co-workers who have been open about mental health issues in your workplace?,Do you have previous employers?,Have your previous employers provided mental health benefits?,Were you aware of the options for mental health care provided by your previous employers?,Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?,Was your anonymity protected if you chose to take advantage of mental health or substance abuse treatment resources with previous employers?,Do you think that discussing a mental health disorder with previous employers would have negative consequences?,Would you have been willing to discuss a mental health issue with your direct supervisor(s)?,Did you feel that your previous employers took mental health as seriously as physical health?,Did you hear of or observe negative consequences for co-workers with mental health issues in your previous workplaces?,Would you bring up a mental health issue with a potential employer in an interview?,Do you feel that being identified as a person with a mental health issue would hurt your career?,Do you think that team members/co-workers would view you more negatively if they knew you suffered from a 
mental health issue?,How willing would you be to share with friends and family that you have a mental illness?,Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?,Do you have a family history of mental illness?,Have you had a mental health disorder in the past?,Do you currently have a mental health disorder?,Have you been diagnosed with a mental health condition by a medical professional?,Have you ever sought treatment for a mental health issue from a mental health professional?,"If you have a mental health issue, do you feel that it interferes with your work when being treated effectively?","If you have a mental health issue, do you feel that it interferes with your work when NOT being treated effectively?",What is your gender?,What country do you live in?,What country do you work in?
0,No,Yes,Yes,Somewhat easy,No,No,Yes,No,1,"Yes, they all did",I was aware of some,None did,"Yes, always",None of them,Some of my previous employers,Some did,None of them,No,"No, I don't think it would","No, I don't think they would",Somewhat open,No,Yes,Yes,Yes,Yes,1,Rarely,Sometimes,Male,United States of America,United States of America
1,Yes,Yes,No,Neither easy nor difficult,Yes,Maybe,No,No,1,I don't know,N/A (not currently aware),Some did,I don't know,Some of them,Some of my previous employers,Some did,Some of them,No,"Yes, I think it would",Maybe,Somewhat open,"Yes, I experienced",Yes,Yes,Yes,Yes,1,Sometimes,Sometimes,Female,United States of America,United States of America
2,Yes,I am not sure,Yes,Somewhat easy,Yes,Yes,No,Yes,1,"No, none did","Yes, I was aware of all of them",None did,I don't know,"Yes, all of them","No, at none of my previous employers",None did,Some of them,Maybe,"Yes, I think it would",Maybe,Somewhat open,"Yes, I experienced",No,No,Yes,No,1,Not applicable to me,Often,Male,United Kingdom,United Kingdom
3,I don't know,No,No,Somewhat easy,No,No,Yes,No,1,Some did,I was aware of some,None did,I don't know,None of them,"Yes, at all of my previous employers",Some did,None of them,Yes,"Yes, I think it would","No, I don't think they would",Not applicable to me (I do not have a mental i...,No,No,No,No,No,0,Not applicable to me,Not applicable to me,Male,United States of America,United States of America
4,Yes,Yes,Yes,Very easy,No,No,I don't know,No,1,Some did,I was aware of some,Some did,Sometimes,Some of them,Some of my previous employers,Some did,Some of them,No,Maybe,Maybe,Somewhat open,"Yes, I observed",Yes,Yes,Yes,Yes,1,Sometimes,Often,Female,United States of America,United States of America
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
856,Yes,I am not sure,Yes,Somewhat easy,No,No,Yes,No,1,Some did,"No, I only became aware later",Some did,"Yes, always",Some of them,"No, at none of my previous employers",Some did,None of them,No,"Yes, I think it would",Maybe,Somewhat not open,No,I don't know,Maybe,Maybe,No,0,Rarely,Rarely,Male,Canada,Canada
857,I don't know,I am not sure,I don't know,Somewhat easy,Maybe,No,I don't know,No,1,I don't know,N/A (not currently aware),I don't know,I don't know,Some of them,Some of my previous employers,Some did,None of them,Maybe,"Yes, it has",Maybe,Somewhat open,"Yes, I experienced",I don't know,Yes,Yes,Yes,1,Rarely,Often,Female,Canada,Canada
858,Yes,No,No,Somewhat easy,No,No,Yes,No,1,Some did,I was aware of some,None did,I don't know,Some of them,"No, at none of my previous employers",None did,"Yes, all of them",No,Maybe,"No, I don't think they would",Very open,"Yes, I experienced",Yes,Yes,Yes,Yes,1,Rarely,Often,Female,United States of America,United States of America
859,Yes,Yes,Yes,Somewhat difficult,Maybe,Maybe,I don't know,Yes,1,Some did,I was aware of some,None did,Sometimes,"Yes, all of them",Some of my previous employers,None did,Some of them,No,"Yes, it has","No, I don't think they would",Somewhat open,"Yes, I observed",Yes,Yes,Maybe,Yes,1,Rarely,Sometimes,Male,United States of America,United States of America


In [3]:
# Build a lookup of survey questions keyed by column position:
# {0: [question text], 1: [question text], ...}.
# NOTE: the name `dict` shadows the Python builtin for the rest of the session
# (so `dict(...)` can no longer be called); it is kept unchanged only because a
# later cell displays the lookup under this name.
dict = {position: [question] for position, question in enumerate(mental_df.columns)}

# One row per question; the index is the question's column position.
questions_df = pd.DataFrame.from_dict(dict, orient='index', columns=['Question'])
questions_df

Unnamed: 0,Question
0,Does your employer provide mental health benef...
1,Do you know the options for mental health care...
2,Does your employer offer resources to learn mo...
3,If a mental health issue prompted you to reque...
4,Do you think that discussing a mental health d...
5,Do you think that discussing a physical health...
6,Do you feel that your employer takes mental he...
7,Have you heard of or observed negative consequ...
8,Do you have previous employers?
9,Have your previous employers provided mental h...


In [4]:
# Map every question-text column label to its integer position.
# A comprehension is used (rather than dict(...)) because an earlier cell
# rebinds the name `dict`.
column_rename = {question: position for position, question in enumerate(mental_df.columns)}

# Work on a renamed copy; mental_df itself keeps the full question text.
renamed_df = mental_df.rename(columns=column_rename)
renamed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0,No,Yes,Yes,Somewhat easy,No,No,Yes,No,1,"Yes, they all did",I was aware of some,None did,"Yes, always",None of them,Some of my previous employers,Some did,None of them,No,"No, I don't think it would","No, I don't think they would",Somewhat open,No,Yes,Yes,Yes,Yes,1,Rarely,Sometimes,Male,United States of America,United States of America
1,Yes,Yes,No,Neither easy nor difficult,Yes,Maybe,No,No,1,I don't know,N/A (not currently aware),Some did,I don't know,Some of them,Some of my previous employers,Some did,Some of them,No,"Yes, I think it would",Maybe,Somewhat open,"Yes, I experienced",Yes,Yes,Yes,Yes,1,Sometimes,Sometimes,Female,United States of America,United States of America
2,Yes,I am not sure,Yes,Somewhat easy,Yes,Yes,No,Yes,1,"No, none did","Yes, I was aware of all of them",None did,I don't know,"Yes, all of them","No, at none of my previous employers",None did,Some of them,Maybe,"Yes, I think it would",Maybe,Somewhat open,"Yes, I experienced",No,No,Yes,No,1,Not applicable to me,Often,Male,United Kingdom,United Kingdom
3,I don't know,No,No,Somewhat easy,No,No,Yes,No,1,Some did,I was aware of some,None did,I don't know,None of them,"Yes, at all of my previous employers",Some did,None of them,Yes,"Yes, I think it would","No, I don't think they would",Not applicable to me (I do not have a mental i...,No,No,No,No,No,0,Not applicable to me,Not applicable to me,Male,United States of America,United States of America
4,Yes,Yes,Yes,Very easy,No,No,I don't know,No,1,Some did,I was aware of some,Some did,Sometimes,Some of them,Some of my previous employers,Some did,Some of them,No,Maybe,Maybe,Somewhat open,"Yes, I observed",Yes,Yes,Yes,Yes,1,Sometimes,Often,Female,United States of America,United States of America
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
856,Yes,I am not sure,Yes,Somewhat easy,No,No,Yes,No,1,Some did,"No, I only became aware later",Some did,"Yes, always",Some of them,"No, at none of my previous employers",Some did,None of them,No,"Yes, I think it would",Maybe,Somewhat not open,No,I don't know,Maybe,Maybe,No,0,Rarely,Rarely,Male,Canada,Canada
857,I don't know,I am not sure,I don't know,Somewhat easy,Maybe,No,I don't know,No,1,I don't know,N/A (not currently aware),I don't know,I don't know,Some of them,Some of my previous employers,Some did,None of them,Maybe,"Yes, it has",Maybe,Somewhat open,"Yes, I experienced",I don't know,Yes,Yes,Yes,1,Rarely,Often,Female,Canada,Canada
858,Yes,No,No,Somewhat easy,No,No,Yes,No,1,Some did,I was aware of some,None did,I don't know,Some of them,"No, at none of my previous employers",None did,"Yes, all of them",No,Maybe,"No, I don't think they would",Very open,"Yes, I experienced",Yes,Yes,Yes,Yes,1,Rarely,Often,Female,United States of America,United States of America
859,Yes,Yes,Yes,Somewhat difficult,Maybe,Maybe,I don't know,Yes,1,Some did,I was aware of some,None did,Sometimes,"Yes, all of them",Some of my previous employers,None did,Some of them,No,"Yes, it has","No, I don't think they would",Somewhat open,"Yes, I observed",Yes,Yes,Maybe,Yes,1,Rarely,Sometimes,Male,United States of America,United States of America


In [5]:
# Store both frames in a local SQLite database file (mental_health.db).
engine = create_engine('sqlite:///mental_health.db', echo=False)
# if_exists='replace' recreates each table, so re-running this cell overwrites
# the previous contents instead of failing or appending.
# The DataFrame index is written as well (the next cell drops it as "index").
renamed_df.to_sql('pre_encoded_survey', con=engine, if_exists='replace')
questions_df.to_sql('pre_encoded_questions', con=engine, if_exists='replace')

In [6]:
# Load the survey table back from the database, reusing the engine created
# when the table was written instead of repeating the connection URL.
# to_sql stored the DataFrame index as an extra "index" column; drop it.
# NOTE(review): after this round trip the column labels appear to be strings
# ("0", "1", ...) rather than ints; later cells rely on that when they build
# and look up names such as "25_Yes".
mental_df = pd.read_sql_table('pre_encoded_survey', con=engine).drop(columns=['index'])
mental_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0,No,Yes,Yes,Somewhat easy,No,No,Yes,No,1,"Yes, they all did",I was aware of some,None did,"Yes, always",None of them,Some of my previous employers,Some did,None of them,No,"No, I don't think it would","No, I don't think they would",Somewhat open,No,Yes,Yes,Yes,Yes,1,Rarely,Sometimes,Male,United States of America,United States of America
1,Yes,Yes,No,Neither easy nor difficult,Yes,Maybe,No,No,1,I don't know,N/A (not currently aware),Some did,I don't know,Some of them,Some of my previous employers,Some did,Some of them,No,"Yes, I think it would",Maybe,Somewhat open,"Yes, I experienced",Yes,Yes,Yes,Yes,1,Sometimes,Sometimes,Female,United States of America,United States of America
2,Yes,I am not sure,Yes,Somewhat easy,Yes,Yes,No,Yes,1,"No, none did","Yes, I was aware of all of them",None did,I don't know,"Yes, all of them","No, at none of my previous employers",None did,Some of them,Maybe,"Yes, I think it would",Maybe,Somewhat open,"Yes, I experienced",No,No,Yes,No,1,Not applicable to me,Often,Male,United Kingdom,United Kingdom
3,I don't know,No,No,Somewhat easy,No,No,Yes,No,1,Some did,I was aware of some,None did,I don't know,None of them,"Yes, at all of my previous employers",Some did,None of them,Yes,"Yes, I think it would","No, I don't think they would",Not applicable to me (I do not have a mental i...,No,No,No,No,No,0,Not applicable to me,Not applicable to me,Male,United States of America,United States of America
4,Yes,Yes,Yes,Very easy,No,No,I don't know,No,1,Some did,I was aware of some,Some did,Sometimes,Some of them,Some of my previous employers,Some did,Some of them,No,Maybe,Maybe,Somewhat open,"Yes, I observed",Yes,Yes,Yes,Yes,1,Sometimes,Often,Female,United States of America,United States of America
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
856,Yes,I am not sure,Yes,Somewhat easy,No,No,Yes,No,1,Some did,"No, I only became aware later",Some did,"Yes, always",Some of them,"No, at none of my previous employers",Some did,None of them,No,"Yes, I think it would",Maybe,Somewhat not open,No,I don't know,Maybe,Maybe,No,0,Rarely,Rarely,Male,Canada,Canada
857,I don't know,I am not sure,I don't know,Somewhat easy,Maybe,No,I don't know,No,1,I don't know,N/A (not currently aware),I don't know,I don't know,Some of them,Some of my previous employers,Some did,None of them,Maybe,"Yes, it has",Maybe,Somewhat open,"Yes, I experienced",I don't know,Yes,Yes,Yes,1,Rarely,Often,Female,Canada,Canada
858,Yes,No,No,Somewhat easy,No,No,Yes,No,1,Some did,I was aware of some,None did,I don't know,Some of them,"No, at none of my previous employers",None did,"Yes, all of them",No,Maybe,"No, I don't think they would",Very open,"Yes, I experienced",Yes,Yes,Yes,Yes,1,Rarely,Often,Female,United States of America,United States of America
859,Yes,Yes,Yes,Somewhat difficult,Maybe,Maybe,I don't know,Yes,1,Some did,I was aware of some,None did,Sometimes,"Yes, all of them",Some of my previous employers,None did,Some of them,No,"Yes, it has","No, I don't think they would",Somewhat open,"Yes, I observed",Yes,Yes,Maybe,Yes,1,Rarely,Sometimes,Male,United States of America,United States of America


In [7]:
# Collect the label of every column whose dtype is "object" (the text answers);
# this is the list of categorical variables.
mental_cat = [label for label, dtype in mental_df.dtypes.items() if dtype == "object"]

# Count the distinct answers found in each categorical column
mental_df[mental_cat].nunique()

0     4
1     3
2     3
3     6
4     3
5     3
6     3
7     2
9     4
10    4
11    4
12    4
13    4
14    4
15    4
16    3
17    3
18    5
19    5
20    6
21    4
22    3
23    3
24    3
25    2
27    5
28    5
29    3
30    7
31    8
dtype: int64

In [8]:
# Create a OneHotEncoder that returns a dense array.
# Newer scikit-learn releases renamed the `sparse` keyword to `sparse_output`
# (and later removed `sparse`), so try the new spelling first and fall back to
# the old one when the installed version does not know it.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the categorical columns. Reusing mental_df's index keeps the
# encoded rows aligned with their source rows when the frames are merged on index.
encode_df = pd.DataFrame(enc.fit_transform(mental_df[mental_cat]), index=mental_df.index)

# Name each encoded column "<column>_<category>".
# get_feature_names was replaced by get_feature_names_out in newer releases;
# use whichever one the installed version provides.
feature_namer = getattr(enc, "get_feature_names_out", None) or enc.get_feature_names
encode_df.columns = feature_namer(mental_cat)
encode_df.head()

Unnamed: 0,0_I don't know,0_No,0_Not eligible for coverage / N/A,0_Yes,1_I am not sure,1_No,1_Yes,2_I don't know,2_No,2_Yes,3_I don't know,3_Neither easy nor difficult,3_Somewhat difficult,3_Somewhat easy,3_Very difficult,3_Very easy,4_Maybe,4_No,4_Yes,5_Maybe,5_No,5_Yes,6_I don't know,6_No,6_Yes,7_No,7_Yes,9_I don't know,"9_No, none did",9_Some did,"9_Yes, they all did",10_I was aware of some,10_N/A (not currently aware),"10_No, I only became aware later","10_Yes, I was aware of all of them",11_I don't know,11_None did,11_Some did,"11_Yes, they all did",12_I don't know,12_No,12_Sometimes,"12_Yes, always",13_I don't know,13_None of them,13_Some of them,"13_Yes, all of them",14_I don't know,"14_No, at none of my previous employers",14_Some of my previous employers,"14_Yes, at all of my previous employers",15_I don't know,15_None did,15_Some did,"15_Yes, they all did",16_None of them,16_Some of them,"16_Yes, all of them",17_Maybe,17_No,17_Yes,18_Maybe,"18_No, I don't think it would","18_No, it has not","18_Yes, I think it would","18_Yes, it has",19_Maybe,"19_No, I don't think they would","19_No, they do not","19_Yes, I think they would","19_Yes, they do",20_Neutral,20_Not applicable to me (I do not have a mental illness),20_Not open at all,20_Somewhat not open,20_Somewhat open,20_Very open,21_Maybe/Not sure,21_No,"21_Yes, I experienced","21_Yes, I observed",22_I don't know,22_No,22_Yes,23_Maybe,23_No,23_Yes,24_Maybe,24_No,24_Yes,25_No,25_Yes,27_Never,27_Not applicable to me,27_Often,27_Rarely,27_Sometimes,28_Never,28_Not applicable to me,28_Often,28_Rarely,28_Sometimes,29_Female,29_Male,29_Other,30_Australia,30_Canada,30_Germany,30_Netherlands,30_Other,30_United Kingdom,30_United States of America,31_Australia,31_Canada,31_Germany,31_Netherlands,31_Other,31_Sweden,31_United Kingdom,31_United States of America
0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [9]:
# Append the one-hot encoded columns (rows matched on the index), then remove
# the original categorical columns they replace.
# `columns=` is spelled out because passing the axis positionally to
# DataFrame.drop is rejected by newer pandas releases.
# NOTE: this rebinds mental_df, so the cell cannot be re-run on its own; re-run
# from the cell that loads mental_df from the database.
mental_df = (
    mental_df
    .merge(encode_df, left_index=True, right_index=True)
    .drop(columns=mental_cat)
)
mental_df.head()

Unnamed: 0,8,26,0_I don't know,0_No,0_Not eligible for coverage / N/A,0_Yes,1_I am not sure,1_No,1_Yes,2_I don't know,2_No,2_Yes,3_I don't know,3_Neither easy nor difficult,3_Somewhat difficult,3_Somewhat easy,3_Very difficult,3_Very easy,4_Maybe,4_No,4_Yes,5_Maybe,5_No,5_Yes,6_I don't know,6_No,6_Yes,7_No,7_Yes,9_I don't know,"9_No, none did",9_Some did,"9_Yes, they all did",10_I was aware of some,10_N/A (not currently aware),"10_No, I only became aware later","10_Yes, I was aware of all of them",11_I don't know,11_None did,11_Some did,"11_Yes, they all did",12_I don't know,12_No,12_Sometimes,"12_Yes, always",13_I don't know,13_None of them,13_Some of them,"13_Yes, all of them",14_I don't know,"14_No, at none of my previous employers",14_Some of my previous employers,"14_Yes, at all of my previous employers",15_I don't know,15_None did,15_Some did,"15_Yes, they all did",16_None of them,16_Some of them,"16_Yes, all of them",17_Maybe,17_No,17_Yes,18_Maybe,"18_No, I don't think it would","18_No, it has not","18_Yes, I think it would","18_Yes, it has",19_Maybe,"19_No, I don't think they would","19_No, they do not","19_Yes, I think they would","19_Yes, they do",20_Neutral,20_Not applicable to me (I do not have a mental illness),20_Not open at all,20_Somewhat not open,20_Somewhat open,20_Very open,21_Maybe/Not sure,21_No,"21_Yes, I experienced","21_Yes, I observed",22_I don't know,22_No,22_Yes,23_Maybe,23_No,23_Yes,24_Maybe,24_No,24_Yes,25_No,25_Yes,27_Never,27_Not applicable to me,27_Often,27_Rarely,27_Sometimes,28_Never,28_Not applicable to me,28_Often,28_Rarely,28_Sometimes,29_Female,29_Male,29_Other,30_Australia,30_Canada,30_Germany,30_Netherlands,30_Other,30_United Kingdom,30_United States of America,31_Australia,31_Canada,31_Germany,31_Netherlands,31_Other,31_Sweden,31_United Kingdom,31_United States of America
0,1,1,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1,1,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,1,1,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1,0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1,1,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [10]:
# Show each column number next to the survey question it stands for
{position: [question] for position, question in questions_df['Question'].items()}

{0: ['Does your employer provide mental health benefits as part of healthcare coverage?'],
 1: ['Do you know the options for mental health care available under your employer-provided coverage?'],
 2: ['Does your employer offer resources to learn more about mental health concerns and options for seeking help?'],
 3: ['If a mental health issue prompted you to request a medical leave from work, asking for that leave would be:'],
 4: ['Do you think that discussing a mental health disorder with your employer would have negative consequences?'],
 5: ['Do you think that discussing a physical health issue with your employer would have negative consequences?'],
 6: ['Do you feel that your employer takes mental health as seriously as physical health?'],
 7: ['Have you heard of or observed negative consequences for co-workers who have been open about mental health issues in your workplace?'],
 8: ['Do you have previous employers?'],
 9: ['Have your previous employers provided mental health benefi

In [11]:
# Target: the one-hot "Yes" indicator of question 25.
y = mental_df.loc[:, "25_Yes"]

# Features: every remaining column. Both indicators of question 25 are removed
# so the target's own answer is not among the model inputs.
X = mental_df.drop(labels=["25_Yes", "25_No"], axis="columns")

In [12]:
# Split features and target into training and testing subsets.
# random_state is fixed so the same split is produced on every run.
split_parts = train_test_split(X, y, random_state=78)
X_train, X_test, y_train, y_test = split_parts

In [13]:
# Configure the random forest classifier (not fitted yet):
# 128 trees and a fixed seed.
forest_settings = {"n_estimators": 128, "random_state": 78}
rf_model = RandomForestClassifier(**forest_settings)

In [14]:
# Fit the forest on the training split.
# The return value of fit() is assigned back to rf_model; because the cell ends
# in an assignment rather than a bare expression, nothing is echoed as output.
rf_model = rf_model.fit(X_train, y_train)

In [15]:
# Predict a class label for every row of the held-out test features.
predictions = rf_model.predict(X=X_test)
predictions

array([0., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 1., 1., 0., 1.,
       1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1.,
       1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 1.,
       1., 0., 1., 0., 1., 1., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1.,
       0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1.,
       0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 1., 1.,
       0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1.,
       0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 0.,
       1., 1., 0., 1., 0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0.,
       0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 0., 1.,
       0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 1.,
       0., 1., 1., 0., 1., 1., 1., 0., 1., 0., 0., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 0., 0., 1., 1., 0., 1., 0.])

In [16]:
# Count the test-set outcomes for each (actual class, predicted class) pair.
cm = confusion_matrix(y_true=y_test, y_pred=predictions)

# Wrap the counts in a labelled DataFrame: rows are the actual classes,
# columns are the predicted classes.
row_labels = ["Actual 0", "Actual 1"]
column_labels = ["Predicted 0", "Predicted 1"]
cm_df = pd.DataFrame(cm, index=row_labels, columns=column_labels)

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,75,23
Actual 1,7,111


In [17]:
# Fraction of test rows whose predicted label equals the true label.
acc_score = accuracy_score(y_true=y_test, y_pred=predictions)

In [18]:
# Build the per-class precision/recall/f1 text report up front.
report_text = classification_report(y_test, predictions)

# Show the confusion matrix, the accuracy and the report, in that order.
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(report_text)

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,75,23
Actual 1,7,111


Accuracy Score : 0.8611111111111112
Classification Report
              precision    recall  f1-score   support

         0.0       0.91      0.77      0.83        98
         1.0       0.83      0.94      0.88       118

    accuracy                           0.86       216
   macro avg       0.87      0.85      0.86       216
weighted avg       0.87      0.86      0.86       216



In [19]:
# Read the per-feature importance scores from the fitted random forest.
# The next cell pairs these scores positionally with X.columns.
importances = rf_model.feature_importances_
importances

array([0.00000000e+00, 1.10419050e-01, 5.67529724e-03, 1.65687097e-03,
       3.34889219e-03, 4.77398577e-03, 2.86263014e-03, 6.59232613e-03,
       6.87360840e-03, 4.75727324e-03, 4.52849706e-03, 4.32812773e-03,
       3.84626568e-03, 2.44265684e-03, 4.74462438e-03, 3.93220193e-03,
       1.96594474e-03, 2.45223494e-03, 4.32743555e-03, 4.14851247e-03,
       2.76312046e-03, 3.94917584e-03, 3.04315937e-03, 2.26892999e-03,
       4.42000131e-03, 3.18407639e-03, 3.68580684e-03, 2.50538604e-03,
       1.92557573e-03, 4.96987345e-03, 4.02072982e-03, 7.53026483e-03,
       2.36400258e-03, 5.49362358e-03, 6.82871236e-03, 2.98918112e-03,
       1.99578370e-03, 1.32254134e-03, 3.64470239e-03, 4.60907129e-03,
       8.87053686e-04, 4.09469487e-03, 1.66981912e-03, 3.11731938e-03,
       2.70249597e-03, 3.47603523e-03, 2.49857420e-03, 4.73271396e-03,
       5.25853416e-03, 1.09213371e-03, 3.55670457e-03, 3.82392394e-03,
       1.48931802e-03, 5.92142403e-03, 4.83229807e-03, 5.52944146e-03,
      

In [20]:
# Pair every importance score with its feature name, then order the pairs
# from most to least important.
importance_pairs = list(zip(rf_model.feature_importances_, X.columns))
importance_pairs.sort(reverse=True)
importance_pairs

[(0.12024805797660318, '23_Yes'),
 (0.11041904981287384, '26'),
 (0.08948881936577566, '24_Yes'),
 (0.07240868904721894, '27_Not applicable to me'),
 (0.060197667718725555, '23_No'),
 (0.043720185842798506, '28_Not applicable to me'),
 (0.036815395479663025, '24_No'),
 (0.02912006295329515, '28_Often'),
 (0.020877304353417295, '22_No'),
 (0.01873988058843284, '22_Yes'),
 (0.01588588843548007, '23_Maybe'),
 (0.015299481191566024, '27_Rarely'),
 (0.013867646180869025, '24_Maybe'),
 (0.009319877532035678, '27_Sometimes'),
 (0.009232451751315603, '28_Sometimes'),
 (0.007530264833722061, '9_Some did'),
 (0.006873608396547313, '1_Yes'),
 (0.006828712358086473, '10_N/A (not currently aware)'),
 (0.006592326132076189, '1_No'),
 (0.006098751799722544, '21_Yes, I experienced'),
 (0.0059214240286992025, "15_I don't know"),
 (0.005675297236103962, "0_I don't know"),
 (0.005529441456887535, '15_Some did'),
 (0.005493623584711995, '10_I was aware of some'),
 (0.005258534156281813, '13_Yes, all of th

In [21]:
# Persist the fitted model so it can be reused later without retraining.
import pickle

# The pickle file is written to the current working directory.
pkl_filename = "pickle_model.pkl"
with open(pkl_filename, 'wb') as model_out:
    pickle.dump(rf_model, model_out)

# Read the model straight back to confirm the round trip works.
# NOTE: pickle.load can run arbitrary code; only load files you created yourself.
with open(pkl_filename, 'rb') as model_in:
    pickle_model = pickle.load(model_in)

# Score the reloaded model on the test split and report it as a percentage,
# then predict the test labels with it.
score = pickle_model.score(X_test, y_test)
percent_correct = 100 * score
print("Test score: {0:.2f} %".format(percent_correct))
Ypredict = pickle_model.predict(X_test)

Test score: 86.11 %
