In [3]:
import pandas as pd
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from scipy.stats import chi2_contingency
print("Setup Complete")

In [2]:
# Relative path of the CSV file to read. Being relative, it is resolved
# against the kernel's current working directory when the file is opened.
my_filepath = "Mental Health Dataset.csv"


In [3]:
# Load the CSV into `mental_health`, using the "Country" column as the index.
# The path comes from `my_filepath` (set in the previous cell) instead of a
# second copy of the file name, so it only has to be changed in one place.
# `parse_dates=True` is intentionally not passed: combined with `index_col` it
# tells pandas to try parsing the index as dates, and the index here holds
# country names, so the attempt is wasted work and can trigger parser warnings.
mental_health = pd.read_csv(my_filepath, index_col="Country")

In [4]:
# Examine the dataset. As the last expression of the cell, the frame is
# rendered by the notebook rather than printed.
mental_health

In [5]:
# Collect the column labels of the loaded frame. "Country" is not among them
# because it was consumed as the index when the CSV was read.
columns_list = mental_health.columns

# Print the column labels
print(columns_list)

In [6]:
# Tally the missing entries column by column: isna() flags each missing cell
# and the column-wise sum turns those flags into counts.
missing_values_count = mental_health.isna().sum(axis=0)

# Show the per-column totals
print(missing_values_count)

In [7]:
# Keep only the rows whose 'self_employed' value is present; missing values in
# other columns are not considered here.
# The filtered frame is bound back to `mental_health`, so the unfiltered data
# is no longer reachable after this cell (re-run the read cell to restore it).
mental_health = mental_health.dropna(subset=['self_employed'])

In [8]:
# Re-count the missing entries per column, now that rows lacking a
# 'self_employed' value have been dropped.
missing_values_count = mental_health.isna().sum(axis=0)

# Show the per-column totals
print(missing_values_count)

In [9]:
# Step 1: Analyze the distribution of genders.
# value_counts() tallies the rows per distinct 'Gender' value, most frequent
# first; missing values are left out by default.
gender_distribution = mental_health['Gender'].value_counts()
print("Gender Distribution:\n", gender_distribution)

In [10]:
# Step 2: Relate gender to treatment-seeking.
# The table has one row per 'Gender' value and one column per 'treatment'
# value; each cell is the number of rows with that combination.
treatment_by_gender = pd.crosstab(
    index=mental_health['Gender'],
    columns=mental_health['treatment'],
)
print("\nTreatment by Gender:\n", treatment_by_gender)

In [11]:
# Convert the counts into within-gender shares: every row is divided by its
# own total, so each row sums to 1 and genders of different size are comparable.
treatment_by_gender_proportions = treatment_by_gender.div(
    treatment_by_gender.sum(axis='columns'),
    axis='index',
)
print("\nProportions of Treatment by Gender:\n", treatment_by_gender_proportions)

In [12]:
# Gender and Mental Health: How do mental health issues differ across genders?
# Are certain genders more likely to seek treatment?
# Step 3: Visualize the findings

# Number of rows per gender, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=mental_health, x='Gender', ax=ax)
ax.set_title('Distribution of Gender')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

# Share of each treatment answer within every gender, as stacked bars. The
# input is the proportions table computed in the previous cell.
ax = treatment_by_gender_proportions.plot(kind='bar', stacked=True, rot=45)
ax.set_title('Proportion of Individuals Seeking Treatment by Gender')
ax.set_ylabel('Proportion')
ax.set_xlabel('Gender')
# The legend entries come from the table's own column labels. Passing a
# hard-coded labels=['No', 'Yes'] list ties the text to the bars only by
# position, so it would silently mislabel them if the 'treatment' column ever
# held other values or the columns came in a different order.
ax.legend(title='Treatment')
plt.show()

In [13]:
# Is there a correlation between one's occupation
# and their mental health status or treatment-seeking behavior?

# Step 1: Cross-tabulation — one row per 'Occupation' value, one column per
# 'treatment' value, cells hold row counts.
occupation_treatment_ct = pd.crosstab(mental_health['Occupation'], mental_health['treatment'])

# Step 2: Visualization
# Divide every row by its total so occupations of different size are comparable.
occupation_treatment_prop = occupation_treatment_ct.div(occupation_treatment_ct.sum(axis=1), axis=0)
ax = occupation_treatment_prop.plot(kind='bar', stacked=True, figsize=(10, 6), rot=45)
ax.set_title('Treatment-Seeking Behavior by Occupation')
ax.set_xlabel('Occupation')
# Each stacked bar shows every treatment answer (the segments add up to 1), so
# the axis is labelled as a plain proportion, matching the gender plot.
ax.set_ylabel('Proportion')
# Legend entries are taken from the table's column labels rather than a
# hard-coded ['No', 'Yes'] list, which would be matched to the bars only by
# position and could mislabel them.
ax.legend(title='Treatment')
plt.tight_layout()
plt.show()

# Step 3: Statistical Test (Chi-square test of independence on the raw counts,
# not on the proportions). `dof` and `expected` are returned by SciPy but not
# reported here.
chi2, p, dof, expected = chi2_contingency(occupation_treatment_ct)
print(f"Chi-square Statistic: {chi2}, p-value: {p}")

In [14]:
# Question: how does a family history of mental health issues relate to a
# person's own mental health history and to whether they seek treatment?

# Counts of each 'Mental_Health_History' answer per 'family_history' answer
cross_tab_history = pd.crosstab(
    index=mental_health['family_history'],
    columns=mental_health['Mental_Health_History'],
)

# Counts of each 'treatment' answer per 'family_history' answer
cross_tab_treatment = pd.crosstab(
    index=mental_health['family_history'],
    columns=mental_health['treatment'],
)

# Stacked bars: family history vs. personal mental health history
ax = cross_tab_history.plot.bar(stacked=True, figsize=(10, 6), rot=0)
ax.set_title('Personal Mental Health History vs. Family History of Mental Health Issues')
ax.set_xlabel('Family History of Mental Health Issues')
ax.set_ylabel('Count')
ax.legend(title='Personal Mental Health History')
plt.show()

# Stacked bars: family history vs. treatment-seeking behavior
ax = cross_tab_treatment.plot.bar(stacked=True, figsize=(10, 6), rot=0)
ax.set_title('Treatment Seeking Behavior vs. Family History of Mental Health Issues')
ax.set_xlabel('Family History of Mental Health Issues')
ax.set_ylabel('Count')
ax.legend(title='Treatment Seeking Behavior')
plt.show()


In [15]:
# Question: is more time spent indoors associated with higher reported stress
# or with changes in habits?

# Counts of each 'Growing_Stress' answer per 'Days_Indoors' category
cross_tab_stress = pd.crosstab(
    index=mental_health['Days_Indoors'],
    columns=mental_health['Growing_Stress'],
)

# Counts of each 'Changes_Habits' answer per 'Days_Indoors' category
cross_tab_habits = pd.crosstab(
    index=mental_health['Days_Indoors'],
    columns=mental_health['Changes_Habits'],
)

# Stacked bars: days indoors vs. reported stress
ax = cross_tab_stress.plot.bar(stacked=True, figsize=(10, 6), rot=45)
ax.set_title('Reported Stress Levels vs. Days Indoors')
ax.set_xlabel('Days Indoors')
ax.set_ylabel('Count')
ax.legend(title='Reported Stress')
plt.show()

# Stacked bars: days indoors vs. changes in habits
ax = cross_tab_habits.plot.bar(stacked=True, figsize=(10, 6), rot=45)
ax.set_title('Changes in Mental Health Habits vs. Days Indoors')
ax.set_xlabel('Days Indoors')
ax.set_ylabel('Count')
ax.legend(title='Changes in Habits')
plt.show()