In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore")

In [2]:
file_path = "D:/Projects/Mental Health/Parents_Stress.csv"  
df = pd.read_csv(file_path)

In [3]:
print("Basic Info:")
print(df.info())

Basic Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9000 entries, 0 to 8999
Data columns (total 10 columns):
 #   Column                                                 Non-Null Count  Dtype 
---  ------                                                 --------------  ----- 
 0   Parent_Name                                            9000 non-null   object
 1   Parent_Email                                           9000 non-null   object
 2   Parent_Age                                             9000 non-null   int64 
 3   My child has shown changes in eating habits            9000 non-null   object
 4   My child expresses negative thoughts about themselves  9000 non-null   object
 5   My child has difficulty concentrating on tasks         9000 non-null   object
 6   My child seems unusually slow or withdrawn             9000 non-null   object
 7   My child has mentioned self-harm or feeling hopeless   9000 non-null   object
 8   Child_Stress_Score                            

In [4]:
print("\nFirst 5 Rows:")
df.head()


First 5 Rows:


Unnamed: 0,Parent_Name,Parent_Email,Parent_Age,My child has shown changes in eating habits,My child expresses negative thoughts about themselves,My child has difficulty concentrating on tasks,My child seems unusually slow or withdrawn,My child has mentioned self-harm or feeling hopeless,Child_Stress_Score,Child_Stress_Level
0,James Johnston,mmckinney@example.net,36,More than half the days,More than half the days,More than half the days,Nearly every day,More than half the days,2,Low
1,Caitlin Scott,williamsbrandi@example.org,38,More than half the days,More than half the days,Several days,More than half the days,Not at all,4,Low
2,Christina Kim,jeffery65@example.com,43,Not at all,Nearly every day,Nearly every day,Several days,Several days,3,Low
3,Kim Smith,alexander69@example.org,31,Several days,Several days,Nearly every day,More than half the days,Nearly every day,5,Low
4,Francisco Rhodes DDS,scottandrews@example.org,41,Nearly every day,Not at all,Not at all,Not at all,More than half the days,2,Low


In [5]:
print("\nSummary Statistics:")
df.describe(include='all')


Summary Statistics:


Unnamed: 0,Parent_Name,Parent_Email,Parent_Age,My child has shown changes in eating habits,My child expresses negative thoughts about themselves,My child has difficulty concentrating on tasks,My child seems unusually slow or withdrawn,My child has mentioned self-harm or feeling hopeless,Child_Stress_Score,Child_Stress_Level
count,9000,9000,9000.0,9000,9000,9000,9000,9000,9000.0,9000
unique,8512,8828,,4,4,4,4,4,,3
top,John Davis,jsmith@example.net,,More than half the days,Nearly every day,More than half the days,Nearly every day,Several days,,Low
freq,6,4,,2290,2287,2294,2322,2293,,3000
mean,,,39.980333,,,,,,8.823333,
std,,,6.066761,,,,,,5.992542,
min,,,30.0,,,,,,0.0,
25%,,,35.0,,,,,,4.0,
50%,,,40.0,,,,,,8.0,
75%,,,45.0,,,,,,13.0,


In [6]:
print("\nMissing Values:")
df.isnull().sum()


Missing Values:


Parent_Name                                              0
Parent_Email                                             0
Parent_Age                                               0
My child has shown changes in eating habits              0
My child expresses negative thoughts about themselves    0
My child has difficulty concentrating on tasks           0
My child seems unusually slow or withdrawn               0
My child has mentioned self-harm or feeling hopeless     0
Child_Stress_Score                                       0
Child_Stress_Level                                       0
dtype: int64

In [7]:
df = df.drop(columns=["Parent_Name", "Parent_Email", "Parent_Age", "Child_Stress_Score"])

In [8]:
response_mapping = {
    "Not at all": 0,
    "Several days": 1,
    "More than half the days": 2,
    "Nearly every day": 3
}

for col in df.columns[:-1]:  
    df[col] = df[col].map(response_mapping)

In [10]:
label_encoder = LabelEncoder()
df["Child_Stress_Level"] = label_encoder.fit_transform(df["Child_Stress_Level"])

In [11]:
X = df.drop(columns=["Child_Stress_Level"])
y = df["Child_Stress_Level"]

In [12]:
X = X.fillna(X.mode().iloc[0])

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
models = {
    "Random Forest": RandomForestClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "KNN": KNeighborsClassifier()
}

best_Stress_model = None
best_Stress_acc = 0

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Stress_acc = accuracy_score(y_test, y_pred)

    print(f"{name} Stress Accuracy: {Stress_acc:.2f}")
    print("-" * 40)

    if Stress_acc > best_Stress_acc:
        best_Stress_acc = Stress_acc
        best_Stress_model = model
        
print(f"\nBest Model for Stress: {best_Stress_model} with {best_Stress_acc:.2f} accuracy")

Random Forest Stress Accuracy: 0.32
----------------------------------------
Decision Tree Stress Accuracy: 0.32
----------------------------------------
KNN Stress Accuracy: 0.32
----------------------------------------

Best Model for Stress: DecisionTreeClassifier() with 0.32 accuracy


In [19]:
print("Please answer the following questions using:")
print("0 - Not at all")
print("1 - Several days")
print("2 - More than half the days")
print("3 - Nearly every day\n")

user_input = []
for question in X.columns:
    while True:
        try:
            ans = int(input(f"{question.replace('_', ' ')}: "))
            if ans in [0, 1, 2, 3]:
                user_input.append(ans)
                break
            else:
                print("Please enter a number between 0 and 3.")
        except ValueError:
            print("Invalid input. Please enter a number.")

user_df = pd.DataFrame([user_input], columns=X.columns)

predicted_Stress = best_Stress_model.predict(user_df)[0]

reverse_label = dict(enumerate(label_encoder.classes_))

print("\n--- Prediction Results ---")
print(f"Stress Level: {reverse_label[predicted_Stress]}")

Please answer the following questions using:
0 - Not at all
1 - Several days
2 - More than half the days
3 - Nearly every day

My child has shown changes in eating habits: 1
My child expresses negative thoughts about themselves: 1
My child has difficulty concentrating on tasks: 1
My child seems unusually slow or withdrawn: 1
My child has mentioned self-harm or feeling hopeless: 1

--- Prediction Results ---
Stress Level: Moderate
