In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

In [2]:
# Location of the survey CSV. The default is the original local path; set the
# STRESS_DATA_PATH environment variable to run the notebook on another machine
# without editing this cell.
file_path = os.environ.get(
    "STRESS_DATA_PATH",
    "D:/Projects/Mental Health/Children_Stress.csv",
)
df = pd.read_csv(file_path)

In [3]:
print("Basic Info:")
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
df.info()

Basic Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9000 entries, 0 to 8999
Data columns (total 11 columns):
 #   Column                                                                         Non-Null Count  Dtype 
---  ------                                                                         --------------  ----- 
 0   Name                                                                           9000 non-null   object
 1   Email                                                                          9000 non-null   object
 2   Age                                                                            9000 non-null   int64 
 3   Feeling tired or having little energy                                          9000 non-null   object
 4   Poor appetite or overeating                                                    9000 non-null   object
 5   Feeling bad about yourself—or that you are a failure                           9000 non-null   object
 6   Trouble concentratin

In [4]:
# Preview the first five records to sanity-check how the columns were parsed.
print("\nFirst 5 Rows:")
df.head(n=5)


First 5 Rows:


Unnamed: 0,Name,Email,Age,Feeling tired or having little energy,Poor appetite or overeating,Feeling bad about yourself—or that you are a failure,Trouble concentrating on things,Moving or speaking so slowly that other people could have noticed,Thoughts that you would be better off dead or of hurting yourself in some way,Stress_Score,Stress_Level
0,Richard Herman,amywalker@example.org,21,Nearly every day,Nearly every day,Not at all,Several days,Nearly every day,More than half the days,2,Low
1,John Park,lindsey60@example.org,17,Nearly every day,Not at all,Not at all,Not at all,Not at all,More than half the days,1,Low
2,Roy Allen,brittany63@example.org,17,Several days,Nearly every day,Not at all,Nearly every day,Nearly every day,More than half the days,1,Low
3,Donna Henderson,jorgeharris@example.com,19,More than half the days,Nearly every day,Not at all,More than half the days,More than half the days,Not at all,0,Low
4,Mason Carter,jnunez@example.net,20,Several days,Not at all,More than half the days,Nearly every day,Nearly every day,Several days,2,Low


In [5]:
# Per-column summary covering both the numeric and the text columns.
print("\nSummary Statistics:")
df.describe(include="all")


Summary Statistics:


Unnamed: 0,Name,Email,Age,Feeling tired or having little energy,Poor appetite or overeating,Feeling bad about yourself—or that you are a failure,Trouble concentrating on things,Moving or speaking so slowly that other people could have noticed,Thoughts that you would be better off dead or of hurting yourself in some way,Stress_Score,Stress_Level
count,9000,9000,9000.0,9000,9000,9000,9000,9000,9000,9000.0,9000
unique,8513,8809,,4,4,4,4,4,4,,3
top,Michael Smith,osmith@example.com,,Nearly every day,Nearly every day,Nearly every day,Not at all,More than half the days,Nearly every day,,Low
freq,7,3,,2302,2328,2304,2274,2278,2277,,3000
mean,,,18.993,,,,,,,8.829,
std,,,1.418394,,,,,,,5.960065,
min,,,17.0,,,,,,,0.0,
25%,,,18.0,,,,,,,4.0,
50%,,,19.0,,,,,,,8.0,
75%,,,20.0,,,,,,,13.0,


In [6]:
# Number of null entries in each column.
print("\nMissing Values:")
df.isna().sum()


Missing Values:


Name                                                                             0
Email                                                                            0
Age                                                                              0
Feeling tired or having little energy                                            0
Poor appetite or overeating                                                      0
Feeling bad about yourself—or that you are a failure                             0
Trouble concentrating on things                                                  0
Moving or speaking so slowly that other people could have noticed                0
Thoughts that you would be better off dead or of hurting yourself in some way    0
Stress_Score                                                                     0
Stress_Level                                                                     0
dtype: int64

In [7]:
# Remove the identifier columns, Age and the numeric Stress_Score so that only
# the questionnaire answers and the Stress_Level label remain.
df = df.drop(
    labels=["Name", "Email", "Age", "Stress_Score"],
    axis="columns",
)

In [8]:
# Ordinal codes for the four answer options shared by every questionnaire item.
response_mapping = {
    "Not at all": 0,
    "Several days": 1,
    "More than half the days": 2,
    "Nearly every day": 3
}

# Every column except the last one (the Stress_Level target) is a question.
for col in df.columns[:-1]:
    # Re-running this cell would otherwise push the already-encoded integers
    # through the text mapping and turn the whole column into NaN, so columns
    # that are already numeric are left alone.
    if pd.api.types.is_numeric_dtype(df[col]):
        continue

    encoded = df[col].map(response_mapping)

    # .map() yields NaN for any answer text that is not a key of
    # response_mapping. Report such values instead of losing them silently;
    # print() is used because this notebook globally ignores warnings.
    unexpected = df.loc[encoded.isna() & df[col].notna(), col].unique()
    if len(unexpected) > 0:
        print(f"Warning: unmapped answers in '{col}': {list(unexpected)}")

    df[col] = encoded

In [9]:
# Fit the encoder on the target labels, then overwrite the column with the
# integer codes. The fitted encoder is kept so the codes can be decoded later.
label_encoder = LabelEncoder().fit(df["Stress_Level"])
df["Stress_Level"] = label_encoder.transform(df["Stress_Level"])

In [10]:
# Target: the encoded stress level. Features: every remaining column.
y = df["Stress_Level"]
X = df.drop("Stress_Level", axis=1)

In [11]:
# Replace any missing answer with that column's most frequent value.
# mode() can return several rows when values tie; the first row is used.
X = X.fillna(value=X.mode().iloc[0])

In [12]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [16]:
# Candidate classifiers. The tree-based models are seeded so their scores (and
# therefore the chosen "best" model) are the same on every run;
# KNeighborsClassifier takes no random_state.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier()
}

best_Stress_model = None
best_Stress_acc = 0

# NOTE(review): the winner is picked on the same test split its accuracy is
# reported on, so the winning score is an optimistic estimate.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Stress_acc = accuracy_score(y_test, y_pred)

    print(f"{name} Stress Accuracy: {Stress_acc:.2f}")
    print("-" * 40)

    # The first model is always accepted so best_Stress_model can never stay
    # None (which would crash the prediction cell) even if every score is 0.
    # The strict ">" keeps the earliest model when scores tie.
    if best_Stress_model is None or Stress_acc > best_Stress_acc:
        best_Stress_acc = Stress_acc
        best_Stress_model = model

print(f"\nBest Model for Stress: {best_Stress_model} with {best_Stress_acc:.2f} accuracy")

Random Forest Stress Accuracy: 0.35
----------------------------------------
Decision Tree Stress Accuracy: 0.34
----------------------------------------
KNN Stress Accuracy: 0.35
----------------------------------------

Best Model for Stress: KNeighborsClassifier() with 0.35 accuracy


In [17]:
# Show the 0-3 answer scale before asking any question.
for legend_line in (
    "Please answer the following questions using:",
    "0 - Not at all",
    "1 - Several days",
    "2 - More than half the days",
    "3 - Nearly every day\n",
):
    print(legend_line)


def _ask_answer(prompt):
    """Keep prompting until the user types an integer from 0 to 3; return it."""
    while True:
        try:
            answer = int(input(prompt))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if answer in (0, 1, 2, 3):
            return answer
        print("Please enter a number between 0 and 3.")


# One answer per feature column, collected in the column order of X.
user_input = [
    _ask_answer(f"{question.replace('_', ' ')}: ")
    for question in X.columns
]

# Single-row frame with the same column names the model was trained on.
user_df = pd.DataFrame([user_input], columns=X.columns)

predicted_Stress = best_Stress_model.predict(user_df)[0]

# Integer code -> original label text, taken from the fitted encoder.
reverse_label = {code: label for code, label in enumerate(label_encoder.classes_)}

print("\n--- Prediction Results ---")
print(f"Stress Level: {reverse_label[predicted_Stress]}")

Please answer the following questions using:
0 - Not at all
1 - Several days
2 - More than half the days
3 - Nearly every day

Feeling tired or having little energy: 3
Poor appetite or overeating: 3
Feeling bad about yourself—or that you are a failure: 3
Trouble concentrating on things: 3
Moving or speaking so slowly that other people could have noticed: 3
Thoughts that you would be better off dead or of hurting yourself in some way: 3

--- Prediction Results ---
Stress Level: Low
