In [8]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
data_path = '/hdd/data/adp_data/student_lifestyle_dataset.csv'
df = pd.read_csv(data_path, index_col = 'Student_ID')
df.head()

Unnamed: 0_level_0,Study_Hours_Per_Day,Extracurricular_Hours_Per_Day,Sleep_Hours_Per_Day,Social_Hours_Per_Day,Physical_Activity_Hours_Per_Day,Stress_Level,Gender,Grades
Student_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,6.9,3.8,8.7,2.8,1.8,Moderate,Male,7.48
2,5.3,3.5,8.0,4.2,3.0,Low,Female,6.88
3,5.1,3.9,9.2,1.2,4.6,Low,Male,6.68
4,6.5,2.1,7.2,1.7,6.5,Moderate,Male,7.2
5,8.1,0.6,6.5,2.2,6.6,High,Male,8.78


In [3]:
df_encoded = pd.get_dummies(df, columns = ['Gender'], drop_first=True)
le = LabelEncoder()
df_encoded['Stress_Level_Label'] = le.fit_transform(df_encoded['Stress_Level'])

X = df_encoded.drop(columns=['Stress_Level', 'Stress_Level_Label'])
y = df_encoded['Stress_Level_Label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 526)

In [None]:
models = {
    'Random Forest': RandomForestClassifier(random_state = 526),
    'XGBoost': XGBClassifier(random_state = 526, eval_metric = 'mlogloss'),
    'Logistic Regression': LogisticRegression(max_iter = 1000, solver = 'lbfgs')
}

for name, model in models.items():
    print(f'\n{name}')
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred, target_names = le.classes_))
    print('Confufsion Matrix:\n', confusion_matrix(y_test, y_pred))


Random Forest
              precision    recall  f1-score   support

        High       1.00      1.00      1.00       206
         Low       1.00      1.00      1.00        49
    Moderate       1.00      1.00      1.00       145

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400

Confufsion Matrix:
 [[206   0   0]
 [  0  49   0]
 [  0   0 145]]

XGBoost
              precision    recall  f1-score   support

        High       1.00      1.00      1.00       206
         Low       1.00      1.00      1.00        49
    Moderate       1.00      1.00      1.00       145

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400

Confufsion Matrix:
 [[206   0   0]
 [  0  49   0]
 [  0   0 145]]

Logistic Regression
              precision    recall  f1-score   support

      

Parameters: { "use_label_encoder" } are not used.



In [9]:
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

rf = RandomForestClassifier(random_state = 526)
grid_search = GridSearchCV(rf, param_grid, cv = 5, scoring = 'accuracy', n_jobs = -1, verbose = 1)
grid_search.fit(X_train, y_train)

print('Best Parameters:', grid_search.best_params_)
best_rf = grid_search.best_estimator_

y_pred = best_rf.predict(X_test)
print(classification_report(y_test, y_pred, target_names = le.classes_))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))

Fitting 5 folds for each of 24 candidates, totalling 120 fits
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
              precision    recall  f1-score   support

        High       1.00      1.00      1.00       206
         Low       1.00      1.00      1.00        49
    Moderate       1.00      1.00      1.00       145

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400

Confusion Matrix:
 [[206   0   0]
 [  0  49   0]
 [  0   0 145]]
