### Human Stress Detection (Model Training)

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Read the measurements from disk; later cells take features and target from `df`.
df = pd.read_csv(filepath_or_buffer="dataset.csv")
# Preview the first five rows to sanity-check the parse.
df.head(n=5)

Unnamed: 0,sr,rr,t,lm,bo,rem,sh,hr,sl
0,93.8,25.68,91.84,16.6,89.84,99.6,1.84,74.2,3
1,91.64,25.104,91.552,15.88,89.552,98.88,1.552,72.76,3
2,60.0,20.0,96.0,10.0,95.0,85.0,7.0,60.0,1
3,85.76,23.536,90.768,13.92,88.768,96.92,0.768,68.84,3
4,48.12,17.248,97.872,6.496,96.248,72.48,8.248,53.12,0


In [4]:
# Count missing values per column.
df.isna().sum()

sr     0
rr     0
t      0
lm     0
bo     0
rem    0
sh     0
hr     0
sl     0
dtype: int64

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [6]:
# Separate predictors from the label: 'sl' is the target, every other column is a feature
X = df.drop(columns=['sl'])
y = df.loc[:, 'sl']

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Candidate classifiers to compare, keyed by the display name printed in the report.
# The random forest, decision tree and gradient boosting estimators use randomness
# while fitting, so they are seeded (same seed as the train/test split). Unseeded,
# their scores shift between kernel runs and the comparison cannot be reproduced.
# The remaining estimators are left at their defaults.
classifiers = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Logistic Regression': LogisticRegression(),
    'Support Vector Classifier': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}

In [9]:
# Fit and score every candidate behind the same standardisation step
for name, clf in classifiers.items():
    # Scaling is a pipeline step, so the scaler is fitted on the training rows only
    pipeline = Pipeline(steps=[('scaler', StandardScaler()), ('classifier', clf)])

    # Fit on the training split, then predict the held-out rows
    y_pred = pipeline.fit(X_train, y_train).predict(X_test)

    # Report hold-out accuracy followed by the per-class breakdown
    print(f'Classifier: {name}')
    print('Accuracy:', accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    print('-' * 30)

Classifier: Random Forest
Accuracy: 0.9761904761904762
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        23
           1       1.00      0.92      0.96        24
           2       0.97      1.00      0.98        28
           3       1.00      0.96      0.98        26
           4       0.96      1.00      0.98        25

    accuracy                           0.98       126
   macro avg       0.98      0.98      0.98       126
weighted avg       0.98      0.98      0.98       126

------------------------------
Classifier: Logistic Regression
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        24
           2       1.00      1.00      1.00        28
           3       1.00      1.00      1.00        26
           4       1.00      1.00      1.00        25

    accuracy                           1.00       126

In [10]:
# Train the final model: a random forest fitted directly on the training split.
# The forest is seeded so that the metrics, the saved model and the feature
# importances computed from it are repeatable across kernel restarts; an unseeded
# forest gives slightly different results on every run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [11]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [12]:
# Label the held-out rows with the fitted model
pred = model.predict(X=X_test)

In [13]:
# Score the hold-out predictions; precision, recall and F1 all use average='weighted'
accuracy = accuracy_score(y_test, pred)
precision, recall, f1 = (
    scorer(y_test, pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

In [14]:
# Emit one "<label> <value>" line per metric, in the order they were computed
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(label, value)

Accuracy: 0.9920634920634921
Precision: 0.9923941798941799
Recall: 0.9920634920634921
F1 Score: 0.9920634920634921


In [15]:
import pickle
from sklearn.ensemble import RandomForestClassifier
# Serialise the fitted model to disk; the context manager closes the handle afterwards
with open('stress_detection.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

In [16]:
# Importance scores exposed by the fitted model, one per feature column
feature_importances = model.feature_importances_

# Pair each column name with its score ...
feature_importance_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': feature_importances}
)
# ... and order the table from most to least important
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

print(feature_importance_df)

  Feature  Importance
4      bo    0.157824
2       t    0.133265
1      rr    0.125516
5     rem    0.122985
0      sr    0.121569
7      hr    0.119902
6      sh    0.110357
3      lm    0.108582
