In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score, accuracy_score

# Read the dataset CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv("/kaggle/input/stress-dataset/Stress Dataset _ Tutorial-1.csv")

# Columns treated as categorical; each one is replaced by integer codes below.
categorical_cols = [
    'Gender', 'Marital_Status', 'Job_Role', 'Health_Issues',
    'Company_Size', 'Department', 'Burnout_Symptoms', 'Location',
]

# One dedicated LabelEncoder per column, stored by column name so the fitted
# label -> code mapping of every column stays available after encoding.
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Columns treated as boolean flags; cast them to int in a single astype call.
boolean_cols = [
    'Remote_Work', 'Mental_Health_Leave_Taken', 'Training_Opportunities',
    'Gender_Bias_Experienced', 'Discrimination_Experienced',
]
df = df.astype(dict.fromkeys(boolean_cols, int))

# Separate the predictors from the regression target.
X = df.drop(columns=['Stress_Level'])
y = df['Stress_Level']

# Split BEFORE scaling so that the held-out rows cannot influence the
# preprocessing statistics. The same test_size/random_state are used, so the
# row partition is the same as when splitting the already-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features: the mean/std are learned from the training rows only
# and then re-used, unchanged, to transform the test rows (no data leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix (using the train-fitted scaler),
# kept under its original name for any later cell that refers to it.
X_scaled = scaler.transform(X)

# Hyperparameters for the gradient boosting regressor, gathered in one place.
gbr_params = dict(n_estimators=100, learning_rate=0.1, random_state=42)

# Build the model and fit it on the training split (fit() returns the
# estimator itself, so construction and fitting can be chained).
gbr_model = GradientBoostingRegressor(**gbr_params).fit(X_train, y_train)

# Predict the target for the held-out test rows.
y_pred_gbr = gbr_model.predict(X_test)

# Regression metrics on the held-out test split.
mae_gbr = mean_absolute_error(y_test, y_pred_gbr)
r2_gbr = r2_score(y_test, y_pred_gbr)

# "Accuracy" here is the percentage of test rows whose prediction, once
# rounded to the nearest integer, equals the true target exactly. It is an
# exact-match rate on rounded regression output, not a classifier accuracy.
rounded_matches = np.round(y_pred_gbr) == y_test
accuracy = np.mean(rounded_matches) * 100

# Report all three figures, one per line.
print(
    f"Mean Absolute Error: {mae_gbr}",
    f"R² Score: {r2_gbr}",
    f"Accuracy: {accuracy:.2f}%",
    sep="\n",
)


Mean Absolute Error: 2.7429827620276708
R² Score: -0.0024323701797612785
Accuracy: 9.20%
