In [85]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import OrdinalEncoder
from sklearn.feature_selection import mutual_info_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score

df=pd.read_csv('/content/cleaned_burnout_dataset.csv')

print(df.dtypes)



EmployeeID                  int64
Age                         int64
Gender                     object
Country                    object
JobRole                    object
Department                 object
YearsAtCompany              int64
WorkHoursPerWeek            int64
RemoteWork                 object
BurnoutLevel              float64
JobSatisfaction           float64
StressLevel               float64
ProductivityScore         float64
SleepHours                float64
PhysicalActivityHrs       float64
CommuteTime                 int64
HasMentalHealthSupport     object
ManagerSupportScore       float64
HasTherapyAccess           object
MentalHealthDaysOff         int64
SalaryRange                object
WorkLifeBalanceScore      float64
TeamSize                    int64
CareerGrowthScore         float64
BurnoutRisk                 int64
dtype: object


In [86]:
cat_cols=df.select_dtypes(include="object").columns
num_cols=df.select_dtypes(include="number").columns
print(cat_cols)
print(num_cols)

Index(['Gender', 'Country', 'JobRole', 'Department', 'RemoteWork',
       'HasMentalHealthSupport', 'HasTherapyAccess', 'SalaryRange'],
      dtype='object')
Index(['EmployeeID', 'Age', 'YearsAtCompany', 'WorkHoursPerWeek',
       'BurnoutLevel', 'JobSatisfaction', 'StressLevel', 'ProductivityScore',
       'SleepHours', 'PhysicalActivityHrs', 'CommuteTime',
       'ManagerSupportScore', 'MentalHealthDaysOff', 'WorkLifeBalanceScore',
       'TeamSize', 'CareerGrowthScore', 'BurnoutRisk'],
      dtype='object')


In [87]:
df = pd.get_dummies(df, columns=cat_cols, drop_first=True)
print(df.head())

scaler=MinMaxScaler()
df[num_cols]=scaler.fit_transform(df[num_cols])

   EmployeeID  Age  YearsAtCompany  WorkHoursPerWeek  BurnoutLevel  \
0        1001   50              14                47          3.37   
1        1002   36               1                59          7.39   
2        1003   29              13                59          7.10   
3        1004   42              15                31          4.18   
4        1005   40               6                34          8.28   

   JobSatisfaction  StressLevel  ProductivityScore  SleepHours  \
0             5.06         9.47               4.16         7.0   
1             2.00         5.65               3.74         7.2   
2             7.17         5.70               8.80         5.2   
3             3.76         6.40               4.69         8.7   
4             2.34         3.41               2.12         4.2   

   PhysicalActivityHrs  ...  Department_Sales  Department_Support  \
0                  7.9  ...             False               False   
1                  9.0  ...             Fals

In [88]:
print(df.head())


   EmployeeID       Age  YearsAtCompany  WorkHoursPerWeek  BurnoutLevel  \
0    0.000000  0.756757            0.70          0.586207      0.263333   
1    0.000333  0.378378            0.05          1.000000      0.710000   
2    0.000667  0.189189            0.65          1.000000      0.677778   
3    0.001000  0.540541            0.75          0.034483      0.353333   
4    0.001334  0.486486            0.30          0.137931      0.808889   

   JobSatisfaction  StressLevel  ProductivityScore  SleepHours  \
0         0.451111     0.941111           0.351111        0.60   
1         0.111111     0.516667           0.304444        0.64   
2         0.685556     0.522222           0.866667        0.24   
3         0.306667     0.600000           0.410000        0.94   
4         0.148889     0.267778           0.124444        0.04   

   PhysicalActivityHrs  ...  Department_Sales  Department_Support  \
0                 0.79  ...             False               False   
1             

In [95]:

X=df.drop('StressLevel',axis=1)
y=df['StressLevel']
mi_scores = mutual_info_regression(X, y)
mi_scores = pd.Series(mi_scores, index=X.columns)
mi_scores = mi_scores.sort_values(ascending=False)
print(mi_scores)

Stress_WorkHours              0.727018
SleepHours                    0.010776
SalaryRange_<40K              0.010637
Country_Brazil                0.010038
Country_Germany               0.009339
CareerGrowthScore             0.008999
ManagerSupportScore           0.008657
RemoteWork_Yes                0.008020
WorkHours_SleepHours          0.006444
SalaryRange_80K-100K          0.006431
Department_IT                 0.004888
BurnoutRisk                   0.004422
SalaryRange_40K-60K           0.004374
YearsAtCompany                0.004075
Country_India                 0.003784
JobRole_HR Specialist         0.002913
Country_Canada                0.002197
JobRole_Sales Associate       0.002120
RemoteWork_No                 0.001828
JobRole_Software Engineer     0.001383
Department_HR                 0.001363
ProductivityScore             0.001037
JobRole_IT Admin              0.000760
EmployeeID                    0.000000
Age                           0.000000
MentalHealthDaysOff      

In [94]:
#interaction features
df['Stress_WorkHours'] = df['StressLevel'] * df['WorkHoursPerWeek']
# df['Stress_SleepHours'] = df['StressLevel'] * df['SleepHoursPerDay']
df['WorkHours_SleepHours'] = df['WorkHoursPerWeek'] * df['SleepHours']


In [83]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=42)

In [74]:
#Linear Regression
model=LinearRegression()
model.fit(X_train, y_train)
print(model.coef_)
predictions=model.predict(X_test)
# plt.scatter(y_test, predictions)
print("Linear Regression")
print("MSE",mean_squared_error(y_test,predictions))
print("R² Score:", r2_score(y_test, predictions))

[ 0.01570836 -0.00533359 -0.01128009 -0.01779716  0.03912568 -0.02610568
 -0.03088591  0.02389909 -0.01345036 -0.02031127 -0.04573441  0.00693008
  0.02149263  0.02669996 -0.00625983 -0.0146642  -0.03405089 -0.02452793
  0.00205778  0.00656089 -0.0043251   0.01749773 -0.02024604  0.02107358
  0.04291587  0.00380048 -0.00389144  0.00436856  0.01009397  0.02306338
  0.00569293 -0.03070655 -0.03685875 -0.00526187 -0.01742659 -0.03314888
 -0.04510642 -0.01291278 -0.02583568 -0.0018802   0.01064861  0.00837127
  0.03542098 -0.01953842  0.00273281]
Linear Regression
MSE 0.08570396333894956
R² Score: -0.026703483302488173


In [75]:

#Ridge Regression
ridge_model=Ridge(alpha=0.1)
ridge_model.fit(X_train, y_train)
print(model.coef_)
ridge_predictions=ridge_model.predict(X_test)
# plt.scatter(y_test, ridge_predictions)
print("Ridge Regression")
print("MSE",mean_squared_error(y_test,ridge_predictions))
print("R² Score:", r2_score(y_test, ridge_predictions))

[ 0.01570836 -0.00533359 -0.01128009 -0.01779716  0.03912568 -0.02610568
 -0.03088591  0.02389909 -0.01345036 -0.02031127 -0.04573441  0.00693008
  0.02149263  0.02669996 -0.00625983 -0.0146642  -0.03405089 -0.02452793
  0.00205778  0.00656089 -0.0043251   0.01749773 -0.02024604  0.02107358
  0.04291587  0.00380048 -0.00389144  0.00436856  0.01009397  0.02306338
  0.00569293 -0.03070655 -0.03685875 -0.00526187 -0.01742659 -0.03314888
 -0.04510642 -0.01291278 -0.02583568 -0.0018802   0.01064861  0.00837127
  0.03542098 -0.01953842  0.00273281]
Ridge Regression
MSE 0.0857019653392584
R² Score: -0.026679547965514194


In [77]:
#Lasso Regression
lasso_model=Lasso(alpha=0.1)
lasso_model.fit(X_train, y_train)
print(model.coef_)
lasso_predictions=lasso_model.predict(X_test)
# plt.scatter(y_test, lasso_predictions)
print("Laasso Regression")
print("MSE",mean_squared_error(y_test,lasso_predictions))
print("R² Score:", r2_score(y_test, lasso_predictions))

[ 0.01570836 -0.00533359 -0.01128009 -0.01779716  0.03912568 -0.02610568
 -0.03088591  0.02389909 -0.01345036 -0.02031127 -0.04573441  0.00693008
  0.02149263  0.02669996 -0.00625983 -0.0146642  -0.03405089 -0.02452793
  0.00205778  0.00656089 -0.0043251   0.01749773 -0.02024604  0.02107358
  0.04291587  0.00380048 -0.00389144  0.00436856  0.01009397  0.02306338
  0.00569293 -0.03070655 -0.03685875 -0.00526187 -0.01742659 -0.03314888
 -0.04510642 -0.01291278 -0.02583568 -0.0018802   0.01064861  0.00837127
  0.03542098 -0.01953842  0.00273281]
Laasso Regression
MSE 0.08368091920793555
R² Score: -0.002468122704610609
