In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [6]:
df = pd.read_csv("/content/cleaned_burnout_dataset.csv")

In [7]:
y = df['BurnoutRisk']
X = df.drop(columns=['BurnoutRisk', 'BurnoutLevel'], errors='ignore')
X = pd.get_dummies(X)

In [8]:
scaler = StandardScaler()
num_cols = X.select_dtypes(include=['number']).columns
X[num_cols] = scaler.fit_transform(X[num_cols])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=42)

In [13]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
predictions_tree=dt.predict(X_test)
dt_acc = accuracy_score(y_test, predictions_tree)
print("Decision Tree Accuracy :",dt_acc)

Decision Tree Accuracy (all features): 0.5622222222222222


In [14]:
randomf=RandomForestClassifier(random_state=42)
randomf.fit(X_train, y_train)
predictions_rf=randomf.predict(X_test)
rf_acc = accuracy_score(y_test,predictions_rf)
print("Random Forest Accuracy:",rf_acc)

Random Forest Accuracy: 0.6666666666666666


In [15]:
kneighbor=KNeighborsClassifier()
kneighbor.fit(X_train, y_train)
predictions_knn=kneighbor.predict(X_test)
knn_acc = accuracy_score(y_test,predictions_knn)
print("KNN Accuracy:",knn_acc)

KNN Accuracy: 0.6188888888888889


In [16]:
importances = randomf.feature_importances_
feature_series = pd.Series(importances, index=X.columns)
top3 = feature_series.sort_values(ascending=False).head(3).index.tolist()
print("Top 3 important features:", top3)

Top 3 important features: ['EmployeeID', 'ProductivityScore', 'ManagerSupportScore']


In [21]:
Xreduced=X[top3]
X__train, X__test, y__train, y__test = train_test_split(Xreduced, y, test_size=0.3, random_state=42)

In [22]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X__train, y__train)
predictions__tree=dt.predict(X__test)
dt__acc = accuracy_score(y__test, predictions__tree)
print("Decision Tree Accuracy(top 3) :",dt_acc)

Decision Tree Accuracy(top 3) : 0.5622222222222222


In [23]:
randomf=RandomForestClassifier(random_state=42)
randomf.fit(X__train, y__train)
predictions__rf=randomf.predict(X__test)
rf__acc = accuracy_score(y__test,predictions__rf)
print("Random Forest Accuracy(top 3):",rf__acc)

Random Forest Accuracy(top 3): 0.6377777777777778


In [24]:
kneighbor=KNeighborsClassifier()
kneighbor.fit(X__train, y__train)
predictions__knn=kneighbor.predict(X__test)
knn__acc = accuracy_score(y__test,predictions__knn)
print("KNN Accuracy(top 3):",knn__acc)

KNN Accuracy(top 3): 0.6111111111111112
