In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns

In [None]:
# Location of the employee CSV (raw file hosted on GitHub).
url = "https://raw.githubusercontent.com/utkarshjadhav96/ML-Lab/main/employee_data.csv"
# read_csv accepts a URL directly, so no separate download step is needed.
df = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Preview the first five rows to sanity-check that the CSV parsed as expected.
df.head(n=5)

Unnamed: 0,Employee_ID,Name,Age,Salary,Department,Experience (Years),City,Gender,Performance Score
0,101,Employee_1,50,110592,HR,9,Denver,Female,84
1,102,Employee_2,36,48110,IT,29,San Diego,Male,92
2,103,Employee_3,29,119309,IT,26,Chicago,Female,97
3,104,Employee_4,42,67266,Marketing,25,Seattle,Male,65
4,105,Employee_5,40,92992,Finance,24,Boston,Male,91


In [None]:
# Column dtypes and non-null counts: a quick check for missing values before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Employee_ID         100 non-null    int64 
 1   Name                100 non-null    object
 2   Age                 100 non-null    int64 
 3   Salary              100 non-null    int64 
 4   Department          100 non-null    object
 5   Experience (Years)  100 non-null    int64 
 6   City                100 non-null    object
 7   Gender              100 non-null    object
 8   Performance Score   100 non-null    int64 
dtypes: int64(5), object(4)
memory usage: 7.2+ KB


In [None]:
# Employee_ID and Name are removed in a single call before any modelling step.
df = df.drop(columns=['Employee_ID', 'Name'])


In [None]:
# Binarise the performance score into the classification label:
# 1 when the score is at least 75, otherwise 0.
# The comparison is vectorised (no per-row Python lambda); the boolean mask is
# cast to int64 so the label stays integer-typed.
df['Target'] = (df['Performance Score'] >= 75).astype('int64')
# The raw score must not remain as a feature, since the label is derived from it.
df = df.drop(columns=['Performance Score'])

In [None]:

from sklearn.preprocessing import LabelEncoder, StandardScaler

# One encoder per categorical column, kept in a dict so the fitted
# category-to-integer mappings remain available after encoding.
# NOTE(review): integer codes give nominal categories an arbitrary order,
# which linear models treat as numeric — consider one-hot encoding.
label_encoders = {col: LabelEncoder() for col in ('Department', 'City', 'Gender')}
for col, le in label_encoders.items():
    # Overwrite the string column with its integer codes.
    df[col] = le.fit_transform(df[col])


In [None]:
# Label vector first, then every remaining column as the feature matrix.
y = df['Target']
X = df.drop(columns=['Target'])


In [None]:
# Standardise every feature column with StandardScaler.
# NOTE(review): the statistics here are computed over all rows of X, including
# any rows later held out for evaluation; fit on training rows only if strict
# train/test separation is required.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Split the unscaled features first so the held-out rows stay unseen during
# preprocessing (fitting the scaler on all rows would leak test-set statistics).
# Same test_size and random_state as before, so the same rows are held out.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply that transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# 1. Random Forest (Bagging)
# 100 trees; the fixed seed keeps the fitted forest repeatable across runs.
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

In [None]:
# Train the forest on the training split; the fitted estimator is the cell's output.
rf.fit(X=X_train, y=y_train)


In [None]:
# Class predictions for the held-out rows.
rf_preds = rf.predict(X=X_test)


In [None]:
# Overall accuracy first, then the per-class precision/recall/F1 breakdown.
rf_accuracy = accuracy_score(y_test, rf_preds)
print("Random Forest Accuracy:", rf_accuracy)
rf_report = classification_report(y_test, rf_preds)
print(rf_report)

Random Forest Accuracy: 0.65
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.72      0.87      0.79        15

    accuracy                           0.65        20
   macro avg       0.36      0.43      0.39        20
weighted avg       0.54      0.65      0.59        20



In [None]:
# 2. AdaBoost (Boosting)
# 50 boosting rounds with the default base learner, seeded for repeatability.
ada = AdaBoostClassifier(
    n_estimators=50,
    random_state=42,
)

In [None]:
# fit() returns the fitted estimator, so training and prediction are chained.
ada_preds = ada.fit(X_train, y_train).predict(X_test)

In [None]:
# Overall accuracy first, then the per-class precision/recall/F1 breakdown.
ada_accuracy = accuracy_score(y_test, ada_preds)
print("AdaBoost Accuracy:", ada_accuracy)
ada_report = classification_report(y_test, ada_preds)
print(ada_report)


AdaBoost Accuracy: 0.7
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.74      0.93      0.82        15

    accuracy                           0.70        20
   macro avg       0.37      0.47      0.41        20
weighted avg       0.55      0.70      0.62        20



In [None]:
# 3. Voting Classifier (majority-vote ensemble of heterogeneous models)

lr = LogisticRegression()
# Seeded like the other tree-based models: an unseeded decision tree can pick
# different splits from run to run, which would make the ensemble's reported
# scores non-reproducible.
dt = DecisionTreeClassifier(random_state=42)

In [None]:

# Hard voting: each base model casts one class vote per sample and the majority wins.
base_models = [('lr', lr), ('dt', dt), ('rf', rf)]
voting = VotingClassifier(estimators=base_models, voting='hard')

In [None]:
# fit() returns the fitted ensemble, so training and prediction are chained.
vote_preds = voting.fit(X_train, y_train).predict(X_test)

In [None]:

# Overall accuracy first, then the per-class precision/recall/F1 breakdown.
vote_accuracy = accuracy_score(y_test, vote_preds)
print("🔹 Voting Classifier Accuracy:", vote_accuracy)
vote_report = classification_report(y_test, vote_preds)
print(vote_report)

🔹 Voting Classifier Accuracy: 0.6
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.71      0.80      0.75        15

    accuracy                           0.60        20
   macro avg       0.35      0.40      0.38        20
weighted avg       0.53      0.60      0.56        20

