In [64]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [96]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [66]:
df=pd.read_csv('/content/Employee promotion.csv')
df.head()

Unnamed: 0,age,salary,experience,education_level,gender,city,department,working_hours,projects_completed,performance_score,training_hours,certifications,overtime_hours,satisfaction_level,team_size,remote_work_ratio,innovation_score,attendance_rate,promoted
0,56,141748,26,PhD,Female,Mumbai,Marketing,36,6,2.65,9,8,18,0.4,15,49,9.52,0.75,0
1,46,30287,7,PhD,Female,Bangalore,HR,44,15,1.06,64,1,33,0.85,19,88,2.3,0.94,0
2,32,59387,27,Bachelor,Male,Hyderabad,Finance,36,5,5.2,41,9,24,0.35,9,25,2.6,0.72,0
3,60,33512,13,PhD,Male,Bangalore,Operations,67,29,7.17,84,7,30,0.65,11,55,8.76,0.76,0
4,25,26342,24,Master,Male,Hyderabad,HR,63,19,3.04,8,9,48,0.71,6,77,4.53,0.9,0


In [67]:
x=df.drop('promoted',axis=1)
y=df['promoted']

In [68]:
simple=SimpleImputer()

In [69]:
numeric_features = ['age', 'salary', 'experience', 'working_hours',
                    'projects_completed', 'performance_score',
                    'training_hours', 'certifications', 'overtime_hours',
                    'satisfaction_level', 'team_size', 'remote_work_ratio',
                    'innovation_score', 'attendance_rate']

categorical_features = ['education_level', 'gender', 'city', 'department']

In [70]:
numeric_processor=Pipeline(
    steps=[
        ('imputer', SimpleImputer(missing_values=np.nan,strategy='mean')),
        ('scaler', StandardScaler())
    ]
)

In [71]:
numeric_processor

In [72]:
categorical_processor=Pipeline(
    steps=[("imputation_constant",SimpleImputer(fill_value="missing",strategy="most_frequent")),
     ("onehot",OneHotEncoder(handle_unknown="ignore"))]
)

In [73]:
categorical_processor

In [74]:
preprocessor=ColumnTransformer(
    transformers=[
        ("num",numeric_processor,numeric_features),
        ('cat',categorical_processor,categorical_features)
    ]
)

In [75]:
preprocessor

<h1>Random forest</h1>

In [76]:
model=Pipeline(
    steps=[
        ('preprocessor',preprocessor),
        ('classifier',RandomForestClassifier(n_estimators=100,random_state=42))
    ]
)

In [77]:
model

In [78]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
model.fit(x_train,y_train)

In [79]:
y_pred=model.predict(x_test)

In [80]:
score=model.score(x_test,y_test)

In [81]:
print("Model Accuracy:", round(score, 3))

Model Accuracy: 0.96


In [82]:
model.predict(x_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

<h1>SVM</h1>

In [83]:
model=Pipeline(
    steps=[
        ('preprocessor',preprocessor),
        ('classifier',SVC())
    ]
)

In [84]:
model

In [85]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
model.fit(x_train,y_train)

In [86]:
score=model.score(x_test,y_test)

In [87]:
print("Model Accuracy:", round(score, 3))

Model Accuracy: 0.96


In [88]:
model.predict(x_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

<h1>KNN</h1>

In [89]:
model=Pipeline(
    steps=[
        ('preprocessor',preprocessor),
        ('classifier',KNeighborsClassifier())
    ]
)

In [90]:
model

In [91]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
model.fit(x_train,y_train)

In [92]:
score=model.score(x_test,y_test)

In [93]:
print("Model Accuracy:", round(score, 3))

Model Accuracy: 0.965


In [94]:
model.predict(x_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

<h1>Linear Regression</h1>

In [97]:
model=Pipeline(
    steps=[
        ('preprocessor',preprocessor),
        ('classifier',LogisticRegression())
    ]
)

In [98]:
model

In [99]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
model.fit(x_train,y_train)

In [100]:
score=model.score(x_test,y_test)

In [101]:
print("Model Accuracy:", round(score, 3))

Model Accuracy: 0.98


In [102]:
model.predict(x_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0])