In [1]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn import preprocessing

In [2]:
from sklearn.ensemble import RandomForestClassifier

In [3]:
tree_model=tree.DecisionTreeClassifier(max_depth=8)

In [4]:
label_encoder=preprocessing.LabelEncoder()

In [5]:
attrition=pd.read_csv("general_data.csv")

In [6]:
attrition["Attrition"]=label_encoder.fit_transform(attrition["Attrition"])
attrition["Gender"]=label_encoder.fit_transform(attrition["Gender"])
attrition["JobRole"]=label_encoder.fit_transform(attrition["JobRole"])
attrition["MaritalStatus"]=label_encoder.fit_transform(attrition["MaritalStatus"])
attrition["BusinessTravel"]=label_encoder.fit_transform(attrition["BusinessTravel"])
attrition["Department"]=label_encoder.fit_transform(attrition["Department"])
attrition["EducationField"]=label_encoder.fit_transform(attrition["EducationField"])

In [7]:
attrition.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeID,Gender,...,NumCompaniesWorked,Over18,PercentSalaryHike,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,YearsAtCompany,YearsSinceLastPromotion,YearsWithCurrManager
0,51,0,2,2,6,2,1,1,1,0,...,1.0,Y,11,8,0,1.0,6,1,0,0
1,31,1,1,1,10,1,1,1,2,0,...,0.0,Y,23,8,1,6.0,3,5,1,4
2,32,0,1,1,17,4,4,1,3,1,...,1.0,Y,15,8,3,5.0,2,5,0,3
3,38,0,0,1,2,5,1,1,4,1,...,3.0,Y,11,8,3,13.0,5,8,7,5
4,32,0,2,1,10,1,3,1,5,1,...,4.0,Y,12,8,2,9.0,2,6,0,4


In [8]:
rf_model=RandomForestClassifier(n_estimators=1000,max_features=2,oob_score=True)

In [9]:
attrition.columns

Index(['Age', 'Attrition', 'BusinessTravel', 'Department', 'DistanceFromHome',
       'Education', 'EducationField', 'EmployeeCount', 'EmployeeID', 'Gender',
       'JobLevel', 'JobRole', 'MaritalStatus', 'MonthlyIncome',
       'NumCompaniesWorked', 'Over18', 'PercentSalaryHike', 'StandardHours',
       'StockOptionLevel', 'TotalWorkingYears', 'TrainingTimesLastYear',
       'YearsAtCompany', 'YearsSinceLastPromotion', 'YearsWithCurrManager'],
      dtype='object')

In [10]:
features=pd.DataFrame([attrition["Age"],attrition["BusinessTravel"],attrition["Department"],attrition["DistanceFromHome"],attrition["Gender"],attrition["JobRole"],attrition["MaritalStatus"],attrition["MonthlyIncome"],attrition["PercentSalaryHike"],attrition["YearsSinceLastPromotion"],attrition["YearsWithCurrManager"]]).T

In [11]:
rf_model.fit(X=features,y=attrition["Attrition"])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features=2,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=True, random_state=None,
                       verbose=0, warm_start=False)

In [12]:
print("OOB Accuracy")
print(rf_model.oob_score_);

OOB Accuracy
1.0


In [13]:
for feature,imp in zip(features,rf_model.feature_importances_):
    print(feature,imp)

Age 0.16634442155092724
BusinessTravel 0.03998942723516103
Department 0.042445564138517265
DistanceFromHome 0.12001704328976914
Gender 0.028632749888306046
JobRole 0.09267107650494126
MaritalStatus 0.051783163415662296
MonthlyIncome 0.16862921652234755
PercentSalaryHike 0.11050297171386571
YearsSinceLastPromotion 0.07475443143858058
YearsWithCurrManager 0.10422993430192186


In [14]:
attrition.isnull().isnull().sum()

Age                        0
Attrition                  0
BusinessTravel             0
Department                 0
DistanceFromHome           0
Education                  0
EducationField             0
EmployeeCount              0
EmployeeID                 0
Gender                     0
JobLevel                   0
JobRole                    0
MaritalStatus              0
MonthlyIncome              0
NumCompaniesWorked         0
Over18                     0
PercentSalaryHike          0
StandardHours              0
StockOptionLevel           0
TotalWorkingYears          0
TrainingTimesLastYear      0
YearsAtCompany             0
YearsSinceLastPromotion    0
YearsWithCurrManager       0
dtype: int64

In [15]:
predictors=pd.DataFrame([attrition["Age"],attrition["MonthlyIncome"],attrition["PercentSalaryHike"]]).T

In [16]:
tree_model.fit(X=predictors,y=attrition["Attrition"])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=8, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [17]:
with open("Attrition.dot",'w') as f:
    f=tree.export_graphviz(tree_model,feature_names=["Sex","Age","Fare"],out_file=f)


In [18]:
tree_model.score(X=predictors,y=attrition["Attrition"])

0.8829931972789116

### The model is 88.29931972789116% Accurate and Hence can be used to decide the reason behind the attritotion pattern of the company

In [24]:
feature=["Age","MonthlyIncome","PercentSalaryHike"]

In [25]:
from sklearn.tree import export_text

In [27]:
r=export_text(tree_model,feature_names=feature)
print(r)

|--- Age <= 33.50
|   |--- Age <= 21.50
|   |   |--- MonthlyIncome <= 182405.00
|   |   |   |--- MonthlyIncome <= 50545.00
|   |   |   |   |--- MonthlyIncome <= 42000.00
|   |   |   |   |   |--- PercentSalaryHike <= 16.50
|   |   |   |   |   |   |--- MonthlyIncome <= 28270.00
|   |   |   |   |   |   |   |--- MonthlyIncome <= 23140.00
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- MonthlyIncome >  23140.00
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- MonthlyIncome >  28270.00
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- PercentSalaryHike >  16.50
|   |   |   |   |   |   |--- MonthlyIncome <= 30920.00
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- MonthlyIncome >  30920.00
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- MonthlyIncome >  42000.00
|   |   |   |   |   |--- class: 0
|   |   |   |--- MonthlyIncome >  50545.00
|   |   |   |   |--- MonthlyIncome <= 65420.00