In [147]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [148]:
# Read the absenteeism CSV (path is relative to the notebook's working
# directory) into a DataFrame and display it.
data = pd.read_csv(filepath_or_buffer="data/Absenteeism_with_targets.csv")
data

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month,Day_Of_Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism
0,0,0,0,1,7,1,289,36,33,239.554,30,0,2,1,1
1,0,0,0,0,7,1,118,13,50,239.554,31,0,1,0,0
2,0,0,0,1,7,2,179,51,38,239.554,31,0,0,0,0
3,1,0,0,0,7,3,279,5,39,239.554,24,0,2,0,1
4,0,0,0,1,7,3,289,36,33,239.554,30,0,2,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,1,0,0,0,5,2,179,22,40,237.656,22,1,2,0,1
696,1,0,0,0,5,2,225,26,28,237.656,24,0,1,2,0
697,1,0,0,0,5,3,330,16,28,237.656,25,1,0,0,1
698,0,0,0,1,5,3,235,16,32,237.656,25,1,0,0,0


In [168]:
# Features: every column except the "Absenteeism" label.
inputs = data.drop(columns="Absenteeism")
# Label: the "Absenteeism" column on its own.
targets = data["Absenteeism"]

In [169]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    inputs,
    targets,
    test_size=0.2,
    random_state=19,
)

In [170]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column mean and standard deviation from the training split
# only, then apply that same transformation to both splits so no statistic
# from the test rows influences the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [171]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression with default settings on the standardized
# training features.
log = LogisticRegression().fit(X_train_scaled, y_train)

# Class predictions for the held-out rows (reused by the confusion matrix).
log_preds = log.predict(X_test_scaled)

# Mean accuracy on the held-out split.
log.score(X_test_scaled, y_test)

0.7928571428571428

In [172]:
from sklearn.metrics import confusion_matrix

# Rows are the true classes, columns are the predicted classes.
confusion_matrix(y_true=y_test, y_pred=log_preds)

array([[69, 17],
       [12, 42]])

In [173]:
# Fitted intercept (bias term) of the logistic regression, as a 1-element array.
log.intercept_

array([-0.11853209])

In [174]:
# Fitted weights, one per input column, in the same order as inputs.columns.
# The model was trained on standardized features, so these are on the
# standardized scale rather than in the original units of each column.
log.coef_

array([[ 1.93138617e+00,  3.33531228e-01,  1.53249800e+00,
         1.29931746e+00,  9.02070485e-02, -2.52179729e-01,
         8.85545424e-01, -1.09049919e-03, -2.54718419e-01,
        -2.28150799e-02,  2.53494964e-01, -5.41724039e-02,
         4.16430726e-01, -4.99026181e-01]])

In [175]:
# Column labels of the feature frame as a NumPy array; used to label the
# coefficients in the summary table.
feature_name = inputs.columns.to_numpy()
feature_name

array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month',
       'Day_Of_Week', 'Transportation Expense', 'Distance to Work', 'Age',
       'Daily Work Load Average', 'Body Mass Index', 'Education',
       'Children', 'Pets'], dtype=object)

In [176]:
# Start the summary table with a single column holding the feature labels.
summary_table = pd.DataFrame({"Feature name": feature_name})
summary_table

Unnamed: 0,Feature name
0,Reason_1
1,Reason_2
2,Reason_3
3,Reason_4
4,Month
5,Day_Of_Week
6,Transportation Expense
7,Distance to Work
8,Age
9,Daily Work Load Average


In [177]:
# log.coef_ is a single-row 2-D array; transposing it yields one value per
# row so it lines up with the feature labels already in the table.
summary_table["Coefficient"] = log.coef_.T
summary_table

Unnamed: 0,Feature name,Coefficient
0,Reason_1,1.931386
1,Reason_2,0.333531
2,Reason_3,1.532498
3,Reason_4,1.299317
4,Month,0.090207
5,Day_Of_Week,-0.25218
6,Transportation Expense,0.885545
7,Distance to Work,-0.00109
8,Age,-0.254718
9,Daily Work Load Average,-0.022815


In [178]:
# Add the model intercept to the summary table as one extra row.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the row is attached with pd.concat instead (same result, works on both).
coef_df = pd.DataFrame(
    [["Interception", log.intercept_[0]]],
    columns=["Feature name", "Coefficient"],
)
# Drop any intercept row left over from a previous execution of this cell so
# that re-running it does not stack duplicate rows. On a first run this filter
# keeps every row.
summary_table = pd.concat(
    [summary_table[summary_table["Feature name"] != "Interception"], coef_df],
    ignore_index=True,
)

In [179]:
# Show the table again now that the intercept row has been added at the end.
summary_table

Unnamed: 0,Feature name,Coefficient
0,Reason_1,1.931386
1,Reason_2,0.333531
2,Reason_3,1.532498
3,Reason_4,1.299317
4,Month,0.090207
5,Day_Of_Week,-0.25218
6,Transportation Expense,0.885545
7,Distance to Work,-0.00109
8,Age,-0.254718
9,Daily Work Load Average,-0.022815


In [180]:
# exp(coefficient) is the multiplicative change in the odds of the positive
# class per unit increase of the corresponding input. The model was fit on
# standardized inputs, so "one unit" here means one standard deviation of the
# original column. Every row of the table is exponentiated, including the
# intercept row if it has been added.
summary_table["Odds_ratio"] = np.exp(summary_table["Coefficient"])
summary_table

Unnamed: 0,Feature name,Coefficient,Odds_ratio
0,Reason_1,1.931386,6.899067
1,Reason_2,0.333531,1.395889
2,Reason_3,1.532498,4.629727
3,Reason_4,1.299317,3.666793
4,Month,0.090207,1.094401
5,Day_Of_Week,-0.25218,0.777105
6,Transportation Expense,0.885545,2.424306
7,Distance to Work,-0.00109,0.99891
8,Age,-0.254718,0.775135
9,Daily Work Load Average,-0.022815,0.977443


In [181]:
# Rank rows from the largest odds ratio to the smallest. This returns a
# sorted copy for display; summary_table itself is left in its original order.
summary_table.sort_values("Odds_ratio", ascending=False)

Unnamed: 0,Feature name,Coefficient,Odds_ratio
0,Reason_1,1.931386,6.899067
2,Reason_3,1.532498,4.629727
3,Reason_4,1.299317,3.666793
6,Transportation Expense,0.885545,2.424306
12,Children,0.416431,1.516539
1,Reason_2,0.333531,1.395889
10,Body Mass Index,0.253495,1.288521
4,Month,0.090207,1.094401
7,Distance to Work,-0.00109,0.99891
9,Daily Work Load Average,-0.022815,0.977443
