In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import statsmodels.api as sm
from bokeh.plotting import *
from bokeh.io import output_notebook
from bokeh.charts import Scatter, show

In [2]:
# Configure Bokeh to render plots inline in the notebook; the later show(p)
# calls rely on this having been run first.
output_notebook()

In [3]:
# Load the email-offer dataset. The path is relative, so the CSV must sit in the
# notebook's working directory. Later cells use the Age, Gender and TookAction columns.
df = pd.read_csv('Email-Offer.csv')

In [4]:
# Preview the first rows of the raw data to check the columns loaded as expected.
df.head()

Unnamed: 0,Age,Gender,TookAction
0,38,Female,0
1,32,Female,0
2,46,Male,1
3,34,Male,0
4,40,Male,0


In [5]:
# One-hot encode Gender into one indicator column per category and append the
# indicators to df (the 'Male' column is used as a regressor further down).
#
# astype(int) pins the indicators to 0/1 integers: pandas >= 2.0 returns bool
# columns from get_dummies by default, and a design matrix mixing bool with
# numeric columns is converted to object dtype, which statsmodels rejects.
# On older pandas the displayed values are unchanged (still 0/1).
#
# NOTE: this cell rebinds df, so re-running it without re-running the load cell
# appends the indicator columns a second time.
dummies = pd.get_dummies(df['Gender']).astype(int).rename(columns=str)
df = pd.concat([df, dummies], axis=1)

In [6]:
# Preview df again to confirm the Gender indicator columns were appended.
df.head()

Unnamed: 0,Age,Gender,TookAction,Female,Male
0,38,Female,0,1,0
1,32,Female,0,1,0
2,46,Male,1,0,1
3,34,Male,0,0,1
4,40,Male,0,0,1


In [7]:
# Straight-line (degree-1 least-squares) fit of TookAction on Age, drawn over
# the raw observations.
age_grid = np.unique(df['Age'])
linear_fit = np.poly1d(np.polyfit(df['Age'], df['TookAction'], 1))

p = figure()
p.line(age_grid, linear_fit(age_grid), line_width=2)
p.scatter(df['Age'], df['TookAction'], fill_color='blue', size=10)
show(p)

In [8]:
# Response variable for the logistic regressions below: the 0/1 TookAction column.
y = df['TookAction']

In [9]:
# Model 1: logistic regression of TookAction on Age alone.
# add_constant supplies the intercept column (the `const` row in the summary).
X1 = sm.add_constant(df[['Age']])
model1 = sm.Logit(y, X1)
results1 = model1.fit()
print(results1.summary())
# Call form of print: the bare `print "..."` statement is a SyntaxError on
# Python 3, while print("...") with one argument prints the same text on 2 and 3.
print("Confusion Matrix")
print(results1.pred_table())

Optimization terminated successfully.
         Current function value: 0.233883
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:             TookAction   No. Observations:                  100
Model:                          Logit   Df Residuals:                       98
Method:                           MLE   Df Model:                            1
Date:                Sun, 05 Mar 2017   Pseudo R-squ.:                  0.6525
Time:                        20:07:03   Log-Likelihood:                -23.388
converged:                       True   LL-Null:                       -67.301
                                        LLR p-value:                 7.148e-21
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const        -22.5768      4.795     -4.709      0.000       -31.974   -13.179
Age            0.5574      0.

In [10]:
# In-sample predictions from model 1, whose only independent variable is Age
# (X1 is the same design matrix, including the constant, the model was fitted on).
y_hat1 = results1.predict(X1)

In [11]:
# Observed outcomes (blue) against the model-1 predictions (red triangles), by Age.
age = df['Age']

p = figure()
p.scatter(x=age, y=df['TookAction'], size=10, fill_color='blue')
p.scatter(x=age, y=y_hat1, size=10, fill_color='red', marker='triangle')
show(p)

In [12]:
# Model 2: logistic regression of TookAction on Age plus the Male indicator.
# add_constant supplies the intercept column (the `const` row in the summary).
X2 = sm.add_constant(df[['Age', 'Male']])
model2 = sm.Logit(y, X2)
results2 = model2.fit()
print(results2.summary())
# Call form of print: the bare `print "..."` statement is a SyntaxError on
# Python 3, while print("...") with one argument prints the same text on 2 and 3.
print("Confusion Matrix")
print(results2.pred_table())

Optimization terminated successfully.
         Current function value: 0.148067
         Iterations 10
                           Logit Regression Results                           
Dep. Variable:             TookAction   No. Observations:                  100
Model:                          Logit   Df Residuals:                       97
Method:                           MLE   Df Model:                            2
Date:                Sun, 05 Mar 2017   Pseudo R-squ.:                  0.7800
Time:                        20:07:10   Log-Likelihood:                -14.807
converged:                       True   LL-Null:                       -67.301
                                        LLR p-value:                 1.592e-23
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const        -38.1520      9.987     -3.820      0.000       -57.726   -18.578
Age            0.8872      0

In [13]:
# In-sample predictions from model 2, whose independent variables are Age and the
# Male indicator (X2 is the same design matrix, including the constant, it was fitted on).
y_hat2 = results2.predict(X2)

In [14]:
# Observed outcomes (blue) against the model-2 predictions (red triangles), by Age.
age = df['Age']

p = figure()
p.scatter(x=age, y=df['TookAction'], size=10, fill_color='blue')
p.scatter(x=age, y=y_hat2, size=10, fill_color='red', marker='triangle')
show(p)

In [15]:
# First 30 rows of inputs and observed outcome next to the model-2 prediction.
observed = df[['Age', 'Gender', 'TookAction']]
predicted = pd.DataFrame(y_hat2, columns=['Prediction'])
pd.concat([observed, predicted], axis=1).head(30)

Unnamed: 0,Age,Gender,TookAction,Prediction
0,38,Female,0,0.01168869
1,32,Female,0,5.766916e-05
2,46,Male,1,0.9991738
3,34,Male,0,0.02795237
4,40,Male,0,0.8550104
5,37,Female,0,0.004846684
6,43,Male,1,0.9882969
7,38,Male,1,0.5
8,27,Male,0,5.774117e-05
9,42,Female,0,0.2914249
