In [15]:
#Import Affairs from csv file
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the Affairs data set (first row holds the column names) and discard
# the 'Unnamed: 0' column that came along with the CSV.
Affairs = pd.read_csv("Affairs.csv", header=0).drop('Unnamed: 0', axis=1)

In [16]:
# Binary response: ynaffair is 1 when the affairs value is greater than
# zero and 0 otherwise.
had_affair = Affairs['affairs'].gt(0)
Affairs['ynaffair'] = had_affair.astype(int)

In [17]:
# Encode the two string-valued columns as 0/1 indicators:
#   gender_1   -> 1 when gender is 'male', 0 otherwise
#   ynchildren -> 1 when children is 'yes', 0 otherwise
Affairs['gender_1'] = Affairs['gender'].eq('male').astype(int)
Affairs['ynchildren'] = Affairs['children'].eq('yes').astype(int)

In [4]:
# Predictor columns fed to the model; the two indicator columns replace the
# original string-valued gender/children columns.
cols_to_keep = [
    'age',
    'yearsmarried',
    'religiousness',
    'education',
    'occupation',
    'rating',
    'gender_1',
    'ynchildren',
]
X = Affairs[cols_to_keep]

# Response: the binary affair indicator.
y = Affairs['ynaffair']

In [5]:
# Convert y to a flat 1-D NumPy array.
y = np.asanyarray(y).ravel()

In [6]:
#Split the data into training and testing
# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20. Fall back to
# the legacy location only when running on a pre-0.18 installation.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# random_state is fixed so the split is reproducible; the test-set size is
# left at the library default.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [7]:
# Fit a logistic regression (library-default settings) on the training split.
from sklearn.linear_model import LogisticRegression

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LogisticRegression().fit(X_train, y_train)

In [8]:
# Predicted class labels for the held-out test split.
y_pred = model.predict(X=X_test)

In [9]:
# Confusion matrix of actual (rows) vs. predicted (columns) labels on the
# test split.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[110   8]
 [ 27   6]]


In [10]:
#Compute FPR and TPR
from sklearn.metrics import roc_curve

# Column 1 of predict_proba is the probability of the second class in
# model.classes_ -- the positive class (1) for this 0/1 response.
preds = model.predict_proba(X_test)[:, 1]

# Bind the third return value to a real name rather than `_`: in IPython `_`
# holds the previous cell output, and assigning to it stops that from updating.
fpr, tpr, thresholds = roc_curve(y_test, preds)

In [14]:
# Draw the ROC curve for the test split using explicit figure/axes objects.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label='ROC curve')
# Dashed diagonal: reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('1-Specificity')
ax.set_ylabel('Sensitivity')
ax.set_title('ROC curve')
ax.legend(loc="lower right")
plt.show()