In [6]:
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import datasets
from sklearn.metrics import accuracy_score

In [7]:
# Load the breast-cancer dataset bundled with scikit-learn and pull out the
# feature matrix (X) and the label vector (y) used by the cells below.
cancer = datasets.load_breast_cancer()
X, y = cancer['data'], cancer['target']

In [8]:
def logistic(z):
    """Numerically stable logistic (sigmoid) function, 1 / (1 + exp(-z)).

    Parameters
    ----------
    z : scalar or array-like of real numbers
        Input value(s). Non-floating input (e.g. integers) is converted to
        float; floating-point arrays keep their dtype.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid of ``z`` with the same shape as ``z``.

    Notes
    -----
    The naive formula evaluates ``exp(-z)``, which overflows for large
    negative ``z`` and emits a RuntimeWarning. Here only ``exp(-|z|)`` is
    evaluated, which always lies in [0, 1], and the algebraically equivalent
    form ``exp(z) / (1 + exp(z))`` is used for negative inputs.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)

    # exp(-|z|) can underflow to 0 but can never overflow.
    e = np.exp(-np.abs(z))
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of any other rank unchanged.
    return out[()]


def standard_scaler(X):
    """Standardize each column of ``X`` to zero mean and unit variance.

    Parameters
    ----------
    X : array-like
        Data whose first axis indexes observations. Converted to a float
        NumPy array.

    Returns
    -------
    numpy.ndarray
        ``(X - mean) / std`` computed along axis 0, using the population
        standard deviation (NumPy's default ``ddof=0``).

    Notes
    -----
    A constant column has zero standard deviation, and dividing by it would
    give NaN (0/0). Such columns are divided by 1 instead, so they come out
    as all zeros.
    """
    X = np.asarray(X, dtype=float)
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    # Guard against division by zero for constant columns.
    sigma = np.where(sigma == 0, 1.0, sigma)
    return (X - mu) / sigma

In the class below, Logistic Regression estimates $\beta$ with gradient descent, using the gradient of the negative log-likelihood.

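$\frac{\partial \mathcal{L}(\beta)}{\partial\beta} = \frac{\partial \left(-\log L(\beta)\right)}{\partial\beta} = -X^T(y-p)$, where $L(\beta)$ is the likelihood, $\mathcal{L}(\beta) = -\log L(\beta)$ is the loss being minimized, and $p$ is the vector of fitted probabilities (the logistic function applied to $X\beta$).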

In [11]:
class BinaryLogisticRegression:
    """Binary logistic regression fitted by batch gradient descent.

    The coefficients are updated with the gradient of the negative
    log-likelihood, ``-X^T (y - p)``, where ``p = logistic(X @ beta)``.

    Attributes set by :meth:`fit`
    -----------------------------
    X : numpy.ndarray
        Design matrix actually used for fitting (after optional
        standardization and with the intercept column in position 0).
    N, D : int
        Number of rows and columns of ``X``.
    y : numpy.ndarray
        Target vector.
    n_iter, lr
        The iteration count and learning rate passed to ``fit``.
    beta : numpy.ndarray
        Fitted coefficient vector of length ``D``.
    p : numpy.ndarray
        Fitted probabilities ``logistic(X @ beta)``.
    yhat : numpy.ndarray
        ``p`` rounded to the nearest integer (NumPy rounds an exact 0.5
        down to 0).
    """

    def fit(self, X, y, n_iter, lr,
            standardize=True, has_intercept=False, seed=None):
        """Estimate ``beta`` by gradient descent and store the fit on ``self``.

        Parameters
        ----------
        X : array-like, shape (N, d)
            Feature matrix.
        y : array-like, shape (N,)
            Targets; expected to be 0/1 labels for a binary problem.
        n_iter : int
            Number of gradient-descent steps.
        lr : float
            Learning rate (step size). The gradient is a sum over all rows,
            not a mean, so the appropriate step size depends on ``N``.
        standardize : bool, default True
            If True, feature columns are standardized with
            ``standard_scaler`` before fitting.
        has_intercept : bool, default False
            If False, a column of ones is prepended to ``X``. If True, ``X``
            is taken to already carry its intercept column in position 0
            (the same position where this method would prepend it).
        seed : int or None, default None
            If given, the initial ``beta`` is drawn from a dedicated
            ``np.random.default_rng(seed)`` so the fit is reproducible.
            If None, the global ``np.random`` state is used.

        Raises
        ------
        ValueError
            If ``y`` is not a 1-D vector with one entry per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        if y.shape != (X.shape[0],):
            raise ValueError(
                'y must be a 1-D array with one entry per row of X; '
                'got y.shape=%s for X.shape=%s' % (y.shape, X.shape)
            )

        if standardize:
            if has_intercept:
                # Standardizing a constant intercept column would divide by
                # a zero standard deviation and wipe it out, so only the
                # remaining feature columns are scaled.
                scaled = standard_scaler(X[:, 1:])
                X = np.concatenate((X[:, :1], scaled), axis=1)
            else:
                X = standard_scaler(X)

        if not has_intercept:
            ones = np.ones((X.shape[0], 1))
            X = np.concatenate((ones, X), axis=1)

        self.X = X
        self.N, self.D = X.shape
        self.y = y
        self.n_iter = n_iter
        self.lr = lr

        # Random starting point for the descent.
        if seed is None:
            beta = np.random.randn(self.D)
        else:
            beta = np.random.default_rng(seed).standard_normal(self.D)

        for _ in range(n_iter):
            p = logistic(np.dot(self.X, beta))

            # Gradient of the negative log-likelihood with respect to beta.
            gradient = -np.dot(self.X.T, (self.y - p))

            beta -= self.lr * gradient

        self.beta = beta
        self.p = logistic(np.dot(self.X, self.beta))
        self.yhat = self.p.round()

In [12]:
# fit() draws its initial beta from NumPy's random generator; seeding the
# global RNG first makes the fit (and the accuracy reported below)
# reproducible under Restart & Run All.
np.random.seed(0)

binary_model = BinaryLogisticRegression()
binary_model.fit(X, y, n_iter=10**4, lr=0.0001)


print('In-sample accuracy: %.4f' % (accuracy_score(binary_model.y, binary_model.yhat)))

In-sample accuracy: 0.9877
