## Gradient descent: Logistic Regression

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  

In [3]:
from sklearn.datasets import load_boston
# Boston Housing Prices data set.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs against an older scikit-learn release.
bdata = load_boston()

# Binary label: 1.0 where the target value is above 40, otherwise 0.0.
EXPENSIVE = (bdata.target > 40).astype(float)

Model Representation:
$$h_\theta(x) = g(\theta^Tx)$$
$$g(z) = \frac{1}{1+e^{-z}}$$
* $h_\theta(x)$ gives the estimated probability that the output is 1, i.e. $P(y = 1 \mid x; \theta)$

Cost Function:
$$J(\theta) = -\frac{1}{m}\sum\limits_{i=1}^m\left[y^{(i)}\log(h_\theta(x^{(i)})) + (1-y^{(i)})\log(1-h_\theta(x^{(i)}))\right]$$

In [11]:
"""
Function
--------
multivariate_lgs
    Gradient Descent to minimize cost function of logistic regression. 
    Used to find co-efficients of multivariate logistic regression.

Parameters
----------
xvalue_matrix, yvalues : narray
    xvalue_matrix: independent variable
    yvalues: dependent variable
    
R: float
    Learning rate
    
MaxIterations: Int
    maximum number of iterations

Returns
-------
alpha: float
    intercept
    
beta: float
    co-efficient
"""
def multivariate_lgs(xvalue_matrix, yvalues, R= 0.1, MaxIterations = 100000):
    m = xvalue_matrix.shape[0] # number of rows
    n = xvalue_matrix.shape[1] # number of columns
    X_b = np.c_[np.ones((m,1)), xvalue_matrix]
    theta = np.random.randn(n+1,1)
    yhat = X_b.dot(theta)
    pred = 1.0/(1.0 + np.exp(-yhat)) # prediction based on logistic distribution
    error = -1/m*(np.dot(yvalues.T, np.log(pred))+np.dot((1-yvalues).T, np.log(1-pred))) # loss function
    
    for i in range(MaxIterations):
        gradient =  2/m * X_b.T.dot(pred - yvalues)
        theta = theta - R * gradient
        yhat = X_b.dot(theta)
        pred = 1.0/(1.0 + np.exp(-yhat))
        new_error = -1/m*(np.dot(yvalues.T, np.log(pred))+np.dot((1-yvalues).T, np.log(1-pred)))
        if abs(error-new_error)<1e-10:
            print("Converged, iterations:%d"%i)
            break
        else:
            error = new_error
    return theta

In [12]:
from sklearn.preprocessing import StandardScaler
# Labels as a column vector, the shape passed to multivariate_lgs below.
Y = EXPENSIVE.reshape(-1, 1)

# Two predictors from the data set: column 3 (CHAS) and column 5 (RM).
X = bdata.data[:, [3, 5]]

# Logistic regression fit by gradient descent on the standardized predictors.
theta = multivariate_lgs(StandardScaler().fit_transform(X), Y)
print(theta)

Converged, iterations:4268
[[-4.57765253]
 [ 0.33377116]
 [ 2.16114743]]


In [13]:
from sklearn.linear_model import LogisticRegression
# Reference fit with scikit-learn on the same standardized predictors.
# C is the inverse regularization strength, so C=1e10 makes the penalty
# negligible and the result comparable to the unregularized fit above.
lgr = LogisticRegression(C=1e10).fit(
    StandardScaler().fit_transform(X),
    Y.flatten(),  # scikit-learn expects 1-D labels
)
print(lgr.intercept_, lgr.coef_)

[-4.58037959] [[ 0.33396565  2.1627009 ]]
