In [121]:
import numpy as np
import pandas as pd
import scipy as sp
from scipy.special import expit, logit

In [128]:
data_set = pd.read_csv('data/logistic_regression_data.csv')
X = data_set[['grade1','grade2']].values
y = data_set['label'].values.reshape(-1, 1)
alpha=0.1
number_of_itterations=2000
np.random.seed(42)
theta = np.random.randn(X.shape[1],1)


In [133]:
X.shape

(100, 2)

In [134]:
y.shape

(100, 1)

In [135]:
theta.shape

(2, 1)

In [136]:
def hypothesis(X, theta):
    return expit(np.matmul(X,theta))

In [137]:
np.abs(np.matmul(X,theta)).min()

0.012605560555504415

In [138]:
hypothesis(X, theta).shape

(100, 1)

In [139]:
def loss_function(target, predicted):
    
    assert target.shape == predicted.shape
    return np.matmul(target.T,np.log(predicted))+np.matmul((1-target).T,np.log(1-predicted))



In [140]:
loss_function(y, hypothesis(X,theta)).shape

(1, 1)

In [141]:
def cost_function(X,y,theta):
    return -1/len(y)*loss_function(y,hypothesis(X,theta))

In [153]:
cost_function(X,y,theta).shape

(1, 1)

In [144]:
def derivative(X,y,theta):
    return np.matmul(X.T,hypothesis(X,theta) - y)/len(y)

In [145]:
derivative(X,y,theta).shape

(2, 1)

In [146]:
def gradient_step(X,y,theta,alpha):
    return theta - alpha*derivative(X,y,theta)

In [147]:
gradient_step(X,y,theta,alpha).shape

(2, 1)

In [149]:
def gradient_descent(X,y,theta,alpha,num_iters):
    for i in range(num_iters):
        theta = gradient_step(X,y,theta,alpha)
        if i % 50 == 0:
            print(i)
            print(cost_function(X,y,theta))
        

In [150]:
gradient_descent(X,y,theta,0.1,number_of_itterations)

0
[[ 64.05600749]]
50
[[ 50.7872403]]
100
[[ 43.68381537]]
150
[[ 39.41528072]]
200
[[ 36.60335187]]
250
[[ 34.62147911]]
300
[[ 33.15316066]]
350
[[ 32.02353732]]
400
[[ 31.12874869]]
450
[[ 30.40339482]]
500
[[ 29.80428881]]
550
[[ 29.30177292]]
600
[[ 28.87480406]]
650
[[ 28.50803429]]
700
[[ 28.19000325]]
750
[[ 27.91197841]]
800
[[ 27.66718767]]
850
[[ 27.45029757]]
900
[[ 27.25705005]]
950
[[ 27.08400418]]
1000
[[ 26.92834906]]
1050
[[ 26.78776589]]
1100
[[ 26.66032493]]
1150
[[ 26.54440721]]
1200
[[ 26.43864456]]
1250
[[ 26.34187296]]
1300
[[ 26.25309593]]
1350
[[ 26.17145557]]
1400
[[ 26.09620938]]
1450
[[ 26.02671159]]
1500
[[ 25.96239799]]
1550
[[ 25.90277357]]
1600
[[ 25.84740225]]
1650
[[ 25.79589849]]
1700
[[ 25.74792023]]
1750
[[ 25.70316296]]
1800
[[ 25.66135481]]
1850
[[ 25.62225232]]
1900
[[ 25.58563685]]
1950
[[ 25.55131159]]


In [151]:
def sigmoid(a):
    return expit(a)

def cost(x, y, theta):
    m = x.shape[0]
    h = sigmoid(np.matmul(x, theta))
    cost = (np.matmul(-y.T, np.log(h)) - np.matmul((1 -y.T), np.log(1 - h)))/m
    return cost

def gradient_Descent(theta, alpha, x , y):
    m = x.shape[0]
    h = sigmoid(np.matmul(x, theta))
    grad = np.matmul(X.T, (h - y)) / m;
    theta = theta - alpha * grad
    return theta

In [152]:
n_iterations = 500
learning_rate = 0.5
theta = np.zeros((X.shape[1],1))
for i in range(n_iterations):
    theta = gradient_Descent(theta, learning_rate, X, y)
    if i % 50 == 0:
        print(cost(X, y, theta))

[[ 0.67122137]]
[[ 0.34832986]]
[[ 0.29864258]]
[[ 0.27936793]]
[[ 0.26940725]]
[[ 0.2634875]]
[[ 0.25966487]]
[[ 0.25705707]]
[[ 0.2552073]]
[[ 0.25385658]]
