<a href="https://colab.research.google.com/github/tunde99/TUTORIALS/blob/main/LogisticRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Source:** [Aladin Pearson YouTube Tutorials on Logistic Regression from Scratch](https://www.youtube.com/watch?v=x1ez9vi611I&t=99s)

In [66]:
import numpy as np
from sklearn.datasets import make_blobs

"""

X: design matrix of dim (m,n)
    m --> #training examples
    n --> #features
y: target (m,1)
w: weight vector (n,1)
b: bias term - scaler

"""
class LogisticRegressionModel():
    def __init__(self, X, learning_rate=0.1, num_iters=10000):
        self.lr = learning_rate
        self.num_iters = num_iters
        self.m,self.n = X.shape     
    
    def initialise_parameters(self):
        np.random.seed(1)
        self.w = np.random.randn(self.n,1)
        self.b = 0
        return self.w, self.b

    def forward_propagation(self,z):
        return (1 + np.exp(-z))**-1

    def cost_function(self, y, y_hat):
        return -1/self.m * y*np.log(y_hat) + (1-y)*np.log(1-y_hat)
        
    
    def backward_propagation(self, X, y, y_hat):
        self.dw = 1/self.m * np.sum(np.dot(X.T, (y_hat - y)))
        self.db = 1/self.m * np.sum(y_hat - y)
        return self.dw, self.db
    
    def gradient_update(self):
        self.w = self.w - self.lr*self.dw
        self.b = self.b - self.lr*self.db  
        return self.w, self.b

    def train(self, X, y):
        # initialise
        np.random.seed(1)
        self.w, self.b = self.initialise_parameters()
        for iter in range(self.num_iters+1):
            # self.w = np.random.randn(self.n,1)
            # self.b = 0

            # hypothesis 
            z = np.dot(X, self.w) + self.b
            y_hat = self.forward_propagation(z)

            # cost
            cost = self.cost_function(y, y_hat)

            # backward and gradient update
            self.dw, self.db = self.backward_propagation(X, y, y_hat)
            self.w, self.b = self.gradient_update()

            #accuracy = self.evaluate(y_hat, y)
            

            if iter%1000 == 0:
                print(f'Cost after {iter} iterations is: {cost}')
        return self.w, self.b

    def predict(self, X):
        z = np.dot(X, self.w) + self.b
        y_hat = self.forward_propagation(z)
        y_hat_labels = y_hat > 0.5
        return y_hat_labels

    def evaluate(self, y_hat, y):
        accuracy =  1/self.m * np.sum(y==y_hat)
        return accuracy

if __name__ == '__main__':
    np.random.seed(2)
    X, y = make_blobs(n_samples=1000, centers=2)  # centers=2 means two classes label
    y = y.reshape(-1,1) # y = y[:,newaxis]
    log_reg = LogisticRegressionModel(X)
    w, b = log_reg.train(X,y)
    y_hat = log_reg.predict(X)
    accuracy = log_reg.evaluate(y_hat, y)

    print(f'Accuracy: {accuracy}')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 [-1.37734006e-04]
 [-3.87948111e-05]
 [ 2.48867964e-07]
 [ 1.59590730e-09]
 [-1.16626402e-05]
 [ 4.54831176e-08]
 [-8.33804145e-05]
 [ 1.22986800e-08]
 [ 2.40380770e-08]
 [-6.45449107e-06]
 [-2.79001345e-04]
 [-1.51234134e-03]
 [-2.71379910e-05]
 [ 2.54729757e-09]
 [ 1.38781256e-07]
 [ 1.20990412e-09]
 [-6.04614044e-06]
 [-9.70386897e-05]
 [ 2.52548562e-09]
 [ 3.35914877e-06]
 [-8.27549027e-06]
 [-1.79413661e-08]
 [-1.48366842e-04]
 [ 2.38162362e-04]
 [ 6.46048059e-08]
 [ 1.54511422e-07]
 [-2.06158304e-05]
 [-6.41206668e-06]
 [ 2.82370062e-07]
 [ 2.47512340e-05]
 [-7.95313408e-04]
 [-3.84521305e-05]
 [ 1.91241800e-05]
 [ 1.83653417e-08]
 [-1.18682197e-04]
 [-9.75804918e-05]
 [ 5.62387765e-09]
 [ 1.53213350e-07]
 [ 1.05528761e-09]
 [-1.18953418e-02]
 [-6.69292152e-03]
 [ 1.83451343e-09]
 [ 2.16031287e-09]
 [-6.51678097e-06]
 [-2.97637765e-04]
 [ 9.40321519e-05]
 [-4.60834491e-03]
 [-2.85318573e-06]
 [-2.77969335e-04]
 [-9