In [None]:
import collections
import numpy as np
from random import shuffle
from random import seed
from random import random
from random import randrange
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

In [None]:

'''
In general, logistic regression makes a prediction, computes a cost by comparing that prediction with the ground truth, and then uses that cost to improve the weights and bias.
'''

class LogisticRegression():
    """Binary logistic regression fitted with full-batch gradient descent.

    The model computes ``sigmoid(X @ weights + bias)`` and minimises the
    mean binary cross-entropy between that output and the 0/1 labels.

    Attributes set by ``train``:
        weights: array of shape (n_features, 1).
        bias: scalar intercept.
    """

    def __init__(self, X, learning_rate = 0.1, num_iters = 10000):
        """Store the hyper-parameters and the shape of the sample matrix.

        Args:
            X: 2-D array of shape (m samples, n features); only its shape
                is read here.
            learning_rate: step size applied to each gradient update.
            num_iters: index of the last gradient-descent iteration.
        """
        self.lr = learning_rate
        self.num_iters = num_iters
        #m samples and n features
        self.m, self.n = X.shape

    def train(self, X, y):
        """Fit weights and bias on ``X`` / ``y`` and return them.

        Args:
            X: 2-D array of shape (m, n).
            y: 0/1 labels, either of shape (m, 1) or flat with m entries.

        Returns:
            Tuple ``(weights, bias)`` after the final update.

        Raises:
            ValueError: if the number of labels differs from the number of
                rows in ``X``.
        """
        X = np.asarray(X)
        # Force the labels into a column vector. A flat (m,) vector would
        # otherwise broadcast against the (m, 1) predictions into an (m, m)
        # matrix and silently corrupt both the cost and the gradients.
        y = np.asarray(y, dtype=float).reshape(-1, 1)

        # Normalise by the data actually being trained on, not by whatever
        # array happened to be passed to the constructor.
        self.m, self.n = X.shape
        if y.shape[0] != self.m:
            raise ValueError(
                f'X has {self.m} samples but y has {y.shape[0]} labels'
            )

        self.weights = np.zeros((self.n, 1))
        self.bias = 0

        # Keeps the logarithms finite once the sigmoid saturates to 0 or 1.
        eps = 1e-15

        # The loop runs through index num_iters inclusive so that the cost of
        # the last multiple of 1000 is also reported.
        for it in range(self.num_iters + 1):
            #calculate hypothesis
            y_predict = self.sigmoid(np.dot(X, self.weights) + self.bias)
            #calculate cost (clipping affects the reported cost only, the
            #gradients below use the unclipped predictions)
            y_safe = np.clip(y_predict, eps, 1 - eps)
            cost = -1/self.m * np.sum(y*np.log(y_safe) + (1 - y) * np.log(1 - y_safe))
            #backprop
            dw = 1/self.m * np.dot(X.T, (y_predict - y))
            db = 1/self.m * np.sum(y_predict - y)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

            if it % 1000 == 0:
                print(f'Cost after iteration {it}: {cost}')
        return self.weights, self.bias

    def predict(self, X):
        """Return boolean class predictions of shape (m, 1) for ``X``.

        A sample is labelled True when its predicted probability exceeds 0.5.
        """
        # The bias belongs inside the sigmoid, exactly as in train();
        # adding it afterwards shifts the probability instead of the logit.
        y_predict = self.sigmoid(np.dot(X, self.weights) + self.bias)
        y_predict_labels = y_predict > 0.5
        return y_predict_labels

    def sigmoid(self, z):
        """Logistic function 1 / (1 + e^-z), applied element-wise."""
        return 1 / (1 + np.exp(-z))
    
    
def plot_data(dataset,color = 'r'):
    """Scatter-plot 2-D points, one series per class label.

    Each row of ``dataset`` is unpacked as ``(x, y, label)``. Points sharing
    a label are drawn with a single ``plt.scatter`` call so that the legend
    shows one entry per label. ``color`` is accepted but not used.
    """
    points_by_label = collections.defaultdict(list)
    for row in dataset:
        x_val, y_val, label = row
        points_by_label[label].append([x_val, y_val])

    for label, points in points_by_label.items():
        # Transposing turns the (k, 2) point list into an x row and a y row.
        xs, ys = np.transpose(np.array(points))
        plt.scatter(xs, ys, label = label)

    plt.legend()

In [None]:
'''
Let's test it out.
'''
if __name__ =='__main__':
    # The samples below are drawn with the stdlib random() function, so that
    # generator has to be seeded for the run to be reproducible. NumPy's
    # generator is seeded as well because it performs the shuffle.
    seed(1)
    np.random.seed(1)
    #create test data: two square clusters centred on (0, 0) and (2, 2)
    x1_base, y1_base, x2_base, y2_base = 0, 0, 2, 2
    x1_rand, y1_rand, x2_rand, y2_rand = 1, 1, 1, 1
    type_1data = [[x1_base + (random() - 0.5) * x1_rand, y1_base + (random() - 0.5) * y1_rand, 0] for _ in range(100)]
    type_2data = [[x2_base + (random() - 0.5) * x2_rand, y2_base + (random() - 0.5) * y2_rand, 1] for _ in range(100)]
    dataset =  np.array(type_1data + type_2data)
    # random.shuffle swaps elements with tuple assignment; on a 2-D ndarray
    # the "elements" are row views, so rows end up duplicated or lost.
    # np.random.shuffle permutes the rows in place without that aliasing.
    np.random.shuffle(dataset)
    X = dataset[:,:2]
    y = dataset[:,2]
    # Column vector so that it lines up with the (m, 1) predictions.
    y = y[:, np.newaxis]
    logreg = LogisticRegression(X)
    w, b = logreg.train(X, y)
    y_predict = logreg.predict(X)
    print(f'Accuracy: {np.sum(y==y_predict) / X.shape[0]}')