## Implemention of logistic regression

In [1]:
import numpy as np
import pandas as pd

class LogisticRegressor:
    def __init__(self, X, y, lr, iter):
        self.X = X
        self.y = y
        self.lr = lr
        self.iter = iter
        self.n_ex = X.shape[0]
        self.n_feat = X.shape[1]
        self.w = np.zeros((self.n_feat, 1))
        self.b = 0 

    def sigmoid_fn(self, Z):
        sigma = 1/(1+np.exp(-Z))
        return sigma
        
    def grad_ascent(self, X, y, learning_rate, n_iter):
        y = np.atleast_2d(y)   
        
        for i in range(n_iter):
            sigma = self.sigmoid_fn(np.dot(X, self.w) + self.b)        
            
            grads = np.dot((y.T - sigma).T, X).T 
        
            self.w = self.w + learning_rate * grads     
            
            self.b = self.b + np.sum(y.T - sigma) / self.n_ex
        print('Weights: \n', self.w.T)
        print('Bias:', self.b)

    def fit(self):
        self.grad_ascent(self.X, self.y, self.lr, self.iter)
    
    def predict(self, X):
        sigma = self.sigmoid_fn(np.dot(X, self.w) + self.b)
        size = sigma.shape[0]
        y_pred = [0]*size

        for i in range(size):
            if sigma[i] > 0.5:
                y_pred[i] = 1
        return y_pred

In [2]:
from sklearn.model_selection import train_test_split

def accuracy(y_pred, y):
    total = y.shape[0]
    count = 0 
    for i in range(total):
        if y_pred[i] == y[i]:
            count += 1
    return 100 * count/total

def standardize(X):
    for j in range(X.shape[1]):
        mean = np.mean(X[:, j])
        sd = np.std(X[:, j])
        X[:, j] = (X[:, j] - mean)/sd

df = pd.read_csv("Social_Network_Ads.csv")

df["Gender"] = np.where(df["Gender"] == "Female", 0, 1)

X = np.asarray(df[['Gender', 'Age', 'EstimatedSalary']]).astype('float64')

standardize(X)

y = np.asarray(df["Purchased"])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                    random_state=42)

learning_rate = 0.001
iterations = 1000

lr = LogisticRegressor(X_train, y_train, learning_rate, iterations)
lr.fit()

y_pred_tr = lr.predict(X_train)

accuracy_tr = accuracy(y_pred_tr, y_train)
print('\nAccuracy for train data:', accuracy_tr, '%')

y_pred_te = lr.predict(X_test)

accuracy_te = accuracy(y_pred_te, y_test)
print('Accuracy for test data:', accuracy_te, '%')

Weights: 
 [[0.15774256 2.21189715 1.22286512]]
Bias: -1.1207162134545934

Accuracy for train data: 83.125 %
Accuracy for test data: 88.75 %


In [3]:
#from inbuilt functions

from sklearn.linear_model import LogisticRegression

lr1 = LogisticRegression(solver='liblinear')
lr1.fit(X, y)

y_pred1_tr = lr1.predict(X_train)
y_pred1_te = lr1.predict(X_test)

print('Weights: \n', lr1.coef_)
print('Bias:', lr1.intercept_)

score_tr = lr1.score(X_train, y_train)
print('\nAccuracy on train data:', score_tr*100, '%')

score_te = lr1.score(X_test, y_test)
print('Accuracy on test data:', score_te*100, '%')

Weights: 
 [[0.14342407 2.27397988 1.13995842]]
Bias: [-1.07062084]

Accuracy on train data: 84.0625 %
Accuracy on test data: 90.0 %
