<a href="https://colab.research.google.com/github/vedpd/ML-from-Scratch/blob/main/LogisticRegression_Gradient_Descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
import numpy as np

# def sigmoid(X):
#   return 1 / (1 + np.exp(-X))

class LogisticRegression():
  def __init__(self,lr=0.001,n_iters=1000):
    self.lr = lr
    self.n_iters = n_iters
    self.weights = None
    self.bias = None

  def fit(self, X, y):
    n_samples, n_features = X.shape
    self.weights = np.zeros(n_features)
    self.bias = 0

    for _ in range(self.n_iters):
      linear_predictions = np.dot(X, self.weights) + self.bias
      actual_predictions = self.sigmoid(linear_predictions)

      # compute gradients
      dw = (1/n_samples) *(np.dot(X.T,(actual_predictions - y)))
      db = (1/n_samples) * np.sum(actual_predictions - y)

      # update parameters
      self.weights = self.weights - self.lr * dw
      self.bias = self.bias - self.lr * db

  def sigmoid(self,X):
    return 1 / (1 + np.exp(-X))


  def predict(self, X):
    linear_preds = np.dot(X,self.weights) + self.bias #linear predictions
    probs = self.sigmoid(linear_preds) # probability predictions
    y_pred = [1 if i > 0.5 else 0 for i in probs] #class predictions

    return y_pred

  def predict_proba(self, X):
    linear_combination = np.dot(X,self.weights) + self.bias
    probs = self.sigmoid(linear_combination)
    return probs


In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets

In [33]:
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

In [34]:
bc.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [35]:
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

  return 1 / (1 + np.exp(-X))


In [47]:
y_pred_train = log_reg.predict(X_train)

  return 1 / (1 + np.exp(-X))


In [36]:
y_pred = log_reg.predict(X_test)

  return 1 / (1 + np.exp(-X))


In [37]:
y_prob = log_reg.predict_proba(X_test)

y_prob

  return 1 / (1 + np.exp(-X))


array([1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
       9.97518405e-001, 1.00000000e+000, 2.49510158e-040, 1.00000000e+000,
       6.59242825e-243, 4.15952498e-151, 0.00000000e+000, 4.49086206e-009,
       1.00000000e+000, 6.92194505e-018, 1.00000000e+000, 2.29986108e-135,
       1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.04198245e-164,
       9.99999945e-001, 1.97490194e-080, 4.34436322e-188, 0.00000000e+000,
       1.45636813e-249, 9.45667407e-039, 4.40254972e-077, 1.00000000e+000,
       1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
       1.40997869e-077, 9.99998806e-001, 1.00000000e+000, 9.99999990e-001,
       9.99999996e-001, 2.34476908e-078, 5.52906870e-042, 1.00000000e+000,
       5.02131333e-110, 1.00000000e+000, 6.15471587e-288, 3.38775863e-012,
       1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.87192084e-030,
       1.37699542e-275, 1.71495280e-009, 1.00000000e+000, 9.99999992e-001,
       1.00000000e+000, 4

In [38]:
#metrics to validate
def accuracy(y_pred,y_test):
  return np.sum(y_pred == y_test)/len(y_test)


acc = accuracy(y_pred,y_test)
print(acc)

0.8947368421052632


In [48]:
acc_train = accuracy(y_pred_train,y_train)
print(acc_train)

0.8813186813186813


In [49]:
def recall(y_pred,y_test):
  true_positive = np.sum((y_pred == 1) & (y_test == 1))
  false_negative = np.sum((y_pred == 0) & (y_test == 1))

  if true_positive + false_negative == 0:
      return 0
  else:
      return true_positive / (true_positive + false_negative)



In [40]:
rec = recall(y_pred,y_test)
print(rec)

0


In [41]:
def precision(y_pred,y_test):
  true_positive = np.sum((y_pred == 1) & (y_test == 1))
  false_positive = np.sum((y_pred == 1) & (y_test == 0))

  if true_positive + false_positive == 0:
      return 0
  else:
      return true_positive / (true_positive + false_positive)


In [42]:
prec = precision(y_pred,y_test)
print(prec)

0


In [43]:
def eval_confusion_matrix(y_pred,y_test):
  confusion_matrix = [[0, 0],
                   [0, 0]]

  for actual, predicted in zip(y_test, y_pred):
    confusion_matrix[actual][predicted] += 1

  tn = confusion_matrix[0][0]
  fp = confusion_matrix[0][1]
  fn = confusion_matrix[1][0]
  tp = confusion_matrix[1][1]

  print(f"Confusion Matrix:\n{confusion_matrix}")

In [44]:
eval_confusion_matrix(y_pred,y_test)

Confusion Matrix:
[[42, 3], [9, 60]]


In [45]:
coefficients = pd.DataFrame(data=log_reg.weights, index=bc.feature_names, columns=['Coefficient'])
bias = pd.DataFrame(data=[log_reg.bias], columns=['Bias'], index=['Bias'])

In [46]:
coefficients, bias

(                         Coefficient
 mean radius                 0.306904
 mean texture                0.414774
 mean perimeter              1.775077
 mean area                   0.658122
 mean smoothness             0.002741
 mean compactness           -0.001554
 mean concavity             -0.005842
 mean concave points        -0.002422
 mean symmetry               0.005359
 mean fractal dimension      0.002207
 radius error                0.001380
 texture error               0.028726
 perimeter error            -0.011470
 area error                 -0.802903
 smoothness error            0.000135
 compactness error          -0.000472
 concavity error            -0.000749
 concave points error       -0.000103
 symmetry error              0.000532
 fractal dimension error     0.000015
 worst radius                0.325176
 worst texture               0.525958
 worst perimeter             1.781263
 worst area                 -1.007241
 worst smoothness            0.003371
 worst compa

In [19]:
log_odds = pd.concat([coefficients, bias], axis=0)
log_odds

Unnamed: 0,Coefficient,Bias
mean radius,0.306904,
mean texture,0.414774,
mean perimeter,1.775077,
mean area,0.658122,
mean smoothness,0.002741,
mean compactness,-0.001554,
mean concavity,-0.005842,
mean concave points,-0.002422,
mean symmetry,0.005359,
mean fractal dimension,0.002207,
