In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Load the breast-cancer dataset and preview its first rows.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this exact file exists -- consider a configurable data dir.
file_path = r"C:\Users\antoc\OneDrive\Desktop\6M\Month1\MLmodels\lRregression\breast_data.csv"
data = pd.read_csv(file_path)
print(data.head())

# Every column except the last is treated as a feature; the last is the label.
Xi, yi = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 10% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    Xi,
    yi,
    test_size=0.1,
    random_state=20,
)

In [None]:
class BreastCancerPrediction:
    """Binary logistic-regression classifier trained with batch gradient descent.

    Labels supplied as 2 / 4 are re-encoded to 0 / 1 once, at construction
    time; any other label value is kept unchanged.

    Attributes:
        X: Training features, shape (M, n_features).
        y: Training labels as a column vector, shape (M, 1).
        W: Weight row vector, shape (1, n_features), initialised to 0.5.
        b: Bias term, initialised to 0.
        M: Number of training samples.
        alpha: Learning rate used by ``train``.
        losses: Loss value recorded by every call to ``loss``.
    """

    def __init__(self, X, y, alpha=0.001):
        """Store the training data and initialise the parameters.

        Args:
            X: 2-D array-like of numeric features, one row per sample.
            y: Array-like of labels, one per sample.
            alpha: Gradient-descent learning rate (defaults to 0.001).

        Raises:
            ValueError: If ``X`` is not a non-empty 2-D array or if ``X`` and
                ``y`` do not contain the same number of samples.
        """
        self.X = np.asarray(X, dtype=float)
        if self.X.ndim != 2 or self.X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features)")

        labels = np.array(y).reshape(-1, 1)
        if labels.shape[0] != self.X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        # Encode the labels here (2 -> 0, 4 -> 1) so that every method sees
        # 0/1 targets regardless of the order in which methods are called.
        labels = np.where(labels == 2, 0, labels)
        self.y = np.where(labels == 4, 1, labels)

        # One weight per feature, each starting at 0.5.
        self.W = np.ones((1, self.X.shape[1])) / 2
        self.b = 0  # initialise the bias as 0
        self.M = self.X.shape[0]
        self.alpha = alpha
        self.losses = []

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) applied element-wise."""
        # Clipping keeps np.exp inside the float64 range; beyond +/-500 the
        # sigmoid is already indistinguishable from 0 or 1.
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def cross_entropy(self, y_pred, y):
        """Return the element-wise binary cross-entropy of ``y_pred`` against ``y``."""
        epsilon = 1e-15  # small epsilon value to prevent numerical instability
        # Clip predicted probabilities so that log() never receives 0.
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return (-y) * np.log(y_pred) - (1 - y) * np.log(1 - y_pred)

    def _predict_proba(self, X):
        """Return sigmoid(X @ W.T + b) for the current parameters."""
        return self.sigmoid(np.matmul(X, self.W.T) + self.b)

    def loss(self):
        """Compute the mean cross-entropy on the training data and record it.

        Returns:
            Array of shape (1,) holding the mean loss; the same value is
            appended to ``self.losses``.
        """
        y_pred = self._predict_proba(self.X)
        loss_val = np.sum(self.cross_entropy(y_pred, self.y), axis=0) / self.M
        self.losses.append(loss_val)
        return loss_val

    def compute_gradient(self):
        """Return the gradients of the mean cross-entropy loss.

        Returns:
            Tuple ``(dL_dw, dL_db)`` where ``dL_dw`` has shape
            (n_features, 1) and ``dL_db`` has shape (1,).
        """
        error = self._predict_proba(self.X) - self.y
        dL_dw = np.matmul(self.X.T, error) / self.M
        dL_db = np.sum(error, axis=0) / self.M
        return dL_dw, dL_db

    def train(self, n_iters=1000, plot=True):
        """Run batch gradient descent and optionally plot the loss curve.

        Args:
            n_iters: Number of gradient-descent steps (defaults to 1000).
            plot: When true (the default), show the recorded losses with
                matplotlib once training has finished.
        """
        for _ in range(n_iters):
            self.loss()
            dL_dw, dL_db = self.compute_gradient()
            # dL_dw is a column vector; transpose it to match W's row layout.
            self.W = self.W - self.alpha * dL_dw.T
            self.b = self.b - self.alpha * dL_db

        if plot:
            plt.plot(range(1, len(self.losses) + 1), self.losses)
            plt.xlabel("Iterations")
            plt.ylabel("Loss")
            plt.title("Loss over Iterations")
            plt.show()

    def predict(self, X_test):
        """Predict class labels (0 or 1) for ``X_test``.

        Args:
            X_test: Array-like of numeric features with the same number of
                columns as the training data.

        Returns:
            Integer array of 0/1 predictions; probabilities >= 0.5 map to 1.
        """
        y_pred = self._predict_proba(np.asarray(X_test, dtype=float))
        # predicted probabilities -> class labels (0 or 1)
        y_pred_classes = np.where(y_pred >= 0.5, 1, 0)
        return y_pred_classes
        

In [None]:
# Fit the classifier on the training split (train() also shows the loss curve).
logistic_model = BreastCancerPrediction(X_train, y_train)
logistic_model.train()

# Convert the held-out features to a plain array before predicting.
X_test = np.array(X_test)
print(X_test.shape)
y_pred_test = logistic_model.predict(X_test)

# Compare each predicted class with its gold label. gold / 2 - 1 maps the
# 2 / 4 label encoding onto the 0 / 1 classes returned by predict().
cnt = 0
for pred, gold in zip(y_pred_test, y_test):
    cnt += 1 if pred == gold / 2 - 1 else 0
    print("Predicted: ", pred, "Gold Label: ", gold)
print("Accuracy: ", cnt / len(y_test))