In [1]:
# ============================
# ✅ TASK 1 – DATA LOADING & PREPROCESSING
# ============================

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Load scikit-learn's bundled breast-cancer dataset and unpack features / labels
data = load_breast_cancer()
X, y = data.data, data.target

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)
print("Features:", data.feature_names)

# Hold out 30% of the rows for validation; the fixed seed keeps the split reproducible
X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Learn the scaling statistics from the training rows only, then apply the same
# transform to both splits so no validation information leaks into the scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

print("\n✅ Data loaded & standardized successfully!")

Shape of X: (569, 30)
Shape of y: (569,)
Features: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']

✅ Data loaded & standardized successfully!


In [2]:
# ============================
# ✅ TASK 2 – UTILITIES
# ============================

def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-Z)), computed without overflow.

    The naive formula evaluates exp(-Z), which overflows for large negative Z.
    Here exp is only ever applied to non-positive numbers, and the two
    algebraically equivalent forms are selected per element:

        Z >= 0 :  1 / (1 + exp(-Z))
        Z <  0 :  exp(Z) / (1 + exp(Z))

    Parameters
    ----------
    Z : scalar or array-like
        Pre-activation values.

    Returns
    -------
    Scalar for scalar input, otherwise an ndarray with the same shape as Z.
    """
    Z = np.asarray(Z)
    # -|Z| is never positive, so this exponential stays within (0, 1].
    decay = np.exp(-np.abs(Z))
    result = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and is a no-op for arrays.
    return result[()]

def relu(Z):
    """Rectified linear unit: element-wise max(0, Z)."""
    rectified = np.maximum(0, Z)
    return rectified

def sigmoid_derivative(A):
    """Derivative of the sigmoid expressed through its output A: A * (1 - A).

    Note that the argument is the sigmoid *activation*, not the pre-activation.
    """
    complement = 1 - A
    return A * complement

def relu_derivative(Z):
    """Derivative of ReLU w.r.t. its input: 1.0 where Z > 0, otherwise 0.0."""
    is_active = Z > 0
    return is_active.astype(float)

def compute_bce_loss(Y, Y_hat):
    """Binary cross-entropy between targets Y and predicted probabilities Y_hat.

    The summed loss is divided by Y.shape[1], so both arrays are expected to be
    2-D with one column per sample. Predictions are clipped to
    [1e-15, 1 - 1e-15] before taking logs so that exact 0 / 1 outputs do not
    produce infinities.
    """
    n_samples = Y.shape[1]
    tiny = 1e-15
    probs = np.clip(Y_hat, tiny, 1 - tiny)
    positive_term = Y * np.log(probs)
    negative_term = (1 - Y) * np.log(1 - probs)
    total = np.sum(positive_term + negative_term)
    return -(1 / n_samples) * total

def compute_mse_loss(Y, Y_hat):
    """Squared-error loss: sum of (Y_hat - Y)^2 divided by Y.shape[1].

    Both arrays are expected to be 2-D with one column per sample.
    """
    n_samples = Y.shape[1]
    residual = Y_hat - Y
    return (1 / n_samples) * np.sum(np.square(residual))

In [8]:
# ============================
# ✅ TASK 3 – UPDATED MYANN CLASSIFIER
# ============================

class MyANNClassifier:
    """Fully connected binary classifier trained with full-batch gradient descent.

    Hidden layers use ReLU and the output layer uses a sigmoid. Internally the
    data is stored column-wise: activations have shape (units, n_samples).

    Parameters
    ----------
    layer_dims : sequence of int
        Units per layer, input size first and output size last,
        e.g. ``[30, 10, 1]``.
    learning_rate : float, default=0.01
        Step size of the gradient-descent update.
    n_iterations : int, default=1000
        Number of full-batch updates performed by ``fit``.
    loss : {'bce', 'mse'}, default='bce'
        Training loss. Any other value raises ``ValueError``.
    random_state : int or None, default=1
        Seed of the private generator used to draw the initial weights. The
        default reproduces the weights obtained after ``np.random.seed(1)``
        without touching NumPy's global random state.

    Attributes
    ----------
    parameters_ : dict
        ``'W1', 'b1', ..., 'WL', 'bL'`` weight matrices and bias columns.
    costs_ : list of float
        Loss recorded at every iteration of the most recent ``fit`` call.
    """

    _SUPPORTED_LOSSES = ('bce', 'mse')

    def __init__(self, layer_dims, learning_rate=0.01, n_iterations=1000, loss='bce',
                 random_state=1):
        # Fail early: an unrecognised name would otherwise fall through to the MSE branch.
        if loss not in self._SUPPORTED_LOSSES:
            raise ValueError(
                f"loss must be one of {self._SUPPORTED_LOSSES}, got {loss!r}"
            )
        self.layer_dims = layer_dims
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.loss = loss
        self.random_state = random_state
        self.parameters_ = {}
        self.costs_ = []

    def _initialize_parameters(self):
        """Draw small Gaussian weights (std 0.01) and zero biases for every layer."""
        # A private generator keeps initialisation reproducible without reseeding
        # the process-wide np.random state that other notebook cells may rely on.
        rng = np.random.RandomState(self.random_state)
        self.parameters_ = {}
        for l in range(1, len(self.layer_dims)):
            n_out, n_in = self.layer_dims[l], self.layer_dims[l - 1]
            self.parameters_['W' + str(l)] = rng.randn(n_out, n_in) * 0.01
            self.parameters_['b' + str(l)] = np.zeros((n_out, 1))

    def _forward_propagation(self, X):
        """Run the network on X of shape (n_features, n_samples).

        Returns
        -------
        A : ndarray
            Output-layer activations (sigmoid probabilities).
        cache : list of (A, Z)
            Activation and pre-activation of every layer, in layer order.
        """
        cache = []
        A = X
        last = len(self.layer_dims) - 1

        for l in range(1, len(self.layer_dims)):
            W = self.parameters_['W' + str(l)]
            b = self.parameters_['b' + str(l)]
            Z = np.dot(W, A) + b

            # Sigmoid on the output layer, ReLU on every hidden layer.
            A = sigmoid(Z) if l == last else relu(Z)
            cache.append((A, Z))

        return A, cache

    def _backward_propagation(self, Y, Y_hat, cache):
        """Compute gradients of the loss w.r.t. every weight matrix and bias.

        Parameters
        ----------
        Y : ndarray, shape (1, n_samples)
            Targets.
        Y_hat : ndarray
            Output of the forward pass that produced ``cache``.
        cache : list of (A, Z)
            As returned by ``_forward_propagation``.

        Returns
        -------
        dict with keys ``'dW1', 'db1', ..., 'dWL', 'dbL'``.

        Notes
        -----
        Reads ``self.X_current`` (set by ``fit``) as the input to the first layer.
        """
        grads = {}
        m = Y.shape[1]
        L = len(self.layer_dims) - 1  # number of layers with weights

        # ----- Output layer -----
        A_L = cache[-1][0]
        if self.loss == 'bce':
            # dL/dA * sigmoid'(A) simplifies to (Y_hat - Y) for sigmoid + BCE.
            # Using the closed form avoids the product of a huge clipped dA with
            # a derivative that is exactly 0 once the sigmoid saturates, which
            # would zero the gradient on confidently wrong predictions.
            dZ = Y_hat - Y
        else:
            dZ = 2 * (Y_hat - Y) * sigmoid_derivative(A_L)

        A_prev = cache[-2][0] if L > 1 else self.X_current  # single-layer net: previous is the input

        grads["dW" + str(L)] = (1/m) * np.dot(dZ, A_prev.T)
        grads["db" + str(L)] = (1/m) * np.sum(dZ, axis=1, keepdims=True)

        # ----- Hidden layers, walking from the last hidden layer back to the first -----
        for l in reversed(range(1, L)):
            Z_l = cache[l-1][1]
            A_prev = cache[l-2][0] if l > 1 else self.X_current

            W_next = self.parameters_['W' + str(l+1)]
            dZ = np.dot(W_next.T, dZ) * relu_derivative(Z_l)

            grads["dW" + str(l)] = (1/m) * np.dot(dZ, A_prev.T)
            grads["db" + str(l)] = (1/m) * np.sum(dZ, axis=1, keepdims=True)

        return grads

    def _update_parameters(self, grads):
        """Apply one gradient-descent step in place to every W and b."""
        L = len(self.layer_dims) - 1
        for l in range(1, L + 1):
            self.parameters_['W' + str(l)] -= self.learning_rate * grads['dW' + str(l)]
            self.parameters_['b' + str(l)] -= self.learning_rate * grads['db' + str(l)]

    def fit(self, X, y):
        """Train the network from freshly initialised parameters.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        y : array-like, shape (n_samples,)
            Binary targets (0 / 1).

        Raises
        ------
        ValueError
            If X is not 2-D, its feature count differs from ``layer_dims[0]``,
            or X and y disagree on the number of samples.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        X = X.T
        y = np.asarray(y, dtype=float).reshape(1, -1)

        if X.shape[0] != self.layer_dims[0]:
            raise ValueError(
                f"X has {X.shape[0]} features but layer_dims[0] is {self.layer_dims[0]}"
            )
        if y.shape[1] != X.shape[1]:
            raise ValueError(
                f"X has {X.shape[1]} samples but y has {y.shape[1]}"
            )

        self.X_current = X  # read by _backward_propagation as the first layer's input
        self.costs_ = []    # start a fresh history so refitting does not append to old runs

        self._initialize_parameters()
        loss_fn = compute_bce_loss if self.loss == 'bce' else compute_mse_loss

        for i in range(self.n_iterations):
            Y_hat, cache = self._forward_propagation(X)
            cost = loss_fn(y, Y_hat)

            grads = self._backward_propagation(y, Y_hat, cache)
            self._update_parameters(grads)

            self.costs_.append(cost)

            if i % 1000 == 0:
                print(f"Iteration {i} | Loss = {cost:.6f}")

    def predict(self, X):
        """Return hard 0/1 predictions for X of shape (n_samples, n_features).

        A sample is labelled 1 when its predicted probability exceeds 0.5.
        """
        X = np.asarray(X, dtype=float).T
        Y_hat, _ = self._forward_propagation(X)
        predictions = (Y_hat > 0.5).astype(int)
        return predictions.flatten()


In [9]:
# ============================
# ✅ TASK 4 – TRAIN MODELS
# ============================

def _train_and_report(header, **model_kwargs):
    """Fit a MyANNClassifier on the scaled training split, then print `header`
    followed by the classification report on the validation split.

    Returns the fitted model and its validation predictions.
    """
    model = MyANNClassifier(**model_kwargs)
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_val_scaled)
    print(header)
    print(classification_report(y_val, predictions))
    return model, predictions


# NOTE(review): in the recorded run, models 2 and 3 predict class 1 for every
# validation sample and their loss barely moves over 5000 iterations. This looks
# like under-training with learning_rate=0.001 — TODO confirm with a larger rate.

# Model 1 – BCE loss (1 hidden layer)
model1, pred1 = _train_and_report(
    "\n===== MODEL 1: BCE LOSS =====",
    layer_dims=[30, 10, 1], learning_rate=0.001, n_iterations=5000, loss='bce',
)

# Model 2 – MSE loss
model2, pred2 = _train_and_report(
    "\n===== MODEL 2: MSE LOSS =====",
    layer_dims=[30, 10, 1], learning_rate=0.001, n_iterations=5000, loss='mse',
)

# Model 3 – Deeper network (BCE)
model3, pred3 = _train_and_report(
    "\n===== MODEL 3: BCE (DEEPER) =====",
    layer_dims=[30, 10, 5, 1], learning_rate=0.001, n_iterations=5000, loss='bce',
)

Iteration 0 | Loss = 0.693108
Iteration 1000 | Loss = 0.679339
Iteration 2000 | Loss = 0.666914
Iteration 3000 | Loss = 0.641780
Iteration 4000 | Loss = 0.569008

===== MODEL 1: BCE LOSS =====
              precision    recall  f1-score   support

           0       0.98      0.86      0.92        63
           1       0.92      0.99      0.96       108

    accuracy                           0.94       171
   macro avg       0.95      0.92      0.94       171
weighted avg       0.94      0.94      0.94       171

Iteration 0 | Loss = 0.249980
Iteration 1000 | Loss = 0.246233
Iteration 2000 | Loss = 0.243112
Iteration 3000 | Loss = 0.240222
Iteration 4000 | Loss = 0.236999

===== MODEL 2: MSE LOSS =====
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           1       0.63      1.00      0.77       108

    accuracy                           0.63       171
   macro avg       0.32      0.50      0.39       171
weighted avg   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Iteration 1000 | Loss = 0.680726
Iteration 2000 | Loss = 0.673177
Iteration 3000 | Loss = 0.668578
Iteration 4000 | Loss = 0.665764

===== MODEL 3: BCE (DEEPER) =====
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           1       0.63      1.00      0.77       108

    accuracy                           0.63       171
   macro avg       0.32      0.50      0.39       171
weighted avg       0.40      0.63      0.49       171



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
