In [None]:
# Four-layer network: three hidden layers of ten neurons each (ReLU) and a
# single sigmoid output neuron.
# Learning rate = 0.01, epochs = 10000

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
import tracemalloc

# Start tracking allocations so the peak memory can be reported at the end of the run.
tracemalloc.start()

# Load the breast-cancer dataset and hold out 20% of the samples for testing.
data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Labels become column vectors so they line up with the single-column network output.
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# n_in is the number of input features (read from the data rather than hard-coded);
# n_out is the width of every hidden layer.
n_in = X_train.shape[1]
n_out = 10

# Fixed seed so the weight initialisation is reproducible.
# The draw order (W1, W2, W3, W4) determines the initial values.
np.random.seed(0)

# Small random weights, zero biases.
W1 = np.random.randn(n_in, n_out) * 0.01
b1 = np.zeros((1, n_out))

W2 = np.random.randn(n_out, n_out) * 0.01
b2 = np.zeros((1, n_out))

W3 = np.random.randn(n_out, n_out) * 0.01
b3 = np.zeros((1, n_out))

# Output layer: one neuron.
W4 = np.random.randn(n_out, 1) * 0.01
b4 = np.zeros((1, 1))

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise.

    The input is clamped to [-500, 500] before exponentiation so that
    ``np.exp`` cannot overflow for extreme values.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return the derivative of the sigmoid at pre-activation ``x``.

    Uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)).
    """
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def tanh(x):
    """Return the hyperbolic tangent of ``x``, applied element-wise.

    Delegates to ``np.tanh`` instead of evaluating
    (e^x - e^-x) / (e^x + e^-x) by hand: the explicit formula overflows to
    inf / inf = NaN once |x| is large, whereas ``np.tanh`` saturates
    cleanly at -1 and 1.
    """
    return np.tanh(x)

def tanh_derivative(x):
    """Return the derivative of tanh at ``x``: 1 - tanh(x)^2, element-wise.

    Evaluated through ``np.tanh`` so that large |x| yields 0 rather than the
    NaN produced when an exponential-based tanh overflows.
    """
    t = np.tanh(x)
    return 1 - t ** 2

def ReLU(x):
    """Return the rectified linear unit of ``x``: max(0, x), element-wise."""
    rectified = np.maximum(0, x)
    return rectified

def ReLU_derivative(x):
    """Return the ReLU derivative at ``x`` as a float array.

    The result is 1.0 where ``x`` is strictly positive and 0.0 elsewhere
    (including exactly at zero).
    """
    is_active = x > 0
    return is_active.astype(float)

def binary_cross_entropy(y, y_pred):
    """Return the binary cross-entropy between labels ``y`` and predictions ``y_pred``.

    Predictions are clipped to [1e-15, 1 - 1e-15] so neither logarithm sees
    zero. The summed log-likelihood is divided by ``len(y_pred)``, i.e. the
    size of the first axis.
    """
    tiny = 1e-15
    probs = np.clip(y_pred, tiny, 1 - tiny)
    log_likelihood = y * np.log(probs) + (1 - y) * np.log(1 - probs)
    return (-1 / len(probs)) * np.sum(log_likelihood)

def loss_derivative(y, y_pred):
    """Return the element-wise derivative of binary cross-entropy w.r.t. ``y_pred``.

    Computes -(y / y_pred - (1 - y) / (1 - y_pred)), with 1e-8 added to each
    denominator to avoid division by zero.
    """
    eps = 1e-8
    positive_term = y / (y_pred + eps)
    negative_term = (1 - y) / (1 - y_pred + eps)
    return -(positive_term - negative_term)

def Forward_Propagation(X_train, W1, W2, W3, W4, b1, b2, b3, b4):
    """Run the network forward and return every intermediate value.

    Three ReLU layers (W1/b1, W2/b2, W3/b3) are applied in sequence, followed
    by a sigmoid output layer (W4/b4).

    Returns:
        The tuple (Z1, A1, Z2, A2, Z3, A3, Z4, y_pred), where each Z is a
        layer's pre-activation and each A is its ReLU activation.
    """
    cache = []
    activation = X_train

    # Hidden layers: affine transform followed by ReLU.
    for weights, bias in ((W1, b1), (W2, b2), (W3, b3)):
        pre_activation = np.dot(activation, weights) + bias
        activation = ReLU(pre_activation)
        cache.extend((pre_activation, activation))

    # Output layer: affine transform followed by sigmoid.
    Z4 = np.dot(activation, W4) + b4
    y_pred = sigmoid(Z4)

    return (*cache, Z4, y_pred)


def Backward_Propagation(A1, A2, A3, y_pred, Z1, Z2, Z3, Z4, y_train,
                         learning_rate, W1, W2, W3, W4, b1, b2, b3, b4, X_train):
    """Perform one gradient-descent step for the ReLU/ReLU/ReLU/sigmoid network.

    Args:
        A1, A2, A3: ReLU activations of the three hidden layers.
        y_pred: Sigmoid output of the network.
        Z1, Z2, Z3: Pre-activations of the three hidden layers.
        Z4: Pre-activation of the output layer. No longer read; kept so the
            signature stays compatible with existing callers.
        y_train: Target labels, same shape as ``y_pred``.
        learning_rate: Step size applied to every gradient.
        W1..W4, b1..b4: Weights and biases. They are updated IN PLACE and
            also returned.
        X_train: Input batch that produced the activations.

    Returns:
        The tuple (W4, W3, W2, W1, b4, b3, b2, b1) of updated parameters.

    Notes:
        Gradients are summed over the batch and not divided by the number of
        samples, whereas ``binary_cross_entropy`` averages. The step is
        therefore the gradient of the summed loss.
    """
    # For a sigmoid output with binary cross-entropy, dL/dZ4 reduces to
    # (y_pred - y). Using that closed form, rather than multiplying dL/dy_pred
    # by sigmoid'(Z4), keeps the gradient near +/-1 for confidently wrong
    # predictions. The product form collapses to ~0 when the sigmoid saturates.
    dZ4 = y_pred - y_train
    dW4 = np.dot(A3.T, dZ4)
    db4 = np.sum(dZ4, axis=0, keepdims=True)

    # (Z > 0) is the ReLU derivative: gradient passes only through active units.
    dA3 = np.dot(dZ4, W4.T)
    dZ3 = dA3 * (Z3 > 0)
    dW3 = np.dot(A2.T, dZ3)
    db3 = np.sum(dZ3, axis=0, keepdims=True)

    dA2 = np.dot(dZ3, W3.T)
    dZ2 = dA2 * (Z2 > 0)
    dW2 = np.dot(A1.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * (Z1 > 0)
    dW1 = np.dot(X_train.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    # All gradients are computed before any parameter is touched, so each
    # layer's gradient is based on the pre-update weights.
    W4 -= learning_rate * dW4
    b4 -= learning_rate * db4

    W3 -= learning_rate * dW3
    b3 -= learning_rate * db3

    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1

    return W4, W3, W2, W1, b4, b3, b2, b1


# TRAIN
learning_rate = 0.01
n_epoch = 10000

for epoch in range(n_epoch):

    # Forward pass, loss on the current weights, then one full-batch gradient step.
    Z1, A1, Z2, A2, Z3, A3, Z4, y_pred = Forward_Propagation(X_train, W1, W2, W3, W4, b1, b2, b3, b4)
    loss = binary_cross_entropy(y_train, y_pred)
    W4, W3, W2, W1, b4, b3, b2, b1 = Backward_Propagation(
        A1, A2, A3, y_pred, Z1, Z2, Z3, Z4, y_train,
        learning_rate, W1, W2, W3, W4, b1, b2, b3, b4, X_train)

    if epoch % 1000 == 0:
        print(f"Epoch:{epoch} \t Train Loss: {loss}")


# The y_pred left over from the loop was produced BEFORE the final weight
# update. Re-run the forward pass so the training metrics describe the same
# weights that are evaluated on the test split below.
y_pred = Forward_Propagation(X_train, W1, W2, W3, W4, b1, b2, b3, b4)[-1]

# Threshold probabilities at 0.5 and flatten both vectors for the sklearn metrics.
y_train = y_train.flatten()
y_pred = (y_pred > 0.5).astype(int).flatten()
print("\nTRAINING RESULTS:")
print(f"Accuracy: {accuracy_score(y_train,y_pred)}")
print(f"Precision: {precision_score(y_train,y_pred)}")
print(f"Recall: {recall_score(y_train,y_pred)}")
print(f"F-1 Score: {f1_score(y_train,y_pred)}")




#TEST

# Evaluate the trained weights on the held-out split. The loss is computed on
# the raw probabilities before they are thresholded into class labels.
Z1,A1,Z2,A2,Z3,A3,Z4,y_pred = Forward_Propagation(X_test,W1,W2,W3,W4,b1,b2,b3,b4)
loss = binary_cross_entropy(y_test, y_pred)
y_test = y_test.flatten()
y_pred = (y_pred > 0.5).astype(int).flatten()
print("\nTEST RESULTS:")
print(f"Loss:{loss}")
print(f"Accuracy: {accuracy_score(y_test,y_pred)}")
print(f"Precision: {precision_score(y_test,y_pred)}")
print(f"Recall: {recall_score(y_test,y_pred)}")
print(f"F-1 Score: {f1_score(y_test,y_pred)}")


# LEARNABLE PARAMETERS

def Total_Learnable_Parameters(W1,W2,W3,W4,b1,b2,b3,b4):
    """Return the total number of scalar entries across all weights and biases.

    Each argument must expose a ``.size`` attribute (e.g. a NumPy array).
    Uses the builtin ``sum`` rather than a local accumulator of the same
    name, which would shadow the builtin.
    """
    parameters = (W1, W2, W3, W4, b1, b2, b3, b4)
    return sum(p.size for p in parameters)

# Report the model size.
total_parameters = Total_Learnable_Parameters(W1, W2, W3, W4, b1, b2, b3, b4)
print(f" \nTotal Learnable Parameters: {total_parameters}")


# tracemalloc reports sizes in bytes; convert the peak to megabytes for display.
current, peak = tracemalloc.get_traced_memory()
peak_mb = peak / 1024 / 1024
print(f"Peak RAM usage: {peak_mb:.2f} MB")

# Stop tracing now that the measurement has been taken.
tracemalloc.stop()

Epoch:0 	 Train Loss: 0.693147173558066
Epoch:1000 	 Train Loss: 0.07601631295222705
Epoch:2000 	 Train Loss: 0.07592969839148893
Epoch:3000 	 Train Loss: 0.0759206246349953
Epoch:4000 	 Train Loss: 0.07591735740097315
Epoch:5000 	 Train Loss: 0.07591570491188152
Epoch:6000 	 Train Loss: 0.07591472215782778
Epoch:7000 	 Train Loss: 0.07591407442290293
Epoch:8000 	 Train Loss: 0.07591361723052961
Epoch:9000 	 Train Loss: 0.0759132783826496

TRAINING RESULTS:
Accuracy: 0.9978021978021978
Precision: 0.9965156794425087
Recall: 1.0
F-1 Score: 0.9982547993019197

TEST RESULTS:
Loss:0.4221440532705125
Accuracy: 0.9824561403508771
Precision: 0.9859154929577465
Recall: 0.9859154929577465
F-1 Score: 0.9859154929577465
 
Total Learnable Parameters: 541
Peak RAM usage: 0.78 MB
