## Installations and Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [2]:
DATA_DIR = "/kaggle/input/bank-note-authentication-uci-data/BankNote_Authentication.csv"
data = pd.read_csv(DATA_DIR)

Data was inspected on a previous notebook, so we will skip that step.

In [3]:
# splitting features and classes
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [4]:
# converting data to numpy arrays for comptational reasons
X = X.to_numpy()
y = y.to_numpy().reshape(-1, 1)

In [5]:
# splitting data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=42, stratify=y)

## Define Network Structure

In [6]:
class ThreeLayerPerceptron():
    def __init__(self, X_train, y_train, n_hidden, n_y, lr, activation_fn):
        self.X_train = X_train
        self.y_train = y_train
        self.n_feature = X_train.shape[1]
        self.n_hidden = n_hidden
        self.n_y = n_y
        self.params = {}
        self.cache = {}
        self.grads = {}
        self.lr = lr
        self.activation_fn = activation_fn

    def initialize_params(self):
        np.random.seed(42)
        W1 = np.random.randn(self.n_hidden, self.n_feature) * 0.01
        b1 = np.zeros((self.n_hidden, 1))
        W2 = np.random.randn(self.n_hidden, self.n_hidden) * 0.01
        b2 = np.zeros((self.n_hidden, 1))
        W3 = np.random.randn(self.n_y, self.n_hidden) * 0.01
        b3 = np.zeros((self.n_y, 1))

        self.params = {
            "W1" : W1,
            "b1" : b1,
            "W2" : W2,
            "b2" : b2,
            "W3" : W3,
            "b3" : b3,
        }
        return self.params

    def forward(self, X):
        W1 = self.params["W1"]
        b1 = self.params["b1"]
        W2 = self.params["W2"]
        b2 = self.params["b2"]
        W3 = self.params["W3"]
        b3 = self.params["b3"]
        
        if (self.activation_fn == "tanh"):
            Z1 = np.dot(W1, X.T) + b1
            A1 = np.tanh(Z1)
            Z2 = np.dot(W2, A1) + b2
            A2 = np.tanh(Z2)
            Z3 = np.dot(W3, A2) + b3
            A3 = self.sigmoid(Z3)
            A3 = np.clip(A3, 1e-10, 1 - 1e-10)
            
        elif (self.activation_fn == "RELU"):
            Z1 = np.dot(W1, X.T) + b1
            A1 = self.RELU(Z1)
            A1 = np.clip(A1, -0.9999, 0.9999)  
            Z2 = np.dot(W2, A1) + b2
            A2 = self.RELU(Z2)
            A2 = np.clip(A2, -0.9999, 0.9999)
            Z3 = np.dot(W3, A2) + b3
            A3 = self.sigmoid(Z3)
            A3 = np.clip(A3, 1e-10, 1 - 1e-10)
            
        else:
            print("Please write a valid activation function!")

        self.cache = {
            "Z1" : Z1,
            "A1" : A1,
            "Z2" : Z2,
            "A2" : A2,
            "Z3" : Z3,
            "A3" : A3,
        }
        return A3, self.cache

    def loss(self):
        A3 = self.cache["A3"]
        m = A3.shape[1]
        Y = self.y_train
        loss = - (np.dot(np.log(A3), Y) + np.dot(np.log(1 - A3), (1 - Y))) / m
        loss = float(np.squeeze(loss))
        return loss

    def backward(self):
        X = self.X_train
        y = self.y_train
        m = X.shape[0]
        W1 = self.params["W1"]
        W2 = self.params["W2"]
        W3 = self.params["W3"]
        A1 = self.cache["A1"]
        A2 = self.cache["A2"]
        A3 = self.cache["A3"]

        dZ3 = A3.T - y
        dW3 = np.dot(dZ3.T, A2.T) / m
        db3 = np.sum(dZ3, axis=0, keepdims=True)
        
        #dZ2 = A2.T - y 
        dZ2 = np.dot(dZ3, W3) * (1 - np.power(A2, 2)).T
        dW2 = np.dot(dZ2.T, A1.T) / m 
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m 
        
        dZ1 = np.dot(dZ2, W2) * (1 - np.power(A1, 2)).T 
        dW1 = np.dot(dZ1.T, X) / m 
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m 
        
        self.grads = {
            "dW1" : dW1,
            "dW2" : dW2,
            "dW3" : dW3,
            "db1" : db1,
            "db2" : db2,
            "db3" : db3,
        }

        return self.grads

    def update_params(self):
        lr = self.lr
        W1 = self.params["W1"]
        b1 = self.params["b1"]
        W2 = self.params["W2"]
        b2 = self.params["b2"]
        W3 = self.params["W3"]
        b3 = self.params["b3"]

        dW1 = self.grads["dW1"]
        db1 = self.grads["db1"]
        dW2 = self.grads["dW2"]
        db2 = self.grads["db2"]
        dW3 = self.grads["dW3"]
        db3 = self.grads["db3"]

        self.params["W1"] -= self.lr * self.grads["dW1"]
        self.params["b1"] -= self.lr * self.grads["db1"].T
        self.params["W2"] -= self.lr * self.grads["dW2"]
        self.params["b2"] -= self.lr * self.grads["db2"].T
        self.params["W3"] -= self.lr * self.grads["dW3"]
        self.params["b3"] -= self.lr * self.grads["db3"]

        return self.params

    def train(self, num_steps, print_cost=True):
        self.initialize_params()
        X = self.X_train

        for i in range(num_steps):
            A3, cache = self.forward(X)
            loss = self.loss()
            grads = self.backward()
            self.update_params()

            if (loss < 0.20):
                print(f"Loss at iteration {i} is {loss:.6f}")
                return

            if (print_cost and i % 500 == 0):
                print(f"Loss at iteration {i} is {loss:.6f}")
        print(f"The model could not exceed the 0.2 threshold.")
        print(f"Loss at iteration {i} is {loss:.6f}")

    def predict(self, X_test):
        params = self.params
        A3, cache = self.forward(X_test)
        preds = A3 > 0.5
        return preds
    
    # helper functions
    def sigmoid(self, Z):
        Z = np.clip(Z, -500, 500)  
        return 1 / (1 + np.exp(-Z))

    def RELU(self, x):
        x = np.nan_to_num(x)  
        return np.maximum(0, x)

## Training the Models

In [7]:
num_hidden = 5
num_y = 1
lr = 1e-2
MLP_tanh = ThreeLayerPerceptron(X_train, y_train, num_hidden, num_y, lr, activation_fn="tanh")
MLP_relu = ThreeLayerPerceptron(X_train, y_train, num_hidden, num_y, lr, activation_fn="RELU")

In [8]:
num_steps = 5000
MLP_tanh.train(num_steps)

Loss at iteration 0 is 0.693142
Loss at iteration 500 is 1.041981
Loss at iteration 1000 is 1.041966
Loss at iteration 1500 is 1.041930
Loss at iteration 2000 is 1.041816
Loss at iteration 2500 is 1.041233
Loss at iteration 3000 is 1.033466
Loss at iteration 3500 is 0.444413
Loss at iteration 3644 is 0.199680


In [10]:
num_steps = 10000
MLP_relu.train(num_steps)

Loss at iteration 0 is 0.693146
Loss at iteration 500 is 1.041991
Loss at iteration 1000 is 1.041989
Loss at iteration 1500 is 1.041987
Loss at iteration 2000 is 1.041982
Loss at iteration 2500 is 1.041975
Loss at iteration 3000 is 1.041961
Loss at iteration 3500 is 1.041935
Loss at iteration 4000 is 1.041875
Loss at iteration 4500 is 1.041713
Loss at iteration 5000 is 1.041147
Loss at iteration 5500 is 1.038629
Loss at iteration 6000 is 1.023669
Loss at iteration 6500 is 0.940988
Loss at iteration 7000 is 0.661084
Loss at iteration 7500 is 0.366599
Loss at iteration 8000 is 0.319855
Loss at iteration 8500 is 0.292973
Loss at iteration 9000 is 0.276791
Loss at iteration 9500 is 0.266534
The model could not exceed the 0.2 threshold.
Loss at iteration 9999 is 0.258621


## Testing the Models

In [11]:
tanh_preds = MLP_tanh.predict(X_test)
relu_preds = MLP_relu.predict(X_test)

In [12]:
def print_metrics(y_test, y_pred):
    y_test = y_test.flatten()
    y_pred = y_pred.flatten()

    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="binary")
    recall = recall_score(y_test, y_pred, average="binary")
    f1 = f1_score(y_test, y_pred, average="binary")
    conf_mx = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nConfusion Matrix:")
    print(conf_mx)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred)) 

In [13]:
print_metrics(y_test, tanh_preds)

Accuracy: 0.9745
Precision: 0.9675
Recall: 0.9754
F1 Score: 0.9714

Confusion Matrix:
[[149   4]
 [  3 119]]

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.97      0.98       153
           1       0.97      0.98      0.97       122

    accuracy                           0.97       275
   macro avg       0.97      0.97      0.97       275
weighted avg       0.97      0.97      0.97       275



In [14]:
print_metrics(y_test, relu_preds)

Accuracy: 0.9200
Precision: 0.8968
Recall: 0.9262
F1 Score: 0.9113

Confusion Matrix:
[[140  13]
 [  9 113]]

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.92      0.93       153
           1       0.90      0.93      0.91       122

    accuracy                           0.92       275
   macro avg       0.92      0.92      0.92       275
weighted avg       0.92      0.92      0.92       275

