## Installations and Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

## Dataset Creation

In [2]:
DATA_DIR = "/kaggle/input/bank-note-authentication-uci-data/BankNote_Authentication.csv"
data = pd.read_csv(DATA_DIR)

In [3]:
# checking the data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1372 entries, 0 to 1371
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   variance  1372 non-null   float64
 1   skewness  1372 non-null   float64
 2   curtosis  1372 non-null   float64
 3   entropy   1372 non-null   float64
 4   class     1372 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 53.7 KB


In [4]:
data.head()

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [5]:
# splitting features and classes
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [6]:
# converting data into numpy arrays for comptational reasons
X = X.to_numpy()
y = y.to_numpy().reshape(-1, 1)

In [7]:
# splitting data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=42, stratify=y)

## Define Network Structure

In [8]:
class TwoLayerPerceptron():
    def __init__(self, X_train, y_train, n_hidden, n_y, lr, activation_fn):
        self.X_train = X_train
        self.y_train = y_train
        self.n_feature = X_train.shape[1]
        self.n_hidden = n_hidden
        self.n_y = n_y
        self.params = {}
        self.cache = {}
        self.grads = {}
        self.lr = lr
        self.activation_fn = activation_fn

    def initialize_params(self):
        np.random.seed(42)
        W1 = np.random.randn(self.n_hidden, self.n_feature) * 0.01
        b1 = np.zeros((self.n_hidden, 1))
        W2 = np.random.randn(self.n_y, self.n_hidden) * 0.01
        b2 = np.zeros((self.n_y, 1))

        self.params = {
            "W1" : W1,
            "b1" : b1,
            "W2" : W2,
            "b2" : b2
        }
        return self.params

    def forward(self, X):
        W1 = self.params["W1"]
        b1 = self.params["b1"]
        W2 = self.params["W2"]
        b2 = self.params["b2"]

        Z1 = np.dot(W1, X.T) + b1
        if (self.activation_fn == "tanh"):
            A1 = np.tanh(Z1)
        elif (self.activation_fn == "RELU"):
            A1 = self.RELU(Z1)
            A1 = np.clip(A1, -0.9999, 0.9999)  
        else:
            print("Please write a valid activation function!")
        Z2 = np.dot(W2, A1) + b2
        A2 = self.sigmoid(Z2)
        A2 = np.clip(A2, 1e-10, 1 - 1e-10)

        self.cache = {
            "Z1" : Z1,
            "A1" : A1,
            "Z2" : Z2,
            "A2" : A2,
        }
        return A2, self.cache

    def loss(self):
        A2 = self.cache["A2"]
        m = A2.shape[1]
        Y = self.y_train
        loss = - (np.dot(np.log(A2), Y) + np.dot(np.log(1 - A2), (1 - Y))) / m
        loss = float(np.squeeze(loss))
        return loss

    def backward(self):
        X = self.X_train
        y = self.y_train
        m = X.shape[0]
        W1 = self.params["W1"]
        W2 = self.params["W2"]
        A1 = self.cache["A1"]
        A2 = self.cache["A2"]

        dZ2 = A2.T - y 
        dW2 = np.dot(dZ2.T, A1.T) / m 
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m 
        dZ1 = np.dot(dZ2, W2) * (1 - np.power(A1, 2)).T 
        dW1 = np.dot(dZ1.T, X) / m 
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m 
        
        self.grads = {
            "dW1" : dW1,
            "dW2" : dW2,
            "db1" : db1,
            "db2" : db2
        }

        return self.grads

    def update_params(self):
        lr = self.lr
        W1 = self.params["W1"]
        b1 = self.params["b1"]
        W2 = self.params["W2"]
        b2 = self.params["b2"]

        dW1 = self.grads["dW1"]
        db1 = self.grads["db1"]
        dW2 = self.grads["dW2"]
        db2 = self.grads["db2"]

        self.params["W1"] -= self.lr * self.grads["dW1"]
        self.params["b1"] -= self.lr * self.grads["db1"].T
        self.params["W2"] -= self.lr * self.grads["dW2"]
        self.params["b2"] -= self.lr * self.grads["db2"]

        return self.params

    def train(self, num_steps, print_cost=True):
        self.initialize_params()
        X = self.X_train

        for i in range(num_steps):
            A2, cache = self.forward(X)
            loss = self.loss()
            grads = self.backward()
            self.update_params()

            if print_cost and i % 500 == 0:
                print(f"Loss at iteration {i} is {loss:.6f}")
        print(f"Loss at iteration {num_steps} is {loss:.6f}")

    def predict(self, X_test):
        params = self.params
        A2, cache = self.forward(X_test)
        preds = A2 > 0.5
        return preds
    
    # helper functions
    def sigmoid(self, Z):
        Z = np.clip(Z, -500, 500)  # Prevent extreme values
        return 1 / (1 + np.exp(-Z))

    def RELU(self, x):
        x = np.nan_to_num(x)  # Replace NaNs with 0
        return np.maximum(0, x)

In [9]:
num_hidden = 5
num_y = 1
lr = 1e-2
MLP_tanh = TwoLayerPerceptron(X_train, y_train, num_hidden, num_y, lr, activation_fn="tanh")
MLP_RELU = TwoLayerPerceptron(X_train, y_train, num_hidden, num_y, lr, activation_fn="RELU")

In [10]:
num_steps = 1500
MLP_tanh.train(num_steps)

Loss at iteration 0 is 0.693481
Loss at iteration 500 is 0.293852
Loss at iteration 1000 is 0.107952
Loss at iteration 1500 is 0.065998


In [11]:
num_steps = 7500
MLP_RELU.train(num_steps)

Loss at iteration 0 is 0.693206
Loss at iteration 500 is 0.547947
Loss at iteration 1000 is 0.387134
Loss at iteration 1500 is 0.324073
Loss at iteration 2000 is 0.291502
Loss at iteration 2500 is 0.273908
Loss at iteration 3000 is 0.263561
Loss at iteration 3500 is 0.256947
Loss at iteration 4000 is 0.251553
Loss at iteration 4500 is 0.246964
Loss at iteration 5000 is 0.242369
Loss at iteration 5500 is 0.238739
Loss at iteration 6000 is 0.235589
Loss at iteration 6500 is 0.232700
Loss at iteration 7000 is 0.230127
Loss at iteration 7500 is 0.228058


In [12]:
tanh_preds = MLP_tanh.predict(X_test)
RELU_preds = MLP_RELU.predict(X_test)

In [13]:
def print_metrics(y_test, y_pred):
    y_test = y_test.flatten()
    y_pred = y_pred.flatten()

    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="binary")
    recall = recall_score(y_test, y_pred, average="binary")
    f1 = f1_score(y_test, y_pred, average="binary")
    conf_mx = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nConfusion Matrix:")
    print(conf_mx)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred)) 

In [14]:
print_metrics(y_test, tanh_preds)

Accuracy: 0.9855
Precision: 0.9683
Recall: 1.0000
F1 Score: 0.9839

Confusion Matrix:
[[149   4]
 [  0 122]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.97      0.99       153
           1       0.97      1.00      0.98       122

    accuracy                           0.99       275
   macro avg       0.98      0.99      0.99       275
weighted avg       0.99      0.99      0.99       275



In [15]:
print_metrics(y_test, RELU_preds)

Accuracy: 0.9345
Precision: 0.9194
Recall: 0.9344
F1 Score: 0.9268

Confusion Matrix:
[[143  10]
 [  8 114]]

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       153
           1       0.92      0.93      0.93       122

    accuracy                           0.93       275
   macro avg       0.93      0.93      0.93       275
weighted avg       0.93      0.93      0.93       275

