Note: Use this template to develop your project. Do not change the steps. For each step, you may add additional cells if needed.

#### Group Information

Group No:

- Member 1: Tan Xin Sheng (22301827)
- Member 2: Jee Ci Hong (22303833)
- Member 3: Wong Zhi Heng (22304070)
- Member 4: Chan Jia Liang (22304211)

#### Import Libraries

In [None]:
%config Completer.use_jedi=False
import pandas as pd
import numpy as np
import tensorflow as tf
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource
import numpy as np
output_notebook()
from sklearn.metrics import classification_report, confusion_matrix

#### Load the dataset

In [None]:
url="https://raw.githubusercontent.com/xinsheng04/CPC251_A1/refs/heads/main/classification_dataset.csv"
dataset = pd.read_csv(url)
dataset

Unnamed: 0,f1,f2,f3,f4,f5,label
0,1.286233,15.643743,-1.879915,-11.294839,15.245472,0
1,2.853398,0.129878,17.620669,3.945204,8.157459,1
2,3.285310,3.176560,12.610554,-6.063613,1.831887,0
3,2.019516,-1.967793,9.306435,-0.938714,-1.203038,0
4,-2.326527,3.453234,13.855478,-5.236421,1.547216,0
...,...,...,...,...,...,...
995,-2.248262,-4.619586,3.248760,9.114543,4.370790,1
996,7.882330,1.942559,13.304597,-2.682707,0.623444,0
997,14.421812,-10.688891,5.242771,-2.954794,11.689658,1
998,5.566459,-4.118762,3.670333,7.948329,10.940144,1


#### Define the loss function

In [None]:
def loss_fn(y, yhat):
    """
    This function calculates the loss function
    """
    # Ensure consistent data types
    y = tf.cast(y, tf.float32)
    y = tf.reshape(y, (-1, 1))
    yhat = tf.cast(yhat, tf.float32)
    # We clip all yhat values by a small amount to remove any absolute 0. This is because log(0) is undefined and will cause the loss fn to return nan
    epsilon = 1e-7  # to avoid log(0)
    yhat = tf.clip_by_value(yhat, epsilon, 1 - epsilon)
    # The loss fn is binary cross-entropy
    J = tf.reduce_mean(-((y * tf.math.log(yhat) / tf.math.log(2.0)) + ((1 - y) * tf.math.log(1 - yhat) / tf.math.log(2.0))))
    return J

#### Define function to perform prediction

In [None]:
def sigmoid(z):
    """
    This function calculates the sigmoid function.
    """
    sigm = 1/(1+tf.exp(-z))
    return sigm

def relu(z):
    """
    This function calculates the ReLU function.
    """
    relu = tf.maximum(0.0,z)
    return relu

def forward(W, B, x):
    """
    This function calculates the forward pass (predicts the label).
    """
    """
    Inputs:
    - W: list of weight tensors [W0, W1]
         W[0] shape: (nneurons, nfeatures) -> input to hidden layer
         W[1] shape: (nneurons,)   -> hidden to output
    - B: list of bias tensors [B0, B1]
         B[0] shape: (nneurons,)   -> hidden layer bias
         B[1] shape: ()     -> scalar bias for output
    - x: input vector, shape (5,)
    """
    # Ensure consistent data types
    W[0] = tf.cast(W[0], tf.float32)
    W[1] = tf.cast(W[1], tf.float32)
    x = tf.cast(x, tf.float32)
    B[0] = tf.cast(B[0], tf.float32)
    B[1] = tf.cast(B[1], tf.float32)

    neuron_vals=[] # We start with an empty list then append predictions. Finally, we convert it to a tensor. This method avoids breaking gradient tape

    # Compute the prediction for hidden layer
    # shape can be used to obtain the number of hidden units. The dimension of W[0] is nneurons x nfeatures.
    for neuron in range(W[0].shape[0]):
      # logits = W . x + B
      z = tf.linalg.matvec(x, W[0][neuron]) + B[0][neuron]

      neuron_vals.append(relu(z))

    neuron_yhat = tf.stack(neuron_vals) #Shape: (nneurons,)

    # Compute the prediction for the output layer
    z = tf.linalg.matvec(tf.transpose(neuron_yhat), W[1]) + B[1]
    yhat = sigmoid(z)
    return yhat

#### Define function for model training
Display the training and validation loss values for each epoch of the training loop. The displayed value must be in 6 decimal places.<br>
Hint: <br>
Use `tf.GradientTape` to compute the gradients.

In [None]:
def train(W, B, x, y, alpha):
    """
    This function performs the forward pass, computes the gradient and update the weights and biases.
    """
    with tf.GradientTape() as tape:
        # Loss fn and forward fn are used to compute J with respect to W and B
        yhat = forward(W, B, x)
        J = loss_fn(y, yhat)
    dJ_dW, dJ_dB = tape.gradient(J, [W, B]) # Compute gradient
    W[0].assign_sub(alpha * dJ_dW[0])  # Update W0 (input to hidden layer)
    W[1].assign_sub(alpha * dJ_dW[1])  # Update W1 (hidden to output)
    B[0].assign_sub(alpha * dJ_dB[0])  # Update B0 (hidden layer biases)
    B[1].assign_sub(alpha * dJ_dB[1])  # Update B1 (output layer bias)
    return [W, B]

def fit(model, train_ds, vald_ds, batch_size, alpha, max_epochs):
    """
    This function implements the training loop.
    """
    train_loss_per_epoch = []
    val_loss_per_epoch = []
    # Early stopping regularization
    j = 0 # Patience counter
    v = float('inf') # best validation loss
    patience = 20 # Maximum amount of patience permitted before training is stopped early
    # Best weight and bias parameters
    w_best = model[0]
    b_best = model[1]

    for epoch in range(max_epochs):
        train_loss = 0
        train_batches = 0

        # Train model and compute training loss
        for x_batch_train, y_batch_train in train_ds.batch(batch_size):
            model = train(model[0], model[1], x_batch_train, y_batch_train, alpha)

        for x_batch_train, y_batch_train in train_ds.batch(batch_size):
            yhat = forward(model[0], model[1], x_batch_train)
            train_loss += loss_fn(y_batch_train, yhat)
            train_batches += 1

        # train loss per epoch is average train loss per batch
        train_loss_mean = train_loss/train_batches
        train_loss_per_epoch.append(train_loss_mean)

        # Compute validation loss
        val_loss = 0
        val_batches = 0
        for x_batch_val, y_batch_val in vald_ds.batch(batch_size):
          yhat = forward(model[0], model[1], x_batch_val)
          val_loss += loss_fn(y_batch_val, yhat)
          val_batches += 1

        # val loss per epoch is average val loss per batch
        val_loss_mean = val_loss/val_batches
        val_loss_per_epoch.append(val_loss_mean)
        print(f"Epoch {epoch+1}: Train Loss = {train_loss_mean:.4f}, Val Loss = {val_loss_mean:.4f}")

        if(val_loss_mean < v):
          # Update w_best, b_best, reset j to 0, update v
          j = 0
          w_best = model[0]
          b_best = model[1]
          v = val_loss_mean
        else:
          j += 1 # increase j counter
        if(j > patience):
          # end the training cycle early if the validation loss is not improving
          print(f"Early stopping of training cycle due to validation loss value not improving. Patience = {patience}")
          break

    model = [w_best, b_best]
    return model, train_loss_per_epoch, val_loss_per_epoch

#### Define the tensors to hold the weights and biases (create the model)
Hint: <br>
Use `tf.Variable` to create the tensors.<br>
Put the tensors in a list.

In [None]:
nneurons = 10 # number of neurons
nfeatures = 5 # 5 features: [f1, f2, f3, f4, f5] are used
batch_size = 50
alpha = 0.0001
epochs = 500
W = [
    tf.Variable(np.random.randn(nneurons, nfeatures), dtype=tf.float32),  # Input to hidden layer
    tf.Variable(np.random.randn(nneurons), dtype=tf.float32)     # Hidden to output layer
]

B = [
    tf.Variable(np.random.randn(nneurons), dtype=tf.float32),  # Bias for hidden layer
    tf.Variable(np.random.randn(1), dtype=tf.float32)   # Bias for output layer
]
model = [W, B]
print(f"W = {model[0][0].numpy()} {model[0][1].numpy()}")
print()
print(f"B = {model[1][0].numpy()} {model[1][1].numpy()}")

W = [[-0.75003463 -0.9193431   0.30809     0.78508955 -1.0827471 ]
 [ 1.2571588  -0.25276953  0.9932638  -0.16744997  0.82432914]
 [ 1.9697998   0.8349025   1.8573571   0.00243461  0.7833899 ]
 [-0.86306304 -0.84122187  1.0752046  -0.10224541  1.9814366 ]
 [-0.38581765 -0.6640211   0.41697913  0.6161078  -0.63865685]
 [-0.48527357 -0.7884209   1.4255979   0.07700045  0.4904633 ]
 [-1.1251894  -0.7095601  -0.69189173  0.67671067 -1.0105356 ]
 [ 0.07639316 -1.3005447   0.60728616 -0.7284462   1.4135318 ]
 [-0.817176    0.8899839  -2.1699095  -0.549517   -0.5309505 ]
 [-0.13242227 -0.01786664  1.2714607  -0.44251543  0.26207814]] [-1.5443022   0.6397835  -0.07624615  0.17967835  0.12704368  0.46972516
 -0.39366126  0.12492722 -2.459607    0.07686502]

B = [-0.3354743   1.7833872  -1.034346    1.1316265  -0.44773486  0.6653773
  0.01819446  0.44361678  0.9504468  -1.6466304 ] [1.7567449]


#### Split the dataset
The ratio of training and test is 7:1:2.

In [None]:
from sklearn.model_selection import train_test_split
X = dataset.drop("label", axis=1)
y = dataset["label"]
print(f"Feature shape: {X.shape}, Feature columns: {X.columns}")
print(f"Label shape: {y.shape}")

Feature shape: (1000, 5), Feature columns: Index(['f1', 'f2', 'f3', 'f4', 'f5'], dtype='object')
Label shape: (1000,)


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_train, y_train, test_size=0.333, random_state=42)

#### Normalize the data

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

#### Train the model

In [None]:
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val))
model, train_loss_per_epoch, val_loss_per_epoch = fit(model, train_ds, val_ds, batch_size, alpha, epochs)
# model, train_loss, val_loss = fit(model, train_ds, train_ds, batch_size, alpha, epochs)


Epoch 1: Train Loss = 3.3780, Val Loss = 3.3874
Epoch 2: Train Loss = 3.3744, Val Loss = 3.3837
Epoch 3: Train Loss = 3.3708, Val Loss = 3.3801
Epoch 4: Train Loss = 3.3672, Val Loss = 3.3764
Epoch 5: Train Loss = 3.3637, Val Loss = 3.3728
Epoch 6: Train Loss = 3.3600, Val Loss = 3.3692
Epoch 7: Train Loss = 3.3564, Val Loss = 3.3655
Epoch 8: Train Loss = 3.3528, Val Loss = 3.3619
Epoch 9: Train Loss = 3.3492, Val Loss = 3.3583
Epoch 10: Train Loss = 3.3457, Val Loss = 3.3547
Epoch 11: Train Loss = 3.3421, Val Loss = 3.3511
Epoch 12: Train Loss = 3.3385, Val Loss = 3.3475
Epoch 13: Train Loss = 3.3349, Val Loss = 3.3439
Epoch 14: Train Loss = 3.3313, Val Loss = 3.3403
Epoch 15: Train Loss = 3.3278, Val Loss = 3.3367
Epoch 16: Train Loss = 3.3242, Val Loss = 3.3331
Epoch 17: Train Loss = 3.3206, Val Loss = 3.3295
Epoch 18: Train Loss = 3.3170, Val Loss = 3.3259
Epoch 19: Train Loss = 3.3134, Val Loss = 3.3224
Epoch 20: Train Loss = 3.3099, Val Loss = 3.3188
Epoch 21: Train Loss = 3.3063

#### Display the training loss and validation loss against epoch graph

In [None]:
# Convert TensorFlow tensors to plain Python lists
train_loss_list = [float(x) for x in train_loss_per_epoch]
val_loss_list = [float(x) for x in val_loss_per_epoch]

# Sample x values (epochs)
epochs = list(range(1, len(train_loss_list) + 1))

# Create data sources for hover
source = ColumnDataSource(data={'epoch': epochs, 'train_loss': train_loss_list, 'val_loss': val_loss_list})

# Create figure
p = figure(title="Training and Validation Loss", x_axis_label='Epoch', y_axis_label='Loss', width=900, height=600)
p.line('epoch', 'train_loss', source=source, legend_label="Training Loss", line_width=2, color="blue", muted_alpha=0.1)
p.line('epoch', 'val_loss', source=source, legend_label="Validation Loss", line_width=2, color="red", muted_alpha=0.1)

# Add hover tool
hover = HoverTool(tooltips=[("Epoch", "@epoch"), ("Training Loss", "@train_loss"), ("Validation Loss", "@val_loss")])
p.add_tools(hover)
p.legend.location = "top_right"
p.legend.click_policy = "mute"  # Enables toggling visibility on click

# Show plot
show(p)

#### Predict the test set

In [None]:
def predict (model, test_ds):
  """
  This function predicts the test set.
  """
  report=[]
  for x_batch_test, y_batch_test in test_ds.batch(20):
    yhat = forward(model[0], model[1], x_batch_test)
    J = loss_fn(y_batch_test, yhat)
    yhat = tf.cast(yhat > 0.5, tf.int32)  # now we can hard cast yhat to return 0 or 1
    for i in range(x_batch_test.shape[0]):
      report.append([*x_batch_test.numpy()[i], y_batch_test.numpy()[i], yhat.numpy()[i], J.numpy()])
  report = pd.DataFrame(report, columns=[*X.columns, "Actual label", "Predicted label", "Loss"])
  return report

In [None]:
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test))
predictions = predict(model, test_ds)
print(predictions)

           f1        f2        f3        f4        f5  Actual label  \
0   -1.138573 -2.516707  0.289952 -0.926953  1.180336             1   
1    1.264300 -0.133768  1.005905 -0.614461 -1.363974             0   
2    0.834665  0.762059  1.043679 -0.526401 -0.330217             0   
3   -0.034206 -0.466700  0.110991 -0.212160 -1.429085             0   
4   -0.612726 -0.745587  0.957615 -0.179273  0.828690             1   
..        ...       ...       ...       ...       ...           ...   
461  0.209144  0.428507  0.365459 -0.800480 -0.477457             0   
462  0.590765  0.458360 -1.368133 -0.549997  0.305126             0   
463 -0.187345 -0.724364 -0.150873  0.255468 -1.476060             1   
464 -0.925007  1.144610 -0.424039 -0.785825  0.754091             0   
465  0.050168 -0.192236 -0.092238  1.186582 -0.017933             1   

     Predicted label      Loss  
0                  1  1.968059  
1                  1  1.968059  
2                  1  1.968059  
3              

#### Display the confusion matrix and the classification report.

All the outputs (training
loss, validation loss,
graph and model
evaluation) are
displayed.

In [None]:
print("Classification report")
print(classification_report(predictions["Predicted label"].tolist(), predictions["Actual label"]))
print("Confusion matrix")
print(confusion_matrix(predictions["Predicted label"].tolist(), predictions["Actual label"].tolist()))

Classification report
              precision    recall  f1-score   support

           0       0.63      0.64      0.64       232
           1       0.64      0.63      0.64       234

    accuracy                           0.64       466
   macro avg       0.64      0.64      0.64       466
weighted avg       0.64      0.64      0.64       466

Confusion matrix
[[149  83]
 [ 86 148]]
