# Test om koden er riktig implementert

Her er et forslag til testfunksjoner for å sjekke om koden er riktig implementert.
```assert variabel``` vil gi en feilmelding med mindre variabelen ```variabel = True```. For eksempel vil ```assert a == b``` gi en feilmelding med mindre ```a``` og ```b``` er like.

In [1]:
#For eksempel:
variable = True
assert variable, "You need to change 'variable' to True"

In [2]:
from layers import *
from neural_network import NeuralNetwork
from utils import onehot
import numpy as np
import matplotlib.pyplot as plt
from data_generators import get_train_test_sorting

In [3]:
#We choose some arbitrary values for the dimensions
b = 6
n_max = 7
m = 8
n = 5

d = 10
k = 5
p = 20

#Create an arbitrary dataset
x = np.random.randint(0, m, (b,n))
y = np.random.randint(0, m, (b,n_max))

#initialize the layers
feed_forward = FeedForward(d,p)
attention = Attention(d,k)
embed_pos = EmbedPosition(n_max,m,d)
un_embed = LinearLayer(d,m)
softmax = Softmax()


#a manual forward pass
X = onehot(x, m)

z0 = embed_pos.forward(X)
z1 = feed_forward.forward(z0)
z2 = attention.forward(z1)
z3 = un_embed.forward(z2)
Z = softmax.forward(z3) 


#check the shapes
assert X.shape == (b,m,n), f"X.shape={X.shape}, expected {(b,m,n)}"
assert z0.shape == (b,d,n), f"z0.shape={z0.shape}, expected {(b,d,n)}"
assert z1.shape == (b,d,n), f"z1.shape={z1.shape}, expected {(b,d,n)}"
assert z2.shape == (b,d,n), f"z2.shape={z2.shape}, expected {(b,d,n)}"
assert z3.shape == (b,m,n), f"z3.shape={z3.shape}, expected {(b,m,n)}"
assert Z.shape == (b,m,n), f"Z.shape={Z.shape}, expected {(b,m,n)}"

#is X one-hot?
assert X.sum() == b*n, f"X.sum()={X.sum()}, expected {b*n}"


assert np.allclose(Z.sum(axis=1), 1), f"Z.sum(axis=1)={Z.sum(axis=1)}, expected {np.ones(b)}"
assert np.abs(Z.sum() - b*n) < 1e-5, f"Z.sum()={Z.sum()}, expected {b*n}"
assert np.all(Z>=0), f"Z={Z}, expected all entries to be non-negative"



In [4]:

#test the forward pass
x = np.random.randint(0, m, (b,n_max))
X = onehot(x, m)

#we test with a y that is shorter than the maximum length
n_y = n_max - 1
y = np.random.randint(0, m, (b,n_y))

#initialize a neural network based on the layers above
network = NeuralNetwork([embed_pos, feed_forward, attention, un_embed, softmax])
#and a loss function
loss = CrossEntropy()

#do a forward pass
Z = network.forward(X)

#compute the loss
L = loss.forward(Z, y)

#get the derivative of the loss wrt Z
grad_Z = loss.backward()
#and perform a backward pass
_ = network.backward(grad_Z)

#and and do a gradient descent step
_ = network.step_gd(0.01)

In [5]:
"""
Here you may add additional tests to for example:
- Check if the ['d'] keys in the parameter dictionaries are not None, or receive something when running backward pass
- Check if the parameters change when you perform a gradient descent step
- Check if the loss decreases when you perform a gradient descent step

This is voluntary, but could be useful.
"""

"\nHere you may add additional tests to for example:\n- Check if the ['d'] keys in the parameter dictionaries are not None, or receive something when running backward pass\n- Check if the parameters change when you perform a gradient descent step\n- Check if the loss decreases when you perform a gradient descent step\n\nThis is voluntary, but could be useful.\n"

In [6]:
#check if loss is non-negative
assert L >= 0, f"L={L}, expected L>=0"
assert grad_Z.shape == Z.shape, f"grad_Z.shape={grad_Z.shape}, expected {Z.shape}"

#check if onehot(y) gives zero loss
Y = onehot(y, m)
L = loss.forward(Y, y)
assert L < 1e-5, f"L={L}, expected L<1e-5"


# Unittesting

Her er en rekke unittester for diverse deler av prosjektet som vi programmerte, samt som vi tenkte kunne være lurt å sjekke mens vi holdt på. Noen av disse inneholder metoder fra Tensorflow, som ble brukt til å måle våre egne metoder mot. Kunstig intelligens ble brukt til å generere utkast av disse testene, samt ble brukt til idemyldring for hvilke tester som kunne vært lurt å gjennomføre, vi merket i de fleste tilfeller at var det nødvendig for oss å endre testene som kunstig intelligens genererte for å faktisk få dem til å fungere for vårt prosjekt spesifikt.

In [17]:
import unittest

from neural_network import NeuralNetwork
import layers
import tensorflow as tf
from utils import onehot
from tensorflow.keras.layers import Layer, Softmax, Input
from tensorflow.keras.models import Model
from data_generators import get_train_test_sorting
import unittest
import numpy as np


# Assuming Attention, LinearLayer, and Softmax are defined in attention_module
class AttentionTF(Layer):

    def __init__(self, d, k, **kwargs):
        super(layers.Attention, self).__init__(**kwargs)
        # Initializing weights
        self.W_O = self.add_weight(
            name="W_O", shape=(k, d), initializer="random_normal", trainable=True
        )
        self.W_V = self.add_weight(
            name="W_V", shape=(k, d), initializer="random_normal", trainable=True
        )
        self.W_K = self.add_weight(
            name="W_K", shape=(k, d), initializer="random_normal", trainable=True
        )
        self.W_Q = self.add_weight(
            name="W_Q", shape=(k, d), initializer="random_normal", trainable=True
        )
        self.softmax = Softmax()

    def call(self, inputs):
        # Forward pass
        n = tf.shape(inputs)[2]
        b = tf.shape(inputs)[0]
        x_transpose = tf.transpose(inputs, perm=[0, 2, 1])

        D = tf.zeros((n, n))
        i1, i2 = tf.linalg.band_part(tf.ones((n, n)), -1, 0) - 1
        D = tf.where(i1 == 0, x=tf.constant(-float("inf")), y=D)

        A = self.softmax(
            tf.einsum("bij,jn,nk,bkt->bit", x_transpose, self.W_Q, self.W_K, inputs) + D
        )
        output = inputs + tf.einsum(
            "in, nj, ajk,akt->aik", self.W_O, self.W_V, inputs, A
        )

        return output


class TestAttention(unittest.TestCase):

    def setUp(self):
        # Dimensions for the test
        self.d = 4  # Dimension of input
        self.k = 3  # Dimension of output
        self.batch_size = 2
        self.seq_length = 5

        # Initialize the Attention layer
        self.attention = layers.Attention(self.d, self.k)

        # Generate a random input of shape (batch_size, d, seq_length)
        self.input = np.random.rand(self.batch_size, self.d, self.seq_length)

        # Generate a random gradient of the same shape as input for backward pass
        self.grad_output = np.random.rand(self.batch_size, self.d, self.seq_length)

    def test_forward_backward(self):
        # Forward pass
        output = self.attention.forward(self.input)
        self.assertEqual(output.shape, (self.batch_size, self.d, self.seq_length))

        # Backward pass
        grad_input = self.attention.backward(self.grad_output)
        self.assertEqual(grad_input.shape, (self.batch_size, self.d, self.seq_length))

    def test_parameter_update(self):
        # Perform a forward and backward pass
        self.attention.forward(self.input)
        self.attention.backward(self.grad_output)

        # Store old parameters for comparison
        old_params = {
            key: param["w"].copy() for key, param in self.attention.params.items()
        }

        # Simple gradient descent update
        learning_rate = 0.01
        for param in self.attention.params.values():
            param["w"] -= learning_rate * param["d"]

        # Check if parameters are updated (not equal to old parameters)
        for key, old_w in old_params.items():
            new_w = self.attention.params[key]["w"]
            with self.subTest(param=key):
                self.assertFalse(
                    np.array_equal(old_w, new_w), f"Parameter {key} was not updated"
                )


class TestAttentionLayer:
    def setUp(self):
        r = 5
        m = 2
        num_of_samples = 250
        num_train_batches = 10
        num_test_batches = 1
        data = get_train_test_sorting(
            r, m, num_of_samples, num_train_batches, num_test_batches
        )
        self.x = data["x_train"]
        self.y = data["y_train"]
        self.custom_attention_layer = layers.Attention(d=10, k=5)
        self.tf_attention_layer = AttentionTF(d=10, k=5)

    def test_attention_output(self):
        # Process input through custom attention layer
        custom_output = self.custom_attention_layer.forward(self.x)

        # Process input through TensorFlow attention layer
        # Note: This assumes AttentionTF is integrated within a TensorFlow model
        inputs = Input(shape=(None, self.x.shape[-1]))
        attention_output = AttentionTF(d=10, k=5)(inputs)
        model = Model(inputs=inputs, outputs=attention_output)
        tf_output = model.predict(self.x)

        # Verify outputs are close enough
        np.testing.assert_almost_equal(custom_output, tf_output, decimal=5)


class TestSoftmaxLayer(unittest.TestCase):
    def setUp(self):
        self.softmax = layers.Softmax()
        self.input_data = np.random.randn(3, 5)  # Batch size of 3, 5 classes
        self.grad_output = np.random.randn(3, 5)

    def test_forward_output(self):
        output = self.softmax.forward(self.input_data)
        # Check if softmax output is correctly normalized
        for row in output:
            self.assertAlmostEqual(np.sum(row), 1.0)

    # Ikke brukt AI
    def test_forward_output_example(self):
        softb = layers.Softmax()
        example_input = np.array(
            [[-4.1, 2.2, 3.0, -0.1], [0.1, 0.1, 1.2, 0.3], [-3.6, -1.1, 3.9, -0.1]]
        )
        output = softb.forward(example_input)
        expected_output = tf.nn.softmax(example_input)
        self.assertTrue(np.allclose(output, expected_output, atol=1e-5))

    def test_backward_shape(self):
        self.softmax.forward(self.input_data)  # Forward pass to set up for backward
        grad_input = self.softmax.backward(self.grad_output)
        self.assertEqual(grad_input.shape, self.input_data.shape)

    def test_gradients(self):
        self.softmax.forward(self.input_data)
        grad_input = self.softmax.backward(self.grad_output)
        # This is a basic check. In practice, you might want to check the correctness of the gradient values more thoroughly.
        self.assertFalse(np.array_equal(grad_input, np.zeros_like(grad_input)))


class TestCrossEntropyLayer(unittest.TestCase):
    def setUp(self):
        self.cross_entropy = layers.CrossEntropy()
        self.r = 5
        self.m = 2
        d = 10
        k = 5
        p = 15
        n_max = 2 * self.r - 1
        num_of_samples = 250
        num_train_batches = 10
        num_test_batches = 1
        data = get_train_test_sorting(
            self.r, self.m, num_of_samples, num_train_batches, num_test_batches
        )
        self.x = data["x_train"]
        self.y = data["y_train"]
        feed_forward1 = layers.FeedForward(d, p)
        attention1 = layers.Attention(d, k)
        embed_pos = layers.EmbedPosition(n_max, self.m, d)
        un_embed_pos = layers.LinearLayer(d, self.m)
        softmax = layers.Softmax()
        layers_ = [
            embed_pos,
            attention1,
            feed_forward1,
            un_embed_pos,
            softmax,
        ]
        self.neuralnet = NeuralNetwork(layers_)

    def test_backward_shape(self):
        X_batch = onehot(self.x[0], self.m)
        Z = self.neuralnet.forward(X_batch)
        self.cross_entropy.forward(Z, self.y[0][:, -self.r :])
        grad = self.cross_entropy.backward()
        # Ensure gradient shape matches the input x shape
        self.assertEqual(grad.shape, Z.shape)


unittest.main(argv=[''], verbosity=2, exit=False)

test_forward_backward (__main__.TestAttention) ... ok
test_parameter_update (__main__.TestAttention) ... ok
test_backward_shape (__main__.TestCrossEntropyLayer) ... ok
test_backward_shape (__main__.TestSoftmaxLayer) ... ok
test_forward_output (__main__.TestSoftmaxLayer) ... ok
test_forward_output_example (__main__.TestSoftmaxLayer) ... ok
test_gradients (__main__.TestSoftmaxLayer) ... ok

----------------------------------------------------------------------
Ran 7 tests in 0.139s

OK


<unittest.main.TestProgram at 0x2cea6c940>