<a href="https://colab.research.google.com/github/wentao0420/IANNwTF_Group40/blob/main/IANNwTF_week4_Group40.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **2.1 Preparing MNIST math dataset**

---

In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf
import datetime
import tqdm
import matplotlib.pyplot as plt

(train_ds, test_ds), ds_info = tfds.load('mnist', split = ['train', 'test'], as_supervised = True, with_info = True)


## **2.2.1 Preprocessing**

---

In [None]:
def preprocess(data, batchsize):
    # image should be float
    data = data.map(lambda x, t: (tf.cast(x, float), t))
    # image should be flattened
    data = data.map(lambda x, t: (tf.reshape(x, (-1,)), t))
    # image vector will here have values between -1 and 1
    data = data.map(lambda x,t: ((x/128.)-1., t))
    
    # pairing 2000 samples with other 2000
    zipped_ds = tf.data.Dataset.zip((data.shuffle(2000), data.shuffle(2000)))
    
    # subtask 1 (a-b>=y)
    subtask1 = zipped_ds.map(lambda x1, x2: (x1[0], x2[0], x1[1]+x2[1]>=5))
    subtask1 = subtask1.map(lambda x1, x2, t: (x1,x2, tf.cast(t, tf.int32)))
    
    # subtask 2 (a-b=y)
    subtask2 = zipped_ds.map(lambda x1, x2: (x1[0], x2[0], x1[1]-x2[1]))
    
    # batch & prefetch
    subtask1 = subtask1.batch(batchsize)
    subtask1 = subtask1.prefetch(tf.data.AUTOTUNE)
    subtask2 = subtask2.batch(batchsize)
    subtask2 = subtask2.prefetch(tf.data.AUTOTUNE)
    
    return subtask1, subtask2

train_ds_subtask1, train_ds_subtask2 = preprocess(train_ds, batchsize=32)
test_ds_subtask1, test_ds_subtask2 = preprocess(test_ds, batchsize=32)

## **2.2.2 Checking dataset**

---

In [None]:
# check the contents of the dataset
for img1, img2, label in train_ds_subtask1:
    print(img1.shape, img2.shape, label.shape)
    break

for img1, img2, label in train_ds_subtask2:
    print(img1.shape, img2.shape, label.shape)
    break

for img1, img2, label in test_ds_subtask1:
    print(img1.shape, img2.shape, label.shape)
    break

for img1, img2, label in test_ds_subtask2:
    print(img1.shape, img2.shape, label.shape)
    break

(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)


## **3 Building shared weight models**

---

In [None]:
class TwinMNISTModel(tf.keras.Model):

    def __init__(self, optimizer, subtask):
        super().__init__()
        # inherit functionality from parent class

        # optimizer, loss function and metrics
        self.metrics_list = [tf.keras.metrics.BinaryAccuracy(),
                             tf.keras.metrics.Mean(name="loss")]
        
        self.optimizer = optimizer

        self.subtask = subtask

        # layers to encode the images (both layers used for both images)
        self.dense1 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        
        self.dense3 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        
        if subtask == 0:
          self.out_layer = tf.keras.layers.Dense(1,activation=tf.nn.sigmoid)
          self.loss_function = tf.keras.losses.BinaryCrossentropy()
        
        else:
          self.out_layer = tf.keras.layers.Dense(1,activation=tf.nn.softmax)
          self.loss_function = tf.keras.losses.MeanSquaredError()
        
    def call(self, images, training=False):
        img1, img2 = images
        
        img1_x = self.dense1(img1)
        img1_x = self.dense2(img1_x)
        
        img2_x = self.dense1(img2)
        img2_x = self.dense2(img2_x)
        
        combined_x = tf.concat([img1_x, img2_x ], axis=1)
        combined_x = self.dense3(combined_x)
        return self.out_layer(combined_x)

    @property
    def metrics(self):
        return self.metrics_list
        # return a list with all metrics in the model

    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()

    @tf.function
    def train_step(self, data):
        img1, img2, label = data
        
        with tf.GradientTape() as tape:
            output = self((img1, img2), training=True)
            loss = self.loss_function(label, output)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        # update the state of the metrics according to loss
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        # return a dictionary with metric names as keys and metric results as values
        return {m.name : m.result() for m in self.metrics}

    @tf.function
    def test_step(self, data):
        img1, img2, label = data
        # same as train step (without parameter updates)
        output = self((img1, img2), training=False)
        loss = self.loss_function(label, output)
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        return {m.name : m.result() for m in self.metrics}

In [None]:
def create_summary_writers(config_name):
    
    # Define where to save the logs
    # along with this, you may want to save a config file with the same name so you know what the hyperparameters were used
    # alternatively make a copy of the code that is used for later reference
    
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    train_log_path = f"logs/{config_name}/{current_time}/train"
    val_log_path = f"logs/{config_name}/{current_time}/val"

    # log writer for training metrics
    train_summary_writer = tf.summary.create_file_writer(train_log_path)

    # log writer for validation metrics
    val_summary_writer = tf.summary.create_file_writer(val_log_path)
    
    return train_summary_writer, val_summary_writer

train_summary_writer_subtask1, val_summary_writer_subtask1 = create_summary_writers(config_name="RUN1")
train_summary_writer_subtask2, val_summary_writer_subtask2 = create_summary_writers(config_name="RUN2")

## **4 Training the networks**

---

In [None]:
def training_loop(model, subtask, optimizer, start_epoch,
                  epochs, train_summary_writer, 
                  val_summary_writer, save_path):

    if subtask == 0:
      training = train_ds_subtask1
      testing = test_ds_subtask1
    
    else:
      training = train_ds_subtask2
      testing = test_ds_subtask2

    # iterate over epochs
    for e in range(start_epoch, epochs):

        # train steps on all batches in the training data
        for data in tqdm.tqdm(training, position=0, leave=True):
            metrics = model.train_step(data)

        # log and print training metrics
        with train_summary_writer.as_default():
            # for scalar metrics:
            for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=e)
        
        # print the metrics
        print([f"{key}: {value.numpy()}" for (key, value) in metrics.items()])
        
        # reset metric objects
        model.reset_metrics()

        # evaluate on validation data
        for data in testing:
            metrics = model.test_step(data)
        
        # log validation metrics
        with val_summary_writer.as_default():
            # for scalar metrics:
            for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=e)
            
        print([f"val_{key}: {value.numpy()}" for (key, value) in metrics.items()])
 
        # reset metric objects
        model.reset_metrics()
        
    # save model weights if save_path is given
    if save_path:
        model.save_weights(save_path)

## **5 Experiments**

---

In [None]:
# Subtask 1 - Adam Optimizer

# instantiate model
model = TwinMNISTModel(subtask=0, optimizer= tf.keras.optimizers.Adam())

# choose a path to save the weights
save_path = "trained_model_RUN1"

# pass arguments to training loop function
training_loop(model=model, subtask=0,
    optimizer= tf.keras.optimizers.Adam(),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask1,
    val_summary_writer=val_summary_writer_subtask1,
    save_path=save_path)

100%|██████████| 1875/1875 [00:20<00:00, 91.53it/s] 


['binary_accuracy: 0.9408666491508484', 'loss: 0.15178029239177704']
['val_binary_accuracy: 0.9653000235557556', 'val_loss: 0.09714941680431366']


100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 


['binary_accuracy: 0.9713500142097473', 'loss: 0.07967220991849899']
['val_binary_accuracy: 0.9732999801635742', 'val_loss: 0.07003185153007507']


100%|██████████| 1875/1875 [00:20<00:00, 91.53it/s] 


['binary_accuracy: 0.9764000177383423', 'loss: 0.06658919900655746']
['val_binary_accuracy: 0.9714000225067139', 'val_loss: 0.0856703519821167']


100%|██████████| 1875/1875 [00:15<00:00, 124.34it/s]


['binary_accuracy: 0.9797333478927612', 'loss: 0.05724755674600601']
['val_binary_accuracy: 0.9843999743461609', 'val_loss: 0.047639355063438416']


100%|██████████| 1875/1875 [00:14<00:00, 125.82it/s]


['binary_accuracy: 0.9815166592597961', 'loss: 0.053443558514118195']
['val_binary_accuracy: 0.9843999743461609', 'val_loss: 0.050322070717811584']


100%|██████████| 1875/1875 [00:20<00:00, 91.50it/s] 


['binary_accuracy: 0.982783317565918', 'loss: 0.048555370420217514']
['val_binary_accuracy: 0.9783999919891357', 'val_loss: 0.0639566108584404']


100%|██████████| 1875/1875 [00:20<00:00, 91.51it/s] 


['binary_accuracy: 0.9844333529472351', 'loss: 0.04525556415319443']
['val_binary_accuracy: 0.9832000136375427', 'val_loss: 0.05592930689454079']


100%|██████████| 1875/1875 [00:15<00:00, 122.96it/s]


['binary_accuracy: 0.9862333536148071', 'loss: 0.04039006307721138']
['val_binary_accuracy: 0.982200026512146', 'val_loss: 0.05373512953519821']


100%|██████████| 1875/1875 [00:20<00:00, 91.53it/s] 


['binary_accuracy: 0.9865999817848206', 'loss: 0.039947159588336945']
['val_binary_accuracy: 0.983299970626831', 'val_loss: 0.049364008009433746']


100%|██████████| 1875/1875 [00:15<00:00, 118.90it/s]


['binary_accuracy: 0.9861166477203369', 'loss: 0.03986343741416931']
['val_binary_accuracy: 0.982200026512146', 'val_loss: 0.05591191351413727']


In [None]:
# Subtask 1 - SGD Optimizer without momentum

# instantiate model
model = TwinMNISTModel(subtask=0, optimizer= tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0))

# choose a path to save the weights
save_path = "trained_model_RUN1"

# pass arguments to training loop function
training_loop(model=model, subtask=0,
    optimizer= tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask1,
    val_summary_writer=val_summary_writer_subtask1,
    save_path=save_path)

100%|██████████| 1875/1875 [00:17<00:00, 107.66it/s]


['binary_accuracy: 0.9331499934196472', 'loss: 0.169295996427536']
['val_binary_accuracy: 0.9693999886512756', 'val_loss: 0.08815091103315353']


100%|██████████| 1875/1875 [00:20<00:00, 91.53it/s] 


['binary_accuracy: 0.968583345413208', 'loss: 0.08547843992710114']
['val_binary_accuracy: 0.9758999943733215', 'val_loss: 0.07266151905059814']


100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 


['binary_accuracy: 0.9763500094413757', 'loss: 0.06703048944473267']
['val_binary_accuracy: 0.9810000061988831', 'val_loss: 0.058244865387678146']


100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 


['binary_accuracy: 0.9789833426475525', 'loss: 0.05875303968787193']
['val_binary_accuracy: 0.9821000099182129', 'val_loss: 0.05345512181520462']


100%|██████████| 1875/1875 [00:20<00:00, 91.52it/s] 


['binary_accuracy: 0.9831833243370056', 'loss: 0.04852943494915962']
['val_binary_accuracy: 0.9846000075340271', 'val_loss: 0.04731927067041397']


100%|██████████| 1875/1875 [00:17<00:00, 110.29it/s]


['binary_accuracy: 0.984250009059906', 'loss: 0.044321462512016296']
['val_binary_accuracy: 0.9848999977111816', 'val_loss: 0.045952681452035904']


100%|██████████| 1875/1875 [00:20<00:00, 91.52it/s] 


['binary_accuracy: 0.9860666394233704', 'loss: 0.040131621062755585']
['val_binary_accuracy: 0.9869999885559082', 'val_loss: 0.04251562058925629']


100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 


['binary_accuracy: 0.9874833226203918', 'loss: 0.03705759346485138']
['val_binary_accuracy: 0.9858999848365784', 'val_loss: 0.041828446090221405']


100%|██████████| 1875/1875 [00:14<00:00, 125.10it/s]


['binary_accuracy: 0.987766683101654', 'loss: 0.034938614815473557']
['val_binary_accuracy: 0.9855999946594238', 'val_loss: 0.04400060325860977']


100%|██████████| 1875/1875 [00:16<00:00, 115.69it/s]


['binary_accuracy: 0.9884333610534668', 'loss: 0.03326971083879471']
['val_binary_accuracy: 0.9879000186920166', 'val_loss: 0.03760586306452751']


In [None]:
# Subtask 1 - SGD Optimizer with momentum

# instantiate model
model = TwinMNISTModel(subtask=0, optimizer= tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.2))

# choose a path to save the weights
save_path = "trained_model_RUN1"

# pass arguments to training loop function
training_loop(model=model, subtask=0,
    optimizer= tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.2),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask1,
    val_summary_writer=val_summary_writer_subtask1,
    save_path=save_path)

100%|██████████| 1875/1875 [00:15<00:00, 118.33it/s]


['binary_accuracy: 0.9291666746139526', 'loss: 0.17668986320495605']
['val_binary_accuracy: 0.9696999788284302', 'val_loss: 0.0853545069694519']


100%|██████████| 1875/1875 [00:20<00:00, 90.73it/s] 


['binary_accuracy: 0.9682499766349792', 'loss: 0.08893145620822906']
['val_binary_accuracy: 0.9704999923706055', 'val_loss: 0.08380108326673508']


100%|██████████| 1875/1875 [00:17<00:00, 106.82it/s]


['binary_accuracy: 0.9758999943733215', 'loss: 0.06862276047468185']
['val_binary_accuracy: 0.9761000275611877', 'val_loss: 0.061396725475788116']


100%|██████████| 1875/1875 [00:18<00:00, 102.09it/s]


['binary_accuracy: 0.9799833297729492', 'loss: 0.057545050978660583']
['val_binary_accuracy: 0.9799000024795532', 'val_loss: 0.0553869865834713']


100%|██████████| 1875/1875 [00:19<00:00, 97.07it/s] 


['binary_accuracy: 0.9825166463851929', 'loss: 0.05019189789891243']
['val_binary_accuracy: 0.9835000038146973', 'val_loss: 0.04737605154514313']


100%|██████████| 1875/1875 [00:20<00:00, 91.49it/s] 


['binary_accuracy: 0.9843166470527649', 'loss: 0.04577626660466194']
['val_binary_accuracy: 0.9860000014305115', 'val_loss: 0.04392455145716667']


100%|██████████| 1875/1875 [00:15<00:00, 118.91it/s]


['binary_accuracy: 0.9848999977111816', 'loss: 0.043649423867464066']
['val_binary_accuracy: 0.98089998960495', 'val_loss: 0.05731594190001488']


100%|██████████| 1875/1875 [00:20<00:00, 91.52it/s] 


['binary_accuracy: 0.9866166710853577', 'loss: 0.038584720343351364']
['val_binary_accuracy: 0.986299991607666', 'val_loss: 0.04313533380627632']


100%|██████████| 1875/1875 [00:20<00:00, 91.51it/s] 


['binary_accuracy: 0.986466646194458', 'loss: 0.0374261736869812']
['val_binary_accuracy: 0.986299991607666', 'val_loss: 0.04007863998413086']


100%|██████████| 1875/1875 [00:19<00:00, 94.04it/s] 


['binary_accuracy: 0.9892500042915344', 'loss: 0.03334701061248779']
['val_binary_accuracy: 0.9832000136375427', 'val_loss: 0.04952998459339142']


In [None]:
# Subtask 1 - AdaGrad Optimizer

# instantiate model
model = TwinMNISTModel(subtask=0, optimizer= tf.keras.optimizers.Adagrad())

# choose a path to save the weights
save_path = "trained_model_RUN1"

# pass arguments to training loop function
training_loop(model=model, subtask=0,
    optimizer= tf.keras.optimizers.Adagrad(),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask1,
    val_summary_writer=val_summary_writer_subtask1,
    save_path=save_path)

100%|██████████| 1875/1875 [00:20<00:00, 91.46it/s] 


['binary_accuracy: 0.8735166788101196', 'loss: 0.307016521692276']
['val_binary_accuracy: 0.9009000062942505', 'val_loss: 0.2403934746980667']


100%|██████████| 1875/1875 [00:18<00:00, 100.83it/s]


['binary_accuracy: 0.9011499881744385', 'loss: 0.24735693633556366']
['val_binary_accuracy: 0.9144999980926514', 'val_loss: 0.2223970741033554']


100%|██████████| 1875/1875 [00:40<00:00, 45.75it/s] 


['binary_accuracy: 0.9117333292961121', 'loss: 0.22527524828910828']
['val_binary_accuracy: 0.9161999821662903', 'val_loss: 0.21195083856582642']


100%|██████████| 1875/1875 [00:16<00:00, 111.65it/s]


['binary_accuracy: 0.9162166714668274', 'loss: 0.21185410022735596']
['val_binary_accuracy: 0.9210000038146973', 'val_loss: 0.19425123929977417']


100%|██████████| 1875/1875 [00:16<00:00, 114.62it/s]


['binary_accuracy: 0.9203000068664551', 'loss: 0.20085349678993225']
['val_binary_accuracy: 0.926800012588501', 'val_loss: 0.1895764321088791']


100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 


['binary_accuracy: 0.9231833219528198', 'loss: 0.19286781549453735']
['val_binary_accuracy: 0.9282000064849854', 'val_loss: 0.1839570552110672']


100%|██████████| 1875/1875 [00:15<00:00, 123.67it/s]


['binary_accuracy: 0.9300333261489868', 'loss: 0.18133363127708435']
['val_binary_accuracy: 0.930400013923645', 'val_loss: 0.1754288524389267']


100%|██████████| 1875/1875 [00:20<00:00, 91.53it/s] 


['binary_accuracy: 0.9308666586875916', 'loss: 0.17448915541172028']
['val_binary_accuracy: 0.9289000034332275', 'val_loss: 0.17292532324790955']


100%|██████████| 1875/1875 [00:20<00:00, 91.53it/s] 


['binary_accuracy: 0.9330333471298218', 'loss: 0.16928930580615997']
['val_binary_accuracy: 0.9379000067710876', 'val_loss: 0.1570131927728653']


100%|██████████| 1875/1875 [00:15<00:00, 118.62it/s]


['binary_accuracy: 0.9380499720573425', 'loss: 0.1591188609600067']
['val_binary_accuracy: 0.9417999982833862', 'val_loss: 0.1495475322008133']


In [None]:
# Subtask 1 - RMSProp Optimizer

# instantiate model
model = TwinMNISTModel(subtask=0, optimizer= tf.keras.optimizers.RMSprop())

# choose a path to save the weights
save_path = "trained_model_RUN1"

# pass arguments to training loop function
training_loop(model=model, subtask=0,
    optimizer= tf.keras.optimizers.RMSprop(),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask1,
    val_summary_writer=val_summary_writer_subtask1,
    save_path=save_path)

100%|██████████| 1875/1875 [00:17<00:00, 106.96it/s]


['binary_accuracy: 0.9376999735832214', 'loss: 0.16147851943969727']
['val_binary_accuracy: 0.9702000021934509', 'val_loss: 0.0957457423210144']


100%|██████████| 1875/1875 [00:20<00:00, 90.07it/s] 


['binary_accuracy: 0.9660333395004272', 'loss: 0.09932348132133484']
['val_binary_accuracy: 0.972599983215332', 'val_loss: 0.09319237619638443']


100%|██████████| 1875/1875 [00:20<00:00, 90.42it/s] 


['binary_accuracy: 0.968583345413208', 'loss: 0.10031541436910629']
['val_binary_accuracy: 0.9621999859809875', 'val_loss: 0.10933385789394379']


100%|██████████| 1875/1875 [00:21<00:00, 89.01it/s] 


['binary_accuracy: 0.9689499735832214', 'loss: 0.10345608741044998']
['val_binary_accuracy: 0.9578999876976013', 'val_loss: 0.14959388971328735']


100%|██████████| 1875/1875 [00:16<00:00, 112.22it/s]


['binary_accuracy: 0.9702000021934509', 'loss: 0.1024802178144455']
['val_binary_accuracy: 0.9664000272750854', 'val_loss: 0.08993273228406906']


100%|██████████| 1875/1875 [00:16<00:00, 112.82it/s]


['binary_accuracy: 0.9713333249092102', 'loss: 0.09660090506076813']
['val_binary_accuracy: 0.9796000123023987', 'val_loss: 0.06468665599822998']


100%|██████████| 1875/1875 [00:16<00:00, 111.90it/s]


['binary_accuracy: 0.9717166423797607', 'loss: 0.09541638195514679']
['val_binary_accuracy: 0.9670000076293945', 'val_loss: 0.1146751120686531']


100%|██████████| 1875/1875 [00:16<00:00, 113.95it/s]


['binary_accuracy: 0.9714999794960022', 'loss: 0.09693825989961624']
['val_binary_accuracy: 0.9714999794960022', 'val_loss: 0.09603094309568405']


100%|██████████| 1875/1875 [00:17<00:00, 108.40it/s]


['binary_accuracy: 0.9728500247001648', 'loss: 0.09948007762432098']
['val_binary_accuracy: 0.968500018119812', 'val_loss: 0.15056847035884857']


100%|██████████| 1875/1875 [00:16<00:00, 111.93it/s]


['binary_accuracy: 0.9738333225250244', 'loss: 0.09993530809879303']
['val_binary_accuracy: 0.982200026512146', 'val_loss: 0.06740312278270721']


In [None]:
# Subtask 2 - Adam Optimizer

# instantiate model
model = TwinMNISTModel(subtask=1, optimizer= tf.keras.optimizers.Adam())

# choose a path to save the weights
save_path = "trained_model_RUN2"

# pass arguments to training loop function
training_loop(model=model, subtask=1,
    optimizer= tf.keras.optimizers.Adam(),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask2,
    val_summary_writer=val_summary_writer_subtask2,
    save_path=save_path)

100%|██████████| 1875/1875 [00:16<00:00, 110.67it/s]


['binary_accuracy: 0.08988333493471146', 'loss: 17.681734085083008']
['val_binary_accuracy: 0.0925000011920929', 'val_loss: 17.62100601196289']


100%|██████████| 1875/1875 [00:17<00:00, 109.21it/s]


['binary_accuracy: 0.09033333510160446', 'loss: 17.65250015258789']
['val_binary_accuracy: 0.08910000324249268', 'val_loss: 17.549020767211914']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.08848333358764648', 'loss: 17.668066024780273']
['val_binary_accuracy: 0.09239999949932098', 'val_loss: 17.69289207458496']


100%|██████████| 1875/1875 [00:20<00:00, 91.55it/s] 


['binary_accuracy: 0.09070000052452087', 'loss: 17.615833282470703']
['val_binary_accuracy: 0.08869999647140503', 'val_loss: 17.947383880615234']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.08974999934434891', 'loss: 17.71453285217285']
['val_binary_accuracy: 0.08919999748468399', 'val_loss: 17.69868278503418']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.09048333019018173', 'loss: 17.670866012573242']
['val_binary_accuracy: 0.08760000020265579', 'val_loss: 17.797025680541992']


100%|██████████| 1875/1875 [00:14<00:00, 125.30it/s]


['binary_accuracy: 0.09083333611488342', 'loss: 17.75553321838379']
['val_binary_accuracy: 0.09019999951124191', 'val_loss: 17.57288360595703']


100%|██████████| 1875/1875 [00:14<00:00, 127.37it/s]


['binary_accuracy: 0.08980000019073486', 'loss: 17.74886703491211']
['val_binary_accuracy: 0.08479999750852585', 'val_loss: 17.956769943237305']


100%|██████████| 1875/1875 [00:14<00:00, 127.51it/s]


['binary_accuracy: 0.09099999815225601', 'loss: 17.68796730041504']
['val_binary_accuracy: 0.0868000015616417', 'val_loss: 17.77985191345215']


100%|██████████| 1875/1875 [00:14<00:00, 127.74it/s]


['binary_accuracy: 0.09201666712760925', 'loss: 17.66276741027832']
['val_binary_accuracy: 0.08860000222921371', 'val_loss: 17.576078414916992']


In [None]:
# Subtask 2 - SGD Optimizer without momentum

# instantiate model
model = TwinMNISTModel(subtask=1, optimizer= tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0))

# choose a path to save the weights
save_path = "trained_model_RUN2"

# pass arguments to training loop function
training_loop(model=model, subtask=1,
    optimizer= tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask2,
    val_summary_writer=val_summary_writer_subtask2,
    save_path=save_path)

100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.08871666342020035', 'loss: 17.681333541870117']
['val_binary_accuracy: 0.08919999748468399', 'val_loss: 17.803813934326172']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.08746666461229324', 'loss: 17.807567596435547']
['val_binary_accuracy: 0.09030000120401382', 'val_loss: 17.644569396972656']


100%|██████████| 1875/1875 [00:14<00:00, 131.79it/s]


['binary_accuracy: 0.09061666578054428', 'loss: 17.729999542236328']
['val_binary_accuracy: 0.09070000052452087', 'val_loss: 17.720746994018555']


100%|██████████| 1875/1875 [00:20<00:00, 91.55it/s] 


['binary_accuracy: 0.09025000035762787', 'loss: 17.693132400512695']
['val_binary_accuracy: 0.09080000221729279', 'val_loss: 17.9841251373291']


100%|██████████| 1875/1875 [00:15<00:00, 124.00it/s]


['binary_accuracy: 0.09216666966676712', 'loss: 17.649166107177734']
['val_binary_accuracy: 0.08959999680519104', 'val_loss: 17.55061912536621']


100%|██████████| 1875/1875 [00:20<00:00, 91.55it/s] 


['binary_accuracy: 0.09031666815280914', 'loss: 17.61240005493164']
['val_binary_accuracy: 0.08839999884366989', 'val_loss: 17.773662567138672']


100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 


['binary_accuracy: 0.08968333154916763', 'loss: 17.770000457763672']
['val_binary_accuracy: 0.0877000018954277', 'val_loss: 17.8886775970459']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.0913499966263771', 'loss: 17.736732482910156']
['val_binary_accuracy: 0.08399999886751175', 'val_loss: 17.578275680541992']


100%|██████████| 1875/1875 [00:14<00:00, 131.20it/s]


['binary_accuracy: 0.09094999730587006', 'loss: 17.596866607666016']
['val_binary_accuracy: 0.09139999747276306', 'val_loss: 17.530351638793945']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.09158333390951157', 'loss: 17.702667236328125']
['val_binary_accuracy: 0.08720000088214874', 'val_loss: 17.928813934326172']


In [None]:
# Subtask 2 - SGD Optimizer with momentum

# instantiate model
model = TwinMNISTModel(subtask=1, optimizer= tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.2))

# choose a path to save the weights
save_path = "trained_model_RUN2"

# pass arguments to training loop function
training_loop(model=model, subtask=1,
    optimizer= tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.2),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask2,
    val_summary_writer=val_summary_writer_subtask2,
    save_path=save_path)

100%|██████████| 1875/1875 [00:16<00:00, 116.39it/s]


['binary_accuracy: 0.08980000019073486', 'loss: 17.70516586303711']
['val_binary_accuracy: 0.09229999780654907', 'val_loss: 17.632787704467773']


100%|██████████| 1875/1875 [00:14<00:00, 126.99it/s]


['binary_accuracy: 0.08921666443347931', 'loss: 17.69886589050293']
['val_binary_accuracy: 0.09300000220537186', 'val_loss: 17.667831420898438']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.09163333475589752', 'loss: 17.702566146850586']
['val_binary_accuracy: 0.09229999780654907', 'val_loss: 17.25359344482422']


100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 


['binary_accuracy: 0.08990000188350677', 'loss: 17.74839973449707']
['val_binary_accuracy: 0.09019999951124191', 'val_loss: 17.928115844726562']


100%|██████████| 1875/1875 [00:15<00:00, 124.30it/s]


['binary_accuracy: 0.08969999849796295', 'loss: 17.650299072265625']
['val_binary_accuracy: 0.08479999750852585', 'val_loss: 17.74810218811035']


100%|██████████| 1875/1875 [00:14<00:00, 131.08it/s]


['binary_accuracy: 0.08990000188350677', 'loss: 17.69183349609375']
['val_binary_accuracy: 0.08919999748468399', 'val_loss: 17.678115844726562']


100%|██████████| 1875/1875 [00:14<00:00, 126.48it/s]


['binary_accuracy: 0.0904499962925911', 'loss: 17.70133399963379']
['val_binary_accuracy: 0.08789999783039093', 'val_loss: 17.637880325317383']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.09025000035762787', 'loss: 17.70159912109375']
['val_binary_accuracy: 0.09390000253915787', 'val_loss: 17.620407104492188']


100%|██████████| 1875/1875 [00:17<00:00, 107.49it/s]


['binary_accuracy: 0.08958332985639572', 'loss: 17.654733657836914']
['val_binary_accuracy: 0.08869999647140503', 'val_loss: 17.97164535522461']


100%|██████████| 1875/1875 [00:20<00:00, 91.55it/s] 


['binary_accuracy: 0.09141666442155838', 'loss: 17.748233795166016']
['val_binary_accuracy: 0.08789999783039093', 'val_loss: 17.690195083618164']


In [None]:
# Subtask 2 - AdaGrad Optimizer

# instantiate model
model = TwinMNISTModel(subtask=1, optimizer= tf.keras.optimizers.Adagrad())

# choose a path to save the weights
save_path = "trained_model_RUN2"

# pass arguments to training loop function
training_loop(model=model, subtask=1,
    optimizer= tf.keras.optimizers.Adagrad(),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask2,
    val_summary_writer=val_summary_writer_subtask2,
    save_path=save_path)

100%|██████████| 1875/1875 [00:15<00:00, 121.65it/s]


['binary_accuracy: 0.09040000289678574', 'loss: 17.633432388305664']
['val_binary_accuracy: 0.09229999780654907', 'val_loss: 17.666433334350586']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.0916166678071022', 'loss: 17.606800079345703']
['val_binary_accuracy: 0.08560000360012054', 'val_loss: 17.936901092529297']


100%|██████████| 1875/1875 [00:13<00:00, 136.91it/s]


['binary_accuracy: 0.09234999865293503', 'loss: 17.774932861328125']
['val_binary_accuracy: 0.09510000050067902', 'val_loss: 17.559904098510742']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.08985000103712082', 'loss: 17.687833786010742']
['val_binary_accuracy: 0.0860000029206276', 'val_loss: 17.94788360595703']


100%|██████████| 1875/1875 [00:20<00:00, 91.57it/s] 


['binary_accuracy: 0.09009999781847', 'loss: 17.675233840942383']
['val_binary_accuracy: 0.08569999784231186', 'val_loss: 17.82358169555664']


100%|██████████| 1875/1875 [00:13<00:00, 134.81it/s]


['binary_accuracy: 0.0904499962925911', 'loss: 17.641799926757812']
['val_binary_accuracy: 0.09040000289678574', 'val_loss: 17.72843360900879']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.08873333036899567', 'loss: 17.82196617126465']
['val_binary_accuracy: 0.09059999883174896', 'val_loss: 17.69139289855957']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.09133332967758179', 'loss: 17.68790054321289']
['val_binary_accuracy: 0.09030000120401382', 'val_loss: 17.62420082092285']


100%|██████████| 1875/1875 [00:20<00:00, 91.55it/s] 


['binary_accuracy: 0.08985000103712082', 'loss: 17.858699798583984']
['val_binary_accuracy: 0.08749999850988388', 'val_loss: 17.84584617614746']


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['binary_accuracy: 0.09096666425466537', 'loss: 17.61753273010254']
['val_binary_accuracy: 0.08869999647140503', 'val_loss: 17.79852294921875']


In [None]:
# Subtask 2 - RMSProp Optimizer

# instantiate model
model = TwinMNISTModel(subtask=1, optimizer= tf.keras.optimizers.RMSprop())

# choose a path to save the weights
save_path = "trained_model_RUN2"

# pass arguments to training loop function
training_loop(model=model, subtask=1,
    optimizer= tf.keras.optimizers.RMSprop(),
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask2,
    val_summary_writer=val_summary_writer_subtask2,
    save_path=save_path)

100%|██████████| 1875/1875 [00:16<00:00, 114.97it/s]


['binary_accuracy: 0.08846666663885117', 'loss: 17.712066650390625']
['val_binary_accuracy: 0.08640000224113464', 'val_loss: 17.599639892578125']


100%|██████████| 1875/1875 [00:15<00:00, 119.14it/s]


['binary_accuracy: 0.0902833342552185', 'loss: 17.739032745361328']
['val_binary_accuracy: 0.09099999815225601', 'val_loss: 17.626996994018555']


100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 


['binary_accuracy: 0.09139999747276306', 'loss: 17.737667083740234']
['val_binary_accuracy: 0.09109999984502792', 'val_loss: 17.648561477661133']


100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 


['binary_accuracy: 0.0910833328962326', 'loss: 17.527433395385742']
['val_binary_accuracy: 0.08889999985694885', 'val_loss: 17.828575134277344']


100%|██████████| 1875/1875 [00:15<00:00, 120.92it/s]


['binary_accuracy: 0.09120000153779984', 'loss: 17.710567474365234']
['val_binary_accuracy: 0.09260000288486481', 'val_loss: 17.38328742980957']


100%|██████████| 1875/1875 [00:15<00:00, 117.90it/s]


['binary_accuracy: 0.09123333543539047', 'loss: 17.777999877929688']
['val_binary_accuracy: 0.09359999746084213', 'val_loss: 17.888578414916992']


100%|██████████| 1875/1875 [00:15<00:00, 118.14it/s]


['binary_accuracy: 0.09014999866485596', 'loss: 17.749967575073242']
['val_binary_accuracy: 0.09000000357627869', 'val_loss: 17.765974044799805']


100%|██████████| 1875/1875 [00:15<00:00, 119.04it/s]


['binary_accuracy: 0.08781666308641434', 'loss: 17.698766708374023']
['val_binary_accuracy: 0.08889999985694885', 'val_loss: 18.077375411987305']


100%|██████████| 1875/1875 [00:16<00:00, 112.58it/s]


['binary_accuracy: 0.0924166664481163', 'loss: 17.55656623840332']
['val_binary_accuracy: 0.08879999816417694', 'val_loss: 17.66024398803711']


100%|██████████| 1875/1875 [00:16<00:00, 115.26it/s]


['binary_accuracy: 0.09043333679437637', 'loss: 17.710399627685547']
['val_binary_accuracy: 0.08749999850988388', 'val_loss: 17.853633880615234']


## **6 Visualization**

---

Tried to visualize the data (accuracy and loss) by pulling them out from [train_summary_writer] and [val_summary_writer] but couldn't get the code right. 

## **7 Observation**

---



1. In Subtask 1, SGD Optimizer without momentum shows a better trend of accuracy among 5 optimizers.
  *   Explain Adam optimizer's trend regarding accuracy and loss
  *   Explain SGD optimizer (without momentum)'s trend regarding accuracy and loss
  *   Explain SGD optimizer (with momentum)'s trend regarding accuracy and loss
  *   Explain AdaGrad optimizer's trend regarding accuracy and loss
  *   Explain RMSProp optimizer's trend regarding accuracy and loss

2. In Subtask 2, Adam optimizer shows a better trend of accuracy among 5 optimizers.
  *   Explain Adam optimizer's trend regarding accuracy and loss
  *   Explain SGD optimizer (without momentum)'s trend regarding accuracy and loss
  *   Explain SGD optimizer (with momentum)'s trend regarding accuracy and loss
  *   Explain AdaGrad optimizer's trend regarding accuracy and loss
  *   Explain RMSProp optimizer's trend regarding accuracy and loss





