In [1]:
!pip install wandb



In [2]:
import wandb
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mviinod9[0m ([33mviinod9-iitm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# Import necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import fashion_mnist

# Load and preprocess dataset
def load_and_preprocess_data():
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

    # Split into train and validation
    val_size = 5000
    x_val, y_val = x_train[:val_size], y_train[:val_size]
    x_train, y_train = x_train[val_size:], y_train[val_size:]

    # Normalize dataset
    x_train, x_val, x_test = x_train / 255.0, x_val / 255.0, x_test / 255.0

    # One-hot encoding
    y_train = to_categorical(y_train, 10)
    y_val = to_categorical(y_val, 10)
    y_test = to_categorical(y_test, 10)

    return x_train, y_train, x_val, y_val, x_test, y_test

# Load data
x_train, y_train, x_val, y_val, x_test, y_test = load_and_preprocess_data()

# Define necessary functions
def initialize_weights(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size):
    weights = {}
    prev_size = input_size
    hidden_size = num_nodes_hidden_layers[0] if isinstance(num_nodes_hidden_layers, list) else num_nodes_hidden_layers

    for i in range(num_hidden_layer):
        if weight == 'random':
            weights[f'W{i+1}'] = np.random.randn(prev_size, hidden_size) * 0.01
        elif weight == 'xavier':
            weights[f'W{i+1}'] = np.random.randn(prev_size, hidden_size) * np.sqrt(1 / prev_size)
        weights[f'b{i+1}'] = np.zeros((1, hidden_size))
        prev_size = hidden_size

    if weight == 'random':
        weights['W_out'] = np.random.randn(prev_size, output_size) * 0.01
    elif weight == 'xavier':
        weights['W_out'] = np.random.randn(prev_size, output_size) * np.sqrt(1 / prev_size)
    weights['b_out'] = np.zeros((1, output_size))

    return weights

def activation_function(Z, activation):
    if activation == 'sigmoid':
        return 1 / (1 + np.exp(-Z))
    elif activation == 'tanh':
        return np.tanh(Z)
    elif activation == 'relu':
        return np.maximum(0, Z)
    elif activation == 'softmax':
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return expZ / np.sum(expZ, axis=1, keepdims=True)

def derivative_activation(Z, activation):
    if activation == 'sigmoid':
        sig = activation_function(Z, 'sigmoid')
        return sig * (1 - sig)
    elif activation == 'tanh':
        return 1 - np.tanh(Z)**2
    elif activation == 'relu':
        return (Z > 0).astype(float)

def cross_entropy_loss(y_actual, y_pred):
    return -np.mean(y_actual * np.log(y_pred + 1e-9))


def mean_squared_error(y_actual, y_pred):
    return np.mean((y_actual - y_pred) ** 2)


def forward_propagation(X, weights, num_hidden_layer, activation):
    A = X.reshape(X.shape[0], -1)
    cache = {'A0': A}

    for i in range(num_hidden_layer):
        Z = np.dot(A, weights[f'W{i+1}']) + weights[f'b{i+1}']
        A = activation_function(Z, activation)
        cache[f'Z{i+1}'] = Z
        cache[f'A{i+1}'] = A

    Z_out = np.dot(A, weights['W_out']) + weights['b_out']
    A_out = activation_function(Z_out, 'softmax')

    cache['Z_out'] = Z_out
    cache['A_out'] = A_out

    return A_out, cache

def back_propagation(X, y_actual, weights, cache, num_hidden_layer, activation):
    gradients = {}
    m = X.shape[0]

    dZ_out = cache['A_out'] - y_actual
    gradients['dW_out'] = np.dot(cache[f'A{num_hidden_layer}'].T, dZ_out) / m
    gradients['db_out'] = np.sum(dZ_out, axis=0, keepdims=True) / m

    dA = np.dot(dZ_out, weights['W_out'].T)

    for i in range(num_hidden_layer, 0, -1):
        dZ = dA * derivative_activation(cache[f'Z{i}'], activation)
        gradients[f'dW{i}'] = np.dot(cache[f'A{i-1}'].T, dZ) / m
        gradients[f'db{i}'] = np.sum(dZ, axis=0, keepdims=True) / m
        dA = np.dot(dZ, weights[f'W{i}'].T)

    return gradients

def calculate_accuracy(X, y_actual, weights, num_hidden_layer, activation):
    y_pred, _ = forward_propagation(X, weights, num_hidden_layer, activation)
    return np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_actual, axis=1))

def stochastic_gradient_descent(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, loss_function='cross_entropy'):
    wandb.init(project="stochastic")
    weights = initialize_weights(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]

            y_pred, cache = forward_propagation(X_batch, weights, num_hidden_layer, activation)
            gradients = back_propagation(X_batch, y_batch, weights, cache, num_hidden_layer, activation)

            for key in weights:
                weights[key] -= lr * gradients[f'd{key}']

        train_acc = calculate_accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = calculate_accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = cross_entropy_loss(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = cross_entropy_loss(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = mean_squared_error(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = mean_squared_error(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights




# Set parameters and train
num_hidden_layer = 4
num_nodes_hidden_layers = [128]
weight = 'xavier'
input_size = 28 * 28  # Flattened image size
output_size = 10  # Number of classes
lr = 0.01
batch_size = 64
epochs = 1
activation = 'relu'

trained_weights = stochastic_gradient_descent(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, loss_function='cross_entropy')
wandb.finish()

Epoch 1: Train Acc=0.7354, Train Loss=0.0771, Val Acc=0.7406, Val Loss=0.0753


0,1
epoch,▁
train_acc,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
epoch,1.0
train_acc,0.7354
train_loss,0.07707
val_acc,0.7406
val_loss,0.07525


In [4]:
# Modify momentum_gradient_descent to use either cross-entropy or MSE loss

def momentum_gradient_descent(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, momentum=0.9, loss_function='cross_entropy'):
    wandb.init(project="momentum")
    weights = initialize_weights(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    velocity = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache = forward_propagation(X_batch, weights, num_hidden_layer, activation)
            gradients = back_propagation(X_batch, y_batch, weights, cache, num_hidden_layer, activation)

            for key in weights:
                velocity[key] = momentum * velocity[key] - lr * gradients[f'd{key}']
                weights[key] += velocity[key]

        train_acc = calculate_accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = calculate_accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = cross_entropy_loss(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = cross_entropy_loss(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = mean_squared_error(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = mean_squared_error(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage


# Set parameters and train
num_hidden_layer = 3
num_nodes_hidden_layers = [128]
weight = 'xavier'
input_size = 28 * 28  # Flattened image size
output_size = 10  # Number of classes
lr = 0.01
batch_size = 64
epochs = 1
activation = 'relu'
trained_weights1 = momentum_gradient_descent(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, momentum=0.9, loss_function='cross_entropy')
wandb.finish()


Epoch 1: Train Acc=0.8303, Train Loss=0.0459, Val Acc=0.8340, Val Loss=0.0456


0,1
epoch,▁
train_acc,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
epoch,1.0
train_acc,0.83033
train_loss,0.0459
val_acc,0.834
val_loss,0.04555


In [5]:
def nesterov_gradient_descent(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, momentum=0.9, loss_function='cross_entropy'):
    wandb.init(project="nesterov")
    weights = initialize_weights(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    velocity = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            temp_weights = {key: weights[key] + momentum * velocity[key] for key in weights}
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache = forward_propagation(X_batch, temp_weights, num_hidden_layer, activation)
            gradients = back_propagation(X_batch, y_batch, temp_weights, cache, num_hidden_layer, activation)

            for key in weights:
                velocity[key] = momentum * velocity[key] - lr * gradients[f'd{key}']
                weights[key] += velocity[key]

        train_acc = calculate_accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = calculate_accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = cross_entropy_loss(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = cross_entropy_loss(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = mean_squared_error(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = mean_squared_error(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage


# Set parameters and train
num_hidden_layer = 5
num_nodes_hidden_layers = [128]
weight = 'xavier'
input_size = 28 * 28  # Flattened image size
output_size = 10  # Number of classes
lr = 0.01
batch_size = 64
epochs = 1
activation = 'relu'

trained_weights2 = nesterov_gradient_descent(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, momentum=0.9, loss_function='cross_entropy')
wandb.finish()


Epoch 1: Train Acc=0.8514, Train Loss=0.0411, Val Acc=0.8528, Val Loss=0.0415


0,1
epoch,▁
train_acc,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
epoch,1.0
train_acc,0.85144
train_loss,0.04111
val_acc,0.8528
val_loss,0.04146


In [6]:
def rmsprop_optimizer(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta=0.9, epsilon=1e-8, loss_function='cross_entropy'):
    wandb.init(project="rmsprop")
    weights = initialize_weights(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    cache = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache_forward = forward_propagation(X_batch, weights, num_hidden_layer, activation)
            gradients = back_propagation(X_batch, y_batch, weights, cache_forward, num_hidden_layer, activation)

            for key in weights:
                cache[key] = beta * cache[key] + (1 - beta) * gradients[f'd{key}']**2
                weights[key] -= lr * gradients[f'd{key}'] / (np.sqrt(cache[key]) + epsilon)

        train_acc = calculate_accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = calculate_accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = cross_entropy_loss(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = cross_entropy_loss(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = mean_squared_error(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = mean_squared_error(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage

# Set parameters and train
num_hidden_layer = 4
num_nodes_hidden_layers = [128]
weight = 'xavier'
input_size = 28 * 28  # Flattened image size
output_size = 10  # Number of classes
lr = 0.01
batch_size = 64
epochs = 1
activation = 'sigmoid'


trained_weights3 = rmsprop_optimizer(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta=0.9, epsilon=1e-8, loss_function='cross_entropy')
wandb.finish()

Epoch 1: Train Acc=0.7958, Train Loss=0.0572, Val Acc=0.7950, Val Loss=0.0576


0,1
epoch,▁
train_acc,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
epoch,1.0
train_acc,0.79584
train_loss,0.05722
val_acc,0.795
val_loss,0.05764


In [7]:
def adam_optimizer(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy'):
    wandb.init(project="adam")
    weights = initialize_weights(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    m = {key: np.zeros_like(value) for key, value in weights.items()}
    v = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache_forward = forward_propagation(X_batch, weights, num_hidden_layer, activation)
            gradients = back_propagation(X_batch, y_batch, weights, cache_forward, num_hidden_layer, activation)

            for key in weights:
                m[key] = beta1 * m[key] + (1 - beta1) * gradients[f'd{key}']
                v[key] = beta2 * v[key] + (1 - beta2) * (gradients[f'd{key}'] ** 2)
                m_hat = m[key] / (1 - beta1 ** (epoch + 1))
                v_hat = v[key] / (1 - beta2 ** (epoch + 1))
                weights[key] -= lr * m_hat / (np.sqrt(v_hat) + epsilon)

        train_acc = calculate_accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = calculate_accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = cross_entropy_loss(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = cross_entropy_loss(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = mean_squared_error(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = mean_squared_error(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage

# Set parameters and train
num_hidden_layer = 4
num_nodes_hidden_layers = [128]
weight = 'xavier'
input_size = 28 * 28  # Flattened image size
output_size = 10  # Number of classes
lr = 0.01
batch_size = 64
epochs = 1
activation = 'sigmoid'


trained_weights4 = adam_optimizer(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy')
wandb.finish()


Epoch 1: Train Acc=0.8282, Train Loss=0.0488, Val Acc=0.8378, Val Loss=0.0480


0,1
epoch,▁
train_acc,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
epoch,1.0
train_acc,0.82822
train_loss,0.04879
val_acc,0.8378
val_loss,0.04803


In [8]:
def nadam_optimizer(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy'):
    wandb.init(project="nadam")
    weights = initialize_weights(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    m = {key: np.zeros_like(value) for key, value in weights.items()}
    v = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache_forward = forward_propagation(X_batch, weights, num_hidden_layer, activation)
            gradients = back_propagation(X_batch, y_batch, weights, cache_forward, num_hidden_layer, activation)

            for key in weights:
                m[key] = beta1 * m[key] + (1 - beta1) * gradients[f'd{key}']
                v[key] = beta2 * v[key] + (1 - beta2) * (gradients[f'd{key}'] ** 2)
                m_hat = m[key] / (1 - beta1 ** (epoch + 1))
                v_hat = v[key] / (1 - beta2 ** (epoch + 1))
                nadam_update = beta1 * m_hat + (1 - beta1) * gradients[f'd{key}']
                weights[key] -= lr * nadam_update / (np.sqrt(v_hat) + epsilon)

        train_acc = calculate_accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = calculate_accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = cross_entropy_loss(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = cross_entropy_loss(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = mean_squared_error(y_train, forward_propagation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = mean_squared_error(y_val, forward_propagation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage

# Set parameters and train
num_hidden_layer = 4
num_nodes_hidden_layers = [128]
weight = 'xavier'
input_size = 28 * 28  # Flattened image size
output_size = 10  # Number of classes
lr = 0.01
batch_size = 64
epochs = 4
activation = 'sigmoid'

trained_weights5 = nadam_optimizer(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy')
wandb.finish()


Epoch 1: Train Acc=0.8304, Train Loss=0.0474, Val Acc=0.8284, Val Loss=0.0493
Epoch 2: Train Acc=0.8655, Train Loss=0.0379, Val Acc=0.8570, Val Loss=0.0397
Epoch 3: Train Acc=0.8792, Train Loss=0.0339, Val Acc=0.8654, Val Loss=0.0368
Epoch 4: Train Acc=0.8848, Train Loss=0.0317, Val Acc=0.8712, Val Loss=0.0352


0,1
epoch,▁▃▆█
train_acc,▁▆▇█
train_loss,█▄▂▁
val_acc,▁▆▇█
val_loss,█▃▂▁

0,1
epoch,4.0
train_acc,0.88482
train_loss,0.03168
val_acc,0.8712
val_loss,0.0352


#Question - 4 , 5 , 6

In [9]:
import numpy as np
import tensorflow as tf
import wandb
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import fashion_mnist


def main():
    sweep_config = {
        'method': 'bayes',
        'metric': {'name': 'accuracy', 'goal': 'maximize'},
        'parameters': {
            'epochs': {'values': [5, 10]},
            'num_layers': {'values': [3, 4, 5]},
            'hidden_size': {'values': [32, 64, 128]},
            'weight_decay': {'values': [0, 0.0005, 0.5]},
            'learning_rate': {'values': [1e-3, 1e-4]},
            'optimizer': {'values': ['stochastic', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']},
            'batch_size': {'values': [16, 32, 64]},
            'weight_init': {'values': ['random', 'xavier']},
            'activation': {'values': ['sigmoid', 'tanh', 'relu']},
        }
    }
    sweep_id = wandb.sweep(sweep_config, project="fashion-mnist-nn")
    wandb.agent(sweep_id, function=train, count=3)

def train():
    wandb.init(project="Vinod_A1")
    # wandb.init()
    config = wandb.config
    run_name = f"Opt-{config.optimizer}_Layers-{config.num_layers}_HS-{config.hidden_size}_LR-{config.learning_rate}_Batch-{config.batch_size}_Act-{config.activation}"
    wandb.run.name = run_name

    # x_train, y_train, x_val, y_val, _, _ = load_and_preprocess_data()

    optimizer = config.optimizer

    if optimizer == 'stochastic':
        trained_weights = stochastic_gradient_descent(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
        # trained_weights = stochastic_gradient_descent(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size)
    elif optimizer == 'momentum':
        trained_weights = momentum_gradient_descent(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'nag':
        trained_weights = nesterov_gradient_descent(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'rmsprop':
        trained_weights = rmsprop_optimizer(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'adam':
        trained_weights = adam_optimizer(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'nadam':
        trained_weights = nadam_optimizer(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')

    #wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss})

    wandb.finish()

if __name__ == "__main__":
    main()


Create sweep with ID: nc8rvczp
Sweep URL: https://wandb.ai/viinod9-iitm/fashion-mnist-nn/sweeps/nc8rvczp


[34m[1mwandb[0m: Agent Starting Run: rsrln0bc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8484, Train Loss=0.0424, Val Acc=0.8474, Val Loss=0.0420
Epoch 2: Train Acc=0.8622, Train Loss=0.0383, Val Acc=0.8606, Val Loss=0.0387
Epoch 3: Train Acc=0.8698, Train Loss=0.0362, Val Acc=0.8652, Val Loss=0.0372
Epoch 4: Train Acc=0.8752, Train Loss=0.0347, Val Acc=0.8676, Val Loss=0.0362
Epoch 5: Train Acc=0.8792, Train Loss=0.0335, Val Acc=0.8702, Val Loss=0.0355
Epoch 6: Train Acc=0.8824, Train Loss=0.0325, Val Acc=0.8730, Val Loss=0.0350
Epoch 7: Train Acc=0.8860, Train Loss=0.0316, Val Acc=0.8762, Val Loss=0.0344
Epoch 8: Train Acc=0.8890, Train Loss=0.0308, Val Acc=0.8782, Val Loss=0.0340
Epoch 9: Train Acc=0.8916, Train Loss=0.0300, Val Acc=0.8814, Val Loss=0.0336
Epoch 10: Train Acc=0.8940, Train Loss=0.0293, Val Acc=0.8816, Val Loss=0.0332


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▄▅▅▆▆▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.89398
train_loss,0.02928
val_acc,0.8816
val_loss,0.03322


[34m[1mwandb[0m: Agent Starting Run: cgsgal8g with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8057, Train Loss=0.0555, Val Acc=0.8096, Val Loss=0.0548
Epoch 2: Train Acc=0.8335, Train Loss=0.0473, Val Acc=0.8338, Val Loss=0.0468
Epoch 3: Train Acc=0.8454, Train Loss=0.0438, Val Acc=0.8438, Val Loss=0.0437
Epoch 4: Train Acc=0.8542, Train Loss=0.0414, Val Acc=0.8508, Val Loss=0.0417
Epoch 5: Train Acc=0.8619, Train Loss=0.0390, Val Acc=0.8626, Val Loss=0.0395
Epoch 6: Train Acc=0.8691, Train Loss=0.0370, Val Acc=0.8674, Val Loss=0.0379
Epoch 7: Train Acc=0.8734, Train Loss=0.0357, Val Acc=0.8698, Val Loss=0.0369
Epoch 8: Train Acc=0.8768, Train Loss=0.0346, Val Acc=0.8722, Val Loss=0.0360
Epoch 9: Train Acc=0.8795, Train Loss=0.0336, Val Acc=0.8754, Val Loss=0.0353
Epoch 10: Train Acc=0.8818, Train Loss=0.0328, Val Acc=0.8774, Val Loss=0.0349


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▅▆▇▇███
train_loss,█▅▄▄▃▂▂▂▁▁
val_acc,▁▃▅▅▆▇▇▇██
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.88176
train_loss,0.03284
val_acc,0.8774
val_loss,0.03486


[34m[1mwandb[0m: Agent Starting Run: 9s6ztrz7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▅▅▄▃▃▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▄▅▅▆▇▇██

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


#Question - 7

In [10]:
import numpy as np
import tensorflow as tf
import wandb
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import fashion_mnist

def load_and_preprocess_data():
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    val_size = 5000
    x_val, y_val = x_train[:val_size], y_train[:val_size]
    x_train, y_train = x_train[val_size:], y_train[val_size:]
    x_train, x_val, x_test = x_train / 255.0, x_val / 255.0, x_test / 255.0
    y_train = to_categorical(y_train, 10)
    y_val = to_categorical(y_val, 10)
    y_test = to_categorical(y_test, 10)
    return x_train, y_train, x_val, y_val, x_test, y_test

def plot_confusion_matrix(y_true, y_pred, config_name):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=range(10), yticklabels=range(10))
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix - {config_name}')

    wandb.log({f"Confusion Matrix - {config_name}": wandb.Image(plt)})
    plt.close()

def evaluate_best_configs(best_configs):
    x_train, y_train, x_val, y_val, x_test, y_test = load_and_preprocess_data()
    y_test_labels = np.argmax(y_test, axis=1)

    for config in best_configs:
        wandb.init(project="fashion-mnist-nn", name=f"Confusion_Matrix_{config['name']}", reinit=True)

        if config['optimizer'] == 'rmsprop':
            trained_weights = rmsprop_optimizer(config['learning_rate'], x_train, y_train, x_val, y_val, config['epochs'], config['activation'], config['num_layers'], config['hidden_size'], config['weight_init'], config['batch_size'], 28*28, 10)

        y_pred_probs, _ = forward_propagation(x_test, trained_weights, config['num_layers'], config['activation'])
        y_pred_labels = np.argmax(y_pred_probs, axis=1)
        plot_confusion_matrix(y_test_labels, y_pred_labels, config['name'])
        wandb.finish()

if __name__ == "__main__":
    best_configs = [
        {
            'name': 'Best_Config_1',
            'epochs': 10,
            'num_layers': 5,
            'hidden_size': 128,
            'learning_rate': 0.0001,
            'batch_size': 64,
            'optimizer': 'rmsprop',
            'weight_decay': 0.5,
            'weight_init': 'xavier',
            'activation': 'tanh'
        },
        {
            'name': 'Best_Config_2',
            'epochs': 10,
            'num_layers': 3,
            'hidden_size': 128,
            'learning_rate': 0.001,
            'batch_size': 64,
            'optimizer': 'rmsprop',
            'weight_decay': 0,
            'weight_init': 'xavier',
            'activation': 'tanh'
        }
    ]
    evaluate_best_configs(best_configs)


Epoch 1: Train Acc=0.8339, Train Loss=0.0463, Val Acc=0.8390, Val Loss=0.0457
Epoch 2: Train Acc=0.8517, Train Loss=0.0410, Val Acc=0.8548, Val Loss=0.0411
Epoch 3: Train Acc=0.8604, Train Loss=0.0384, Val Acc=0.8614, Val Loss=0.0391
Epoch 4: Train Acc=0.8667, Train Loss=0.0366, Val Acc=0.8674, Val Loss=0.0377
Epoch 5: Train Acc=0.8721, Train Loss=0.0351, Val Acc=0.8702, Val Loss=0.0367
Epoch 6: Train Acc=0.8765, Train Loss=0.0340, Val Acc=0.8706, Val Loss=0.0359
Epoch 7: Train Acc=0.8799, Train Loss=0.0330, Val Acc=0.8734, Val Loss=0.0352
Epoch 8: Train Acc=0.8826, Train Loss=0.0322, Val Acc=0.8758, Val Loss=0.0346
Epoch 9: Train Acc=0.8851, Train Loss=0.0314, Val Acc=0.8774, Val Loss=0.0341
Epoch 10: Train Acc=0.8883, Train Loss=0.0307, Val Acc=0.8792, Val Loss=0.0337


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.88835
train_loss,0.03071
val_acc,0.8792
val_loss,0.03369


Epoch 1: Train Acc=0.8435, Train Loss=0.0421, Val Acc=0.8446, Val Loss=0.0427
Epoch 2: Train Acc=0.8588, Train Loss=0.0379, Val Acc=0.8528, Val Loss=0.0400
Epoch 3: Train Acc=0.8727, Train Loss=0.0340, Val Acc=0.8634, Val Loss=0.0369
Epoch 4: Train Acc=0.8825, Train Loss=0.0313, Val Acc=0.8702, Val Loss=0.0350
Epoch 5: Train Acc=0.8921, Train Loss=0.0290, Val Acc=0.8774, Val Loss=0.0337
Epoch 6: Train Acc=0.8959, Train Loss=0.0279, Val Acc=0.8794, Val Loss=0.0335
Epoch 7: Train Acc=0.8989, Train Loss=0.0271, Val Acc=0.8822, Val Loss=0.0338
Epoch 8: Train Acc=0.9031, Train Loss=0.0259, Val Acc=0.8818, Val Loss=0.0335
Epoch 9: Train Acc=0.9060, Train Loss=0.0252, Val Acc=0.8820, Val Loss=0.0337
Epoch 10: Train Acc=0.9037, Train Loss=0.0256, Val Acc=0.8792, Val Loss=0.0349


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▇▇███
train_loss,█▆▅▄▃▂▂▁▁▁
val_acc,▁▃▄▆▇▇███▇
val_loss,█▆▄▂▁▁▁▁▁▂

0,1
epoch,10.0
train_acc,0.90369
train_loss,0.02556
val_acc,0.8792
val_loss,0.03486


#Question - 8

In [11]:
def main():
    sweep_config = {
        'method': 'bayes',
        'metric': {'name': 'accuracy', 'goal': 'maximize'},
        'parameters': {
            'epochs': {'values': [5, 10]},
            'num_layers': {'values': [3, 4, 5]},
            'hidden_size': {'values': [32, 64, 128]},
            'weight_decay': {'values': [0, 0.0005, 0.5]},
            'learning_rate': {'values': [1e-3, 1e-4]},
            'optimizer': {'values': ['stochastic', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']},
            'batch_size': {'values': [16, 32, 64]},
            'weight_init': {'values': ['random', 'xavier']},
            'activation': {'values': ['sigmoid', 'tanh', 'relu']},
        }
    }
    sweep_id = wandb.sweep(sweep_config, project="MSE")
    wandb.agent(sweep_id, function=train, count=3)

def train():
    wandb.init(project="MSE")
    # wandb.init()
    config = wandb.config
    run_name = f"Opt-{config.optimizer}_Layers-{config.num_layers}_HS-{config.hidden_size}_LR-{config.learning_rate}_Batch-{config.batch_size}_Act-{config.activation}"
    wandb.run.name = run_name

    # x_train, y_train, x_val, y_val, _, _ = load_and_preprocess_data()

    optimizer = config.optimizer

    if optimizer == 'stochastic':
        trained_weights = stochastic_gradient_descent(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
        # trained_weights = stochastic_gradient_descent(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size)
    elif optimizer == 'momentum':
        trained_weights = momentum_gradient_descent(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
    elif optimizer == 'nag':
        trained_weights = nesterov_gradient_descent(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
    elif optimizer == 'rmsprop':
        trained_weights = rmsprop_optimizer(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
    elif optimizer == 'adam':
        trained_weights = adam_optimizer(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
    elif optimizer == 'nadam':
        trained_weights = nadam_optimizer(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')

    #wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss})

    wandb.finish()

if __name__ == "__main__":
    main()


Create sweep with ID: 1qlwp8s7
Sweep URL: https://wandb.ai/viinod9-iitm/MSE/sweeps/1qlwp8s7


[34m[1mwandb[0m: Agent Starting Run: khva0rtb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 2: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▄▂▁▁
val_acc,▁▁▁▁▁
val_loss,▁▄▆▇█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09


[34m[1mwandb[0m: Agent Starting Run: 8g4z9uuu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5426, Train Loss=0.0594, Val Acc=0.5394, Val Loss=0.0595
Epoch 2: Train Acc=0.7168, Train Loss=0.0354, Val Acc=0.7230, Val Loss=0.0353
Epoch 3: Train Acc=0.7901, Train Loss=0.0297, Val Acc=0.7896, Val Loss=0.0299
Epoch 4: Train Acc=0.8040, Train Loss=0.0278, Val Acc=0.8024, Val Loss=0.0281
Epoch 5: Train Acc=0.8195, Train Loss=0.0253, Val Acc=0.8166, Val Loss=0.0260
Epoch 6: Train Acc=0.8523, Train Loss=0.0222, Val Acc=0.8462, Val Loss=0.0233
Epoch 7: Train Acc=0.8596, Train Loss=0.0212, Val Acc=0.8512, Val Loss=0.0221
Epoch 8: Train Acc=0.8660, Train Loss=0.0202, Val Acc=0.8590, Val Loss=0.0213
Epoch 9: Train Acc=0.8681, Train Loss=0.0199, Val Acc=0.8610, Val Loss=0.0212
Epoch 10: Train Acc=0.8707, Train Loss=0.0192, Val Acc=0.8608, Val Loss=0.0207


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▇▇█████
train_loss,█▄▃▃▂▂▁▁▁▁
val_acc,▁▅▆▇▇█████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.87065
train_loss,0.01915
val_acc,0.8608
val_loss,0.02071


[34m[1mwandb[0m: Agent Starting Run: oy3blwug with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8495, Train Loss=0.0211, Val Acc=0.8556, Val Loss=0.0209
Epoch 2: Train Acc=0.8615, Train Loss=0.0198, Val Acc=0.8624, Val Loss=0.0197
Epoch 3: Train Acc=0.8557, Train Loss=0.0207, Val Acc=0.8566, Val Loss=0.0211
Epoch 4: Train Acc=0.8574, Train Loss=0.0201, Val Acc=0.8564, Val Loss=0.0202
Epoch 5: Train Acc=0.8493, Train Loss=0.0214, Val Acc=0.8420, Val Loss=0.0219


0,1
epoch,▁▃▅▆█
train_acc,▁█▅▆▁
train_loss,▇▁▅▂█
val_acc,▆█▆▆▁
val_loss,▅▁▆▃█

0,1
epoch,5.0
train_acc,0.84925
train_loss,0.02136
val_acc,0.842
val_loss,0.0219


#Question- 10

In [12]:
# Load and preprocess dataset
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist

def load_and_preprocess_data_mnist():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Split into train and validation
    val_size = 5000
    x_val, y_val = x_train[:val_size], y_train[:val_size]
    x_train, y_train = x_train[val_size:], y_train[val_size:]

    # Normalize dataset
    x_train, x_val, x_test = x_train / 255.0, x_val / 255.0, x_test / 255.0

    # One-hot encoding
    y_train = to_categorical(y_train, 10)
    y_val = to_categorical(y_val, 10)
    y_test = to_categorical(y_test, 10)

    return x_train, y_train, x_val, y_val, x_test, y_test

# Load data
x_train_mnist, y_train_mnist, x_val_mnist, y_val_mnist, x_test_mnist, y_test_mnist = load_and_preprocess_data_mnist()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [13]:
# # Set parameters and train
# num_hidden_layer = 4
# num_nodes_hidden_layers = [128]
# weight = 'xavier'
# input_size = 28 * 28  # Flattened image size
# output_size = 10  # Number of classes
# lr = 0.01
# batch_size = 64
# epochs = 4
# activation = 'sigmoid'

# trained_weights_mnist1 = nadam_optimizer(lr, x_train_mnist, y_train_mnist, x_val_mnist, y_val_mnist, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy')


Epoch 1: Train Acc=0.9461, Train Loss=0.0190, Val Acc=0.9490, Val Loss=0.0194
Epoch 2: Train Acc=0.9620, Train Loss=0.0126, Val Acc=0.9616, Val Loss=0.0148
Epoch 3: Train Acc=0.9715, Train Loss=0.0094, Val Acc=0.9658, Val Loss=0.0131
Epoch 4: Train Acc=0.9765, Train Loss=0.0077, Val Acc=0.9670, Val Loss=0.0127


In [14]:
# # Set parameters and train
# num_hidden_layer = 4
# num_nodes_hidden_layers = [128]
# weight = 'xavier'
# input_size = 28 * 28  # Flattened image size
# output_size = 10  # Number of classes
# lr = 0.01
# batch_size = 64
# epochs = 4
# activation = 'sigmoid'

# trained_weights_mnist2 = adam_optimizer(lr, x_train_mnist, y_train_mnist, x_val_mnist, y_val_mnist, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy')


Epoch 1: Train Acc=0.9456, Train Loss=0.0186, Val Acc=0.9460, Val Loss=0.0187
Epoch 2: Train Acc=0.9665, Train Loss=0.0116, Val Acc=0.9616, Val Loss=0.0135
Epoch 3: Train Acc=0.9730, Train Loss=0.0091, Val Acc=0.9654, Val Loss=0.0127
Epoch 4: Train Acc=0.9806, Train Loss=0.0067, Val Acc=0.9688, Val Loss=0.0111


In [16]:
# # Set parameters and train
# num_hidden_layer = 4
# num_nodes_hidden_layers = [128]
# weight = 'xavier'
# input_size = 28 * 28
# output_size = 10
# lr = 0.01
# batch_size = 64
# epochs = 4
# activation = 'sigmoid'

# trained_weights_mnist3 = rmsprop_optimizer(lr, x_train_mnist, y_train_mnist, x_val_mnist, y_val_mnist, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta=0.9, epsilon=1e-8, loss_function='cross_entropy')


Epoch 1: Train Acc=0.9471, Train Loss=0.0182, Val Acc=0.9530, Val Loss=0.0168
Epoch 2: Train Acc=0.9554, Train Loss=0.0162, Val Acc=0.9566, Val Loss=0.0168
Epoch 3: Train Acc=0.9609, Train Loss=0.0147, Val Acc=0.9596, Val Loss=0.0174
Epoch 4: Train Acc=0.9723, Train Loss=0.0112, Val Acc=0.9672, Val Loss=0.0157
