In [3]:
import wandb
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist

In [2]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mvinyk-sd[0m ([33mvinyk-sd-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# Load dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Normalize and reshape data
X_train, X_test = X_train / 255.0, X_test / 255.0
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# One-hot encoding
y_train_one_hot = np.eye(10)[y_train]
y_test_one_hot = np.eye(10)[y_test]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [5]:
class NeuralNetwork:
    def __init__(self, layers, activation=None, output_activation = 'softmax', weight_init=None):
        """
        Initializes the neural network.
        :param layers: List representing the number of neurons in each layer.
        :param activation: Activation function for hidden layers ('relu', 'sigmoid', 'tanh').
        :param output_activation: Activation function for the output layer ('softmax', 'sigmoid', or 'linear').
        :param weight_init: Weight initialization method ('random', 'xavier').
        """
        self.layers = layers
        self.activation = activation
        self.output_activation = output_activation  # Separate activation for output layer
        self.weight_init = weight_init
        self.weights, self.biases = self._initialize_weights()

    def _initialize_weights(self):
        """Initializes weights and biases based on the selected method."""
        np.random.seed(42)
        weights, biases = [], []

        for i in range(len(self.layers) - 1):
            if self.weight_init == "xavier":
                limit = np.sqrt(6 / (self.layers[i] + self.layers[i+1]))
                w = np.random.uniform(-limit, limit, (self.layers[i], self.layers[i+1]))
            else:  # Default: Random small values
                w = np.random.randn(self.layers[i], self.layers[i+1]) * 0.01
            
            b = np.zeros((1, self.layers[i+1]))
            weights.append(w)
            biases.append(b)
        
        return weights, biases

    def _activation(self, x, func, derivative=False):
        """Applies the selected activation function."""
        if func == "relu":
            return np.where(x > 0, 1, 0) if derivative else np.maximum(0, x)
        elif func == "tanh":
            t = np.tanh(x)
            return 1 - t**2 if derivative else t
        elif func == "sigmoid":
            sig = 1 / (1 + np.exp(-x))
            return sig * (1 - sig) if derivative else sig
        elif func == "softmax":
            exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))  # Stability trick
            return exp_x / np.sum(exp_x, axis=1, keepdims=True)
        else:  # Linear (no activation)
            return x

    def forward(self, X):
        """Performs forward propagation."""
        activations, zs = [X], []

        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = np.dot(activations[-1], w) + b
            zs.append(z)

            # Use different activation for last layer
            if i == len(self.weights) - 1:
                activations.append(self._activation(z, self.output_activation))
            else:
                activations.append(self._activation(z, self.activation))
        
        return activations, zs

    def backward(self, y_true, activations, zs, learning_rate):
        """Performs backpropagation and computes gradients."""
        grads_w, grads_b = [], []
        m = y_true.shape[0]
        
        # Compute loss gradient (assuming MSE loss for now)
        dA = activations[-1] - y_true  

        for i in reversed(range(len(self.weights))):
            activation_func = self.output_activation if i == len(self.weights) - 1 else self.activation
            dz = dA * self._activation(zs[i], activation_func, derivative=True)
            dw = np.dot(activations[i].T, dz) / m
            db = np.sum(dz, axis=0, keepdims=True) / m
            dA = np.dot(dz, self.weights[i].T)

            grads_w.insert(0, dw)
            grads_b.insert(0, db)

        return grads_w, grads_b


In [6]:
# Optimizers
class Optimizer:
    def __init__(self, method= None, lr=None, beta=0.9, beta2=0.999, epsilon=1e-8):
        self.method = method
        self.lr = lr
        self.beta = beta  # Used for momentum-based optimizers
        self.beta2 = beta2  # Used for RMSprop, Adam, Nadam
        self.epsilon = epsilon  # Prevents division by zero
        self.m_w = None  # First moment vector for weights
        self.v_w = None  # Second moment vector for weights
        self.m_b = None  # First moment vector for biases
        self.v_b = None  # Second moment vector for biases
        self.t = 0  # Time step for bias correction

    def update(self, weights, biases, grads_w, grads_b):
        if self.method == "sgd":
            weights = [w - self.lr * dw for w, dw in zip(weights, grads_w)]
            biases = [b - self.lr * db for b, db in zip(biases, grads_b)]

        elif self.method in ["momentum", "nesterov"]:
            if self.m_w is None:  # Initialize momentum terms
                self.m_w = [np.zeros_like(w) for w in weights]
                self.m_b = [np.zeros_like(b) for b in biases]

            for i in range(len(weights)):
                if self.method == "nesterov":
                    # Look-ahead step
                    look_ahead_w = weights[i] - self.beta * self.m_w[i]
                    look_ahead_b = biases[i] - self.beta * self.m_b[i]

                    # Compute gradients at look-ahead position
                    self.m_w[i] = self.beta * self.m_w[i] + self.lr * grads_w[i]
                    self.m_b[i] = self.beta * self.m_b[i] + self.lr * grads_b[i]

                    weights[i] = look_ahead_w - self.m_w[i]
                    biases[i] = look_ahead_b - self.m_b[i]

                else:  # Normal momentum
                    self.m_w[i] = self.beta * self.m_w[i] + self.lr * grads_w[i]
                    self.m_b[i] = self.beta * self.m_b[i] + self.lr * grads_b[i]
                    weights[i] -= self.m_w[i]
                    biases[i] -= self.m_b[i]

        elif self.method in ["rmsprop", "adam", "nadam"]:
            if self.m_w is None:  # Initialize moment estimates
                self.m_w = [np.zeros_like(w) for w in weights]
                self.v_w = [np.zeros_like(w) for w in weights]
                self.m_b = [np.zeros_like(b) for b in biases]
                self.v_b = [np.zeros_like(b) for b in biases]

            self.t += 1  # Update time step

            for i in range(len(weights)):
                if self.method == "rmsprop":
                    # Update second moment estimate (squared gradient)
                    self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * grads_w[i] ** 2
                    self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * grads_b[i] ** 2

                    # Update weights and biases
                    weights[i] -= self.lr * grads_w[i] / (np.sqrt(self.v_w[i]) + self.epsilon)
                    biases[i] -= self.lr * grads_b[i] / (np.sqrt(self.v_b[i]) + self.epsilon)

                elif self.method in ["adam", "nadam"]:
                    # Compute biased first moment estimate
                    self.m_w[i] = self.beta * self.m_w[i] + (1 - self.beta) * grads_w[i]
                    self.m_b[i] = self.beta * self.m_b[i] + (1 - self.beta) * grads_b[i]

                    # Compute biased second moment estimate
                    self.v_w[i] = self.beta2 * self.v_w[i] + (1 - self.beta2) * (grads_w[i] ** 2)
                    self.v_b[i] = self.beta2 * self.v_b[i] + (1 - self.beta2) * (grads_b[i] ** 2)

                    # Bias correction
                    m_hat_w = self.m_w[i] / (1 - self.beta ** self.t)
                    v_hat_w = self.v_w[i] / (1 - self.beta2 ** self.t)
                    m_hat_b = self.m_b[i] / (1 - self.beta ** self.t)
                    v_hat_b = self.v_b[i] / (1 - self.beta2 ** self.t)

                    if self.method == "adam":
                        # Adam update rule
                        weights[i] -= self.lr * m_hat_w / (np.sqrt(v_hat_w) + self.epsilon)
                        biases[i] -= self.lr * m_hat_b / (np.sqrt(v_hat_b) + self.epsilon)

                    elif self.method == "nadam":
                        # Nadam additional momentum correction
                        nadam_m_w = self.beta * m_hat_w + (1 - self.beta) * grads_w[i] / (1 - self.beta ** self.t)
                        nadam_m_b = self.beta * m_hat_b + (1 - self.beta) * grads_b[i] / (1 - self.beta ** self.t)

                        weights[i] -= self.lr * nadam_m_w / (np.sqrt(v_hat_w) + self.epsilon)
                        biases[i] -= self.lr * nadam_m_b / (np.sqrt(v_hat_b) + self.epsilon)

        return weights, biases

In [7]:
def train():
    wandb.init(project="Assignment1_Attempt3")
    config = wandb.config  # Get hyperparameters
    run_name = f"hl_{config.hidden_layers}_bs_{config.batch_size}_ac_{config.activation}_e_{config.epochs}"
    print(run_name)
    wandb.run.name = run_name

    # Extract hyperparameters from WandB
    layers = [784] + [config.hidden_size] * config.hidden_layers + [10]
    activation = config.activation
    weight_init = config.weight_init
    learning_rate = config.learning_rate
    batch_size = config.batch_size
    optimizer = config.optimizer
    epochs = config.epochs


    # Initialize Neural Network
    model = NeuralNetwork(layers, activation=activation, weight_init=weight_init)
    optimizer = Optimizer(method=optimizer, lr=learning_rate)

    for epoch in range(epochs):
        num_batches = len(X_train) // batch_size
        train_loss, train_acc = 0, 0

        for i in range(num_batches):
            start, end = i * batch_size, (i + 1) * batch_size
            X_batch, y_batch = X_train[start:end], y_train_one_hot[start:end]

            # Forward propagation
            activations, zs = model.forward(X_batch)
            preds = np.argmax(activations[-1], axis=1)
            y_true_labels = np.argmax(y_batch, axis=1)

            # Compute loss and accuracy
            batch_loss = np.mean((activations[-1] - y_batch) ** 2)
            batch_acc = np.mean(preds == y_true_labels)

            train_loss += batch_loss
            train_acc += batch_acc

            # Backpropagation
            grads_w, grads_b = model.backward(y_batch, activations, zs, learning_rate)
            model.weights, model.biases = optimizer.update(model.weights, model.biases, grads_w, grads_b)

        # Average loss and accuracy
        train_loss /= num_batches
        train_acc /= num_batches

        # Validation phase
        val_activations, _ = model.forward(X_test)
        val_preds = np.argmax(val_activations[-1], axis=1)
        val_true_labels = np.argmax(y_test_one_hot, axis=1)

        val_loss = np.mean((val_activations[-1] - y_test_one_hot) ** 2)
        val_acc = np.mean(val_preds == val_true_labels)

        # Log metrics to WandB
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_loss": val_loss,
            "train_accuracy": train_acc,
            "val_accuracy": val_acc
        })

        print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Train Acc={train_acc:.4f}, Val Loss={val_loss:.4f}, Val Acc={val_acc:.4f}")


In [8]:
sweep_config = {
    'method': 'bayes',  # Bayesian optimization for efficiency
    'metric': {'name': 'val_loss', 'goal': 'minimize'},  # Optimize for lowest validation loss
    'parameters': {
        'epochs': {'values': [5, 10, 15]},
        'hidden_layers': {'values': [3, 4, 5]},
        'hidden_size': {'values': [32, 64, 128]},
        'weight_decay': {'values': [0, 0.0005, 0.5]},
        'learning_rate': {'values': [1e-3, 1e-4]},
        'optimizer': {'values': ['sgd', 'momentum', 'nesterov', 'rmsprop', 'adam', 'nadam']},
        'batch_size': {'values': [16, 32, 64]},
        'weight_init': {'values': ['random', 'xavier']},
        'activation': {'values': ['sigmoid', 'tanh', 'relu']},  # <--- Added missing comma
        'epsilon': {'values': [0.8, 0.9, 0.95]}
    }
}

In [9]:
sweep_id = wandb.sweep(sweep_config, project="Assignment_Attempt3")
wandb.agent(sweep_id, train, count=20)  # Run 200 different hyperparameter combinations

Create sweep with ID: cs09wxtb
Sweep URL: https://wandb.ai/vinyk-sd-indian-institute-of-technology-madras/Assignment_Attempt3/sweeps/cs09wxtb


[34m[1mwandb[0m: Agent Starting Run: id4x9yv6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 0.9
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


hl_4_bs_32_ac_sigmoid_e_15
Epoch 1: Train Loss=0.0872, Train Acc=0.1508, Val Loss=0.0833, Val Acc=0.2102
Epoch 2: Train Loss=0.0832, Train Acc=0.2084, Val Loss=0.0835, Val Acc=0.2105
Epoch 3: Train Loss=0.0832, Train Acc=0.2072, Val Loss=0.0820, Val Acc=0.2166
Epoch 4: Train Loss=0.0818, Train Acc=0.2095, Val Loss=0.0815, Val Acc=0.2235
Epoch 5: Train Loss=0.0812, Train Acc=0.2267, Val Loss=0.0804, Val Acc=0.2571
Epoch 6: Train Loss=0.0765, Train Acc=0.2828, Val Loss=0.0738, Val Acc=0.3001
Epoch 7: Train Loss=0.0730, Train Acc=0.3075, Val Loss=0.0718, Val Acc=0.3319
Epoch 8: Train Loss=0.0707, Train Acc=0.3562, Val Loss=0.0694, Val Acc=0.3766
Epoch 9: Train Loss=0.0685, Train Acc=0.3725, Val Loss=0.0677, Val Acc=0.3790
Epoch 10: Train Loss=0.0672, Train Acc=0.3734, Val Loss=0.0668, Val Acc=0.3752
Epoch 11: Train Loss=0.0665, Train Acc=0.3715, Val Loss=0.0663, Val Acc=0.3737
Epoch 12: Train Loss=0.0661, Train Acc=0.3705, Val Loss=0.0661, Val Acc=0.3702
Epoch 13: Train Loss=0.0655, Train

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▃▃▃▃▅▆▇███████
train_loss,█▇▇▆▆▅▄▃▂▂▂▂▂▁▁
val_accuracy,▁▁▁▂▃▅▆▇█▇▇▇▇██
val_loss,██▇▇▇▅▄▃▂▂▂▂▂▁▁

0,1
epoch,15.0
train_accuracy,0.38375
train_loss,0.06384
val_accuracy,0.3894
val_loss,0.06353


[34m[1mwandb[0m: Agent Starting Run: taix6a4y with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 0.8
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


hl_5_bs_16_ac_relu_e_10
Epoch 1: Train Loss=0.0814, Train Acc=0.2194, Val Loss=0.0653, Val Acc=0.4816
Epoch 2: Train Loss=0.0336, Train Acc=0.7598, Val Loss=0.0221, Val Acc=0.8466
Epoch 3: Train Loss=0.0169, Train Acc=0.8861, Val Loss=0.0217, Val Acc=0.8486
Epoch 4: Train Loss=nan, Train Acc=0.5283, Val Loss=nan, Val Acc=0.0980
Epoch 5: Train Loss=nan, Train Acc=0.0987, Val Loss=nan, Val Acc=0.0980
Epoch 6: Train Loss=nan, Train Acc=0.0987, Val Loss=nan, Val Acc=0.0980
Epoch 7: Train Loss=nan, Train Acc=0.0987, Val Loss=nan, Val Acc=0.0980
Epoch 8: Train Loss=nan, Train Acc=0.0987, Val Loss=nan, Val Acc=0.0980
Epoch 9: Train Loss=nan, Train Acc=0.0987, Val Loss=nan, Val Acc=0.0980
Epoch 10: Train Loss=nan, Train Acc=0.0987, Val Loss=nan, Val Acc=0.0980


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▂▇█▅▁▁▁▁▁▁
train_loss,█▃▁
val_accuracy,▅██▁▁▁▁▁▁▁
val_loss,█▁▁

0,1
epoch,10.0
train_accuracy,0.09872
train_loss,
val_accuracy,0.098
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: of9et4nr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


hl_3_bs_32_ac_tanh_e_5
Epoch 1: Train Loss=0.0195, Train Acc=0.8504, Val Loss=0.0088, Val Acc=0.9417
Epoch 2: Train Loss=0.0065, Train Acc=0.9566, Val Loss=0.0060, Val Acc=0.9611
Epoch 3: Train Loss=0.0050, Train Acc=0.9667, Val Loss=0.0054, Val Acc=0.9648
Epoch 4: Train Loss=0.0041, Train Acc=0.9725, Val Loss=0.0055, Val Acc=0.9645
Epoch 5: Train Loss=0.0035, Train Acc=0.9770, Val Loss=0.0058, Val Acc=0.9622


0,1
epoch,▁▃▅▆█
train_accuracy,▁▇▇██
train_loss,█▂▂▁▁
val_accuracy,▁▇██▇
val_loss,█▂▁▁▂

0,1
epoch,5.0
train_accuracy,0.97698
train_loss,0.00346
val_accuracy,0.9622
val_loss,0.00581


[34m[1mwandb[0m: Agent Starting Run: xso09yca with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 0.8
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


hl_5_bs_64_ac_tanh_e_10
Epoch 1: Train Loss=0.0900, Train Acc=0.1088, Val Loss=0.0886, Val Acc=0.1687
Epoch 2: Train Loss=0.0875, Train Acc=0.2330, Val Loss=0.0860, Val Acc=0.3043
Epoch 3: Train Loss=0.0848, Train Acc=0.3485, Val Loss=0.0830, Val Acc=0.3889
Epoch 4: Train Loss=0.0815, Train Acc=0.4076, Val Loss=0.0793, Val Acc=0.4266
Epoch 5: Train Loss=0.0775, Train Acc=0.4345, Val Loss=0.0750, Val Acc=0.4412
Epoch 6: Train Loss=0.0729, Train Acc=0.4500, Val Loss=0.0703, Val Acc=0.4568
Epoch 7: Train Loss=0.0684, Train Acc=0.4656, Val Loss=0.0660, Val Acc=0.4745
Epoch 8: Train Loss=0.0643, Train Acc=0.4865, Val Loss=0.0623, Val Acc=0.4972
Epoch 9: Train Loss=0.0609, Train Acc=0.5155, Val Loss=0.0590, Val Acc=0.5308
Epoch 10: Train Loss=0.0578, Train Acc=0.5479, Val Loss=0.0560, Val Acc=0.5650


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▆▆▇▇▇█
train_loss,█▇▇▆▅▄▃▂▂▁
val_accuracy,▁▃▅▆▆▆▆▇▇█
val_loss,█▇▇▆▅▄▃▂▂▁

0,1
epoch,10.0
train_accuracy,0.54789
train_loss,0.05781
val_accuracy,0.565
val_loss,0.05597


[34m[1mwandb[0m: Agent Starting Run: imwhddz1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


hl_3_bs_64_ac_relu_e_10
Epoch 1: Train Loss=0.0894, Train Acc=0.1392, Val Loss=0.0889, Val Acc=0.1791
Epoch 2: Train Loss=0.0884, Train Acc=0.2414, Val Loss=0.0878, Val Acc=0.2933
Epoch 3: Train Loss=0.0871, Train Acc=0.3405, Val Loss=0.0863, Val Acc=0.3753
Epoch 4: Train Loss=0.0852, Train Acc=0.3911, Val Loss=0.0838, Val Acc=0.3918
Epoch 5: Train Loss=0.0820, Train Acc=0.3827, Val Loss=0.0800, Val Acc=0.3815
Epoch 6: Train Loss=0.0778, Train Acc=0.4212, Val Loss=0.0754, Val Acc=0.4428
Epoch 7: Train Loss=0.0723, Train Acc=0.4614, Val Loss=0.0690, Val Acc=0.4661
Epoch 8: Train Loss=0.0653, Train Acc=0.4907, Val Loss=0.0616, Val Acc=0.5111
Epoch 9: Train Loss=0.0572, Train Acc=0.5796, Val Loss=0.0540, Val Acc=0.6234
Epoch 10: Train Loss=0.0525, Train Acc=0.6274, Val Loss=0.0597, Val Acc=0.5360


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▄▅▄▅▆▆▇█
train_loss,███▇▇▆▅▃▂▁
val_accuracy,▁▃▄▄▄▅▆▆█▇
val_loss,██▇▇▆▅▄▃▁▂

0,1
epoch,10.0
train_accuracy,0.62743
train_loss,0.0525
val_accuracy,0.536
val_loss,0.05974


[34m[1mwandb[0m: Agent Starting Run: 8exbhmm8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 0.9
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


hl_4_bs_64_ac_tanh_e_15
Epoch 1: Train Loss=0.0900, Train Acc=0.1123, Val Loss=0.0900, Val Acc=0.1135
Epoch 2: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 3: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 4: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 5: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 6: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 7: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 8: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 9: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 10: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 11: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 12: Train Loss=0.0900, Train Acc=0.1124, Val Loss=0.0900, Val Acc=0.1135
Epoch 13: Train Loss=0.0900, Train Ac

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁██████████████
train_loss,█▇▆▅▄▃▃▂▂▂▂▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▅▄▃▃▃▂▂▂▁▁▁▁

0,1
epoch,15.0
train_accuracy,0.11238
train_loss,0.08997
val_accuracy,0.1135
val_loss,0.08997


[34m[1mwandb[0m: Agent Starting Run: 8pmz21nv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 0.8
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


hl_3_bs_32_ac_sigmoid_e_15
Epoch 1: Train Loss=0.0948, Train Acc=0.0987, Val Loss=0.0929, Val Acc=0.0980
Epoch 2: Train Loss=0.0923, Train Acc=0.0987, Val Loss=0.0919, Val Acc=0.0980
Epoch 3: Train Loss=0.0916, Train Acc=0.0987, Val Loss=0.0914, Val Acc=0.0980
Epoch 4: Train Loss=0.0912, Train Acc=0.0987, Val Loss=0.0911, Val Acc=0.0979
Epoch 5: Train Loss=0.0910, Train Acc=0.1008, Val Loss=0.0909, Val Acc=0.1047
Epoch 6: Train Loss=0.0908, Train Acc=0.1120, Val Loss=0.0907, Val Acc=0.1143
Epoch 7: Train Loss=0.0906, Train Acc=0.1148, Val Loss=0.0906, Val Acc=0.1126
Epoch 8: Train Loss=0.0905, Train Acc=0.1090, Val Loss=0.0904, Val Acc=0.0990
Epoch 9: Train Loss=0.0904, Train Acc=0.0871, Val Loss=0.0904, Val Acc=0.1042
Epoch 10: Train Loss=0.0903, Train Acc=0.1118, Val Loss=0.0903, Val Acc=0.1135
Epoch 11: Train Loss=0.0902, Train Acc=0.1124, Val Loss=0.0902, Val Acc=0.1135
Epoch 12: Train Loss=0.0902, Train Acc=0.1124, Val Loss=0.0901, Val Acc=0.1135
Epoch 13: Train Loss=0.0901, Train

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▄▄▄▄▄▇█▇▁▇▇▇▇▇▇
train_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▁▁▁▄█▇▁▄██████
val_loss,█▅▄▄▃▃▂▂▂▂▁▁▁▁▁

0,1
epoch,15.0
train_accuracy,0.11237
train_loss,0.09006
val_accuracy,0.1135
val_loss,0.09004


[34m[1mwandb[0m: Agent Starting Run: 52ipmv06 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 0.8
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


hl_5_bs_32_ac_tanh_e_5
Epoch 1: Train Loss=0.0888, Train Acc=0.1724, Val Loss=0.0860, Val Acc=0.3046
Epoch 2: Train Loss=0.0831, Train Acc=0.3778, Val Loss=0.0793, Val Acc=0.4273
Epoch 3: Train Loss=0.0752, Train Acc=0.4420, Val Loss=0.0703, Val Acc=0.4565
Epoch 4: Train Loss=0.0663, Train Acc=0.4768, Val Loss=0.0623, Val Acc=0.4981
Epoch 5: Train Loss=0.0594, Train Acc=0.5317, Val Loss=0.0560, Val Acc=0.5638


0,1
epoch,▁▃▅▆█
train_accuracy,▁▅▆▇█
train_loss,█▇▅▃▁
val_accuracy,▁▄▅▆█
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_accuracy,0.53172
train_loss,0.05935
val_accuracy,0.5638
val_loss,0.05599


[34m[1mwandb[0m: Agent Starting Run: b6xqil1m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


hl_4_bs_64_ac_tanh_e_10
Epoch 1: Train Loss=0.0146, Train Acc=0.9019, Val Loss=0.0090, Val Acc=0.9390
Epoch 2: Train Loss=0.0078, Train Acc=0.9483, Val Loss=0.0076, Val Acc=0.9495
Epoch 3: Train Loss=0.0060, Train Acc=0.9602, Val Loss=0.0072, Val Acc=0.9522
Epoch 4: Train Loss=0.0049, Train Acc=0.9671, Val Loss=0.0068, Val Acc=0.9565
Epoch 5: Train Loss=0.0043, Train Acc=0.9713, Val Loss=0.0056, Val Acc=0.9632
Epoch 6: Train Loss=0.0039, Train Acc=0.9737, Val Loss=0.0074, Val Acc=0.9536
Epoch 7: Train Loss=0.0037, Train Acc=0.9744, Val Loss=0.0055, Val Acc=0.9646
Epoch 8: Train Loss=0.0033, Train Acc=0.9773, Val Loss=0.0058, Val Acc=0.9624
Epoch 9: Train Loss=0.0031, Train Acc=0.9788, Val Loss=0.0067, Val Acc=0.9566
Epoch 10: Train Loss=0.0030, Train Acc=0.9790, Val Loss=0.0056, Val Acc=0.9639


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇█████
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆█▅█▇▆█
val_loss,█▅▄▄▁▅▁▂▃▁

0,1
epoch,10.0
train_accuracy,0.97902
train_loss,0.00305
val_accuracy,0.9639
val_loss,0.00561


[34m[1mwandb[0m: Agent Starting Run: in5a5e69 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


hl_4_bs_64_ac_tanh_e_5
Epoch 1: Train Loss=0.0146, Train Acc=0.9010, Val Loss=0.0105, Val Acc=0.9309
Epoch 2: Train Loss=0.0078, Train Acc=0.9480, Val Loss=0.0078, Val Acc=0.9494
Epoch 3: Train Loss=0.0061, Train Acc=0.9586, Val Loss=0.0069, Val Acc=0.9565
Epoch 4: Train Loss=0.0051, Train Acc=0.9663, Val Loss=0.0063, Val Acc=0.9586
Epoch 5: Train Loss=0.0042, Train Acc=0.9712, Val Loss=0.0061, Val Acc=0.9617


0,1
epoch,▁▃▅▆█
train_accuracy,▁▆▇██
train_loss,█▃▂▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_accuracy,0.97123
train_loss,0.00424
val_accuracy,0.9617
val_loss,0.00612


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7zt0cnxf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


hl_3_bs_64_ac_tanh_e_5
Epoch 1: Train Loss=0.0268, Train Acc=0.8179, Val Loss=0.0144, Val Acc=0.9059
Epoch 2: Train Loss=0.0135, Train Acc=0.9099, Val Loss=0.0121, Val Acc=0.9200
Epoch 3: Train Loss=0.0115, Train Acc=0.9234, Val Loss=0.0108, Val Acc=0.9279
Epoch 4: Train Loss=0.0102, Train Acc=0.9327, Val Loss=0.0100, Val Acc=0.9343
Epoch 5: Train Loss=0.0092, Train Acc=0.9392, Val Loss=0.0093, Val Acc=0.9387


0,1
epoch,▁▃▅▆█
train_accuracy,▁▆▇██
train_loss,█▃▂▁▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_accuracy,0.93922
train_loss,0.00921
val_accuracy,0.9387
val_loss,0.00931


[34m[1mwandb[0m: Agent Starting Run: srahms1a with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


hl_3_bs_64_ac_relu_e_5
Epoch 1: Train Loss=0.0184, Train Acc=0.8752, Val Loss=0.0115, Val Acc=0.9242
Epoch 2: Train Loss=0.0659, Train Acc=0.6554, Val Loss=0.1794, Val Acc=0.1032
Epoch 3: Train Loss=0.1802, Train Acc=0.0989, Val Loss=0.1808, Val Acc=0.0958
Epoch 4: Train Loss=0.1801, Train Acc=0.0994, Val Loss=0.1808, Val Acc=0.0958
Epoch 5: Train Loss=0.1801, Train Acc=0.0994, Val Loss=0.1773, Val Acc=0.1135


0,1
epoch,▁▃▅▆█
train_accuracy,█▆▁▁▁
train_loss,▁▃███
val_accuracy,█▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_accuracy,0.09942
train_loss,0.18012
val_accuracy,0.1135
val_loss,0.1773


[34m[1mwandb[0m: Agent Starting Run: 82eu02iv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


hl_4_bs_64_ac_tanh_e_5
Epoch 1: Train Loss=0.0151, Train Acc=0.8985, Val Loss=0.0116, Val Acc=0.9210
Epoch 2: Train Loss=0.0106, Train Acc=0.9295, Val Loss=0.0100, Val Acc=0.9330
Epoch 3: Train Loss=0.0092, Train Acc=0.9389, Val Loss=0.0091, Val Acc=0.9391
Epoch 4: Train Loss=0.0082, Train Acc=0.9449, Val Loss=0.0084, Val Acc=0.9433
Epoch 5: Train Loss=0.0075, Train Acc=0.9496, Val Loss=0.0079, Val Acc=0.9480


0,1
epoch,▁▃▅▆█
train_accuracy,▁▅▇▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_accuracy,0.94957
train_loss,0.00752
val_accuracy,0.948
val_loss,0.00793


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a4ismiyj with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


hl_4_bs_64_ac_sigmoid_e_10
Epoch 1: Train Loss=0.0415, Train Acc=0.7141, Val Loss=0.0258, Val Acc=0.8767
Epoch 2: Train Loss=0.0460, Train Acc=0.6445, Val Loss=0.0356, Val Acc=0.7742
Epoch 3: Train Loss=0.0348, Train Acc=0.7336, Val Loss=0.0312, Val Acc=0.7832
Epoch 4: Train Loss=0.0267, Train Acc=0.8092, Val Loss=0.0233, Val Acc=0.8190
Epoch 5: Train Loss=0.0209, Train Acc=0.8457, Val Loss=0.0202, Val Acc=0.8872
Epoch 6: Train Loss=0.0154, Train Acc=0.9087, Val Loss=0.0179, Val Acc=0.8905
Epoch 7: Train Loss=0.0138, Train Acc=0.9133, Val Loss=0.0148, Val Acc=0.9065
Epoch 8: Train Loss=0.0124, Train Acc=0.9196, Val Loss=0.0127, Val Acc=0.9195
Epoch 9: Train Loss=0.0114, Train Acc=0.9239, Val Loss=0.0115, Val Acc=0.9255
Epoch 10: Train Loss=0.0106, Train Acc=0.9283, Val Loss=0.0118, Val Acc=0.9230


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▃▁▃▅▆█████
train_loss,▇█▆▄▃▂▂▁▁▁
val_accuracy,▆▁▁▃▆▆▇███
val_loss,▅█▇▄▄▃▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.92826
train_loss,0.01065
val_accuracy,0.923
val_loss,0.01175


[34m[1mwandb[0m: Agent Starting Run: 7cbkazr1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


hl_4_bs_64_ac_sigmoid_e_15
Epoch 1: Train Loss=0.0337, Train Acc=0.7276, Val Loss=0.0144, Val Acc=0.9057
Epoch 2: Train Loss=0.0109, Train Acc=0.9274, Val Loss=0.0093, Val Acc=0.9393
Epoch 3: Train Loss=0.0084, Train Acc=0.9432, Val Loss=0.0082, Val Acc=0.9473
Epoch 4: Train Loss=0.0072, Train Acc=0.9505, Val Loss=0.0075, Val Acc=0.9511
Epoch 5: Train Loss=0.0064, Train Acc=0.9554, Val Loss=0.0076, Val Acc=0.9501
Epoch 6: Train Loss=0.0060, Train Acc=0.9575, Val Loss=0.0077, Val Acc=0.9489
Epoch 7: Train Loss=0.0057, Train Acc=0.9594, Val Loss=0.0081, Val Acc=0.9480
Epoch 8: Train Loss=0.0054, Train Acc=0.9613, Val Loss=0.0072, Val Acc=0.9529
Epoch 9: Train Loss=0.0051, Train Acc=0.9636, Val Loss=0.0073, Val Acc=0.9528
Epoch 10: Train Loss=0.0049, Train Acc=0.9641, Val Loss=0.0073, Val Acc=0.9531
Epoch 11: Train Loss=0.0047, Train Acc=0.9655, Val Loss=0.0068, Val Acc=0.9566
Epoch 12: Train Loss=0.0046, Train Acc=0.9665, Val Loss=0.0065, Val Acc=0.9560
Epoch 13: Train Loss=0.0045, Train

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▇▇▇███████████
train_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▅▆▇▇▆▆▇▇▇▇▇███
val_loss,█▄▃▂▂▂▃▂▂▂▂▁▁▂▁

0,1
epoch,15.0
train_accuracy,0.96732
train_loss,0.00447
val_accuracy,0.9615
val_loss,0.00616


[34m[1mwandb[0m: Agent Starting Run: mgntl0lu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


hl_4_bs_64_ac_sigmoid_e_10
Epoch 1: Train Loss=0.0857, Train Acc=0.2247, Val Loss=0.0724, Val Acc=0.3928
Epoch 2: Train Loss=0.0600, Train Acc=0.5198, Val Loss=0.0510, Val Acc=0.6374
Epoch 3: Train Loss=0.0475, Train Acc=0.7007, Val Loss=0.0449, Val Acc=0.7402
Epoch 4: Train Loss=0.0441, Train Acc=0.7499, Val Loss=0.0429, Val Acc=0.7768
Epoch 5: Train Loss=0.0417, Train Acc=0.7731, Val Loss=0.0400, Val Acc=0.7949
Epoch 6: Train Loss=0.0389, Train Acc=0.7943, Val Loss=0.0372, Val Acc=0.8082
Epoch 7: Train Loss=0.0365, Train Acc=0.8075, Val Loss=0.0350, Val Acc=0.8158
Epoch 8: Train Loss=0.0337, Train Acc=0.8173, Val Loss=0.0320, Val Acc=0.8228
Epoch 9: Train Loss=0.0309, Train Acc=0.8238, Val Loss=0.0294, Val Acc=0.8280
Epoch 10: Train Loss=0.0287, Train Acc=0.8296, Val Loss=0.0277, Val Acc=0.8331


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▇▇▇█████
train_loss,█▅▃▃▃▂▂▂▁▁
val_accuracy,▁▅▇▇▇█████
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_accuracy,0.82961
train_loss,0.0287
val_accuracy,0.8331
val_loss,0.02771


[34m[1mwandb[0m: Agent Starting Run: ndi36d48 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


hl_3_bs_64_ac_relu_e_5
Epoch 1: Train Loss=0.0321, Train Acc=0.7808, Val Loss=0.0359, Val Acc=0.7499
Epoch 2: Train Loss=0.0190, Train Acc=0.8729, Val Loss=0.0220, Val Acc=0.8530
Epoch 3: Train Loss=0.0180, Train Acc=0.8806, Val Loss=0.0206, Val Acc=0.8632
Epoch 4: Train Loss=0.0180, Train Acc=0.8819, Val Loss=0.0195, Val Acc=0.8714
Epoch 5: Train Loss=0.0177, Train Acc=0.8840, Val Loss=0.0186, Val Acc=0.8791


0,1
epoch,▁▃▅▆█
train_accuracy,▁▇███
train_loss,█▂▁▁▁
val_accuracy,▁▇▇██
val_loss,█▂▂▁▁

0,1
epoch,5.0
train_accuracy,0.88399
train_loss,0.01772
val_accuracy,0.8791
val_loss,0.01862


[34m[1mwandb[0m: Agent Starting Run: 5iov0ch7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


hl_3_bs_32_ac_sigmoid_e_10
Epoch 1: Train Loss=0.0354, Train Acc=0.7557, Val Loss=0.0292, Val Acc=0.8075
Epoch 2: Train Loss=0.0246, Train Acc=0.8217, Val Loss=0.0244, Val Acc=0.8270
Epoch 3: Train Loss=0.0198, Train Acc=0.8576, Val Loss=0.0142, Val Acc=0.9160
Epoch 4: Train Loss=0.0135, Train Acc=0.9135, Val Loss=0.0131, Val Acc=0.9169
Epoch 5: Train Loss=0.0121, Train Acc=0.9192, Val Loss=0.0121, Val Acc=0.9208
Epoch 6: Train Loss=0.0114, Train Acc=0.9224, Val Loss=0.0113, Val Acc=0.9255
Epoch 7: Train Loss=0.0107, Train Acc=0.9266, Val Loss=0.0111, Val Acc=0.9286
Epoch 8: Train Loss=0.0104, Train Acc=0.9294, Val Loss=0.0115, Val Acc=0.9241
Epoch 9: Train Loss=0.0102, Train Acc=0.9305, Val Loss=0.0117, Val Acc=0.9258
Epoch 10: Train Loss=0.0102, Train Acc=0.9297, Val Loss=0.0111, Val Acc=0.9273


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▇██████
train_loss,█▅▄▂▂▁▁▁▁▁
val_accuracy,▁▂▇▇██████
val_loss,█▆▂▂▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.9297
train_loss,0.0102
val_accuracy,0.9273
val_loss,0.01108


[34m[1mwandb[0m: Agent Starting Run: 0tsrgh18 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


hl_3_bs_64_ac_tanh_e_5
Epoch 1: Train Loss=0.0586, Train Acc=0.5612, Val Loss=0.0402, Val Acc=0.7272
Epoch 2: Train Loss=0.0327, Train Acc=0.7788, Val Loss=0.0273, Val Acc=0.8138
Epoch 3: Train Loss=0.0237, Train Acc=0.8427, Val Loss=0.0204, Val Acc=0.8649
Epoch 4: Train Loss=0.0188, Train Acc=0.8738, Val Loss=0.0177, Val Acc=0.8820
Epoch 5: Train Loss=0.0165, Train Acc=0.8894, Val Loss=0.0158, Val Acc=0.8957


0,1
epoch,▁▃▅▆█
train_accuracy,▁▆▇██
train_loss,█▄▂▁▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_accuracy,0.88937
train_loss,0.01648
val_accuracy,0.8957
val_loss,0.01585


[34m[1mwandb[0m: Agent Starting Run: oy52tdvk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 0.95
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


hl_4_bs_32_ac_sigmoid_e_5
Epoch 1: Train Loss=0.0883, Train Acc=0.1383, Val Loss=0.0822, Val Acc=0.2086
Epoch 2: Train Loss=0.0816, Train Acc=0.2183, Val Loss=0.0812, Val Acc=0.2253
Epoch 3: Train Loss=0.0771, Train Acc=0.3072, Val Loss=0.0749, Val Acc=0.3262
Epoch 4: Train Loss=0.0781, Train Acc=0.2730, Val Loss=0.0804, Val Acc=0.3032
Epoch 5: Train Loss=0.0800, Train Acc=0.2547, Val Loss=0.0791, Val Acc=0.2098


0,1
epoch,▁▃▅▆█
train_accuracy,▁▄█▇▆
train_loss,█▄▁▂▃
val_accuracy,▁▂█▇▁
val_loss,█▇▁▆▅

0,1
epoch,5.0
train_accuracy,0.25467
train_loss,0.07996
val_accuracy,0.2098
val_loss,0.07912
