In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.datasets import mnist
import wandb
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [4]:
!pip install wandb



In [3]:
import wandb
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mviinod9[0m ([33mviinod9-iitm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [7]:
import wandb
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist

# Initialize WandB
wandb.init(project="Vinod_FashionMNIST", name="One_Image_Per_Class", reinit=True)

# Load Fashion MNIST dataset
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Define class labels
fmnist_labels = {
    0: 'T-shirt/top',
    1: 'Trouser',
    2: 'Pullover',
    3: 'Dress',
    4: 'Coat',
    5: 'Sandal',
    6: 'Shirt',
    7: 'Sneaker',
    8: 'Bag',
    9: 'Ankle boot'
}

# Select **one unique image per class**
samples = []
for class_idx in range(10):
    index = np.where(y_train == class_idx)[0][0]  # Get first occurrence of class
    image = wandb.Image(x_train[index], caption=fmnist_labels[class_idx])
    samples.append(image)

# Log **all 10 images in one step**
wandb.log({"Fashion MNIST Classes": samples})

# Finish WandB run
wandb.finish()


In [6]:
# Load and preprocess dataset
def Data_Preprocess():
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

    # Split into train and validation
    val_size = 5000
    x_val, y_val = x_train[:val_size], y_train[:val_size]
    x_train, y_train = x_train[val_size:], y_train[val_size:]

    # Normalize dataset
    x_train, x_val, x_test = x_train / 255.0, x_val / 255.0, x_test / 255.0

    # One-hot encoding
    y_train = to_categorical(y_train, 10)
    y_val = to_categorical(y_val, 10)
    y_test = to_categorical(y_test, 10)

    return x_train, y_train, x_val, y_val, x_test, y_test




# Load data
x_train, y_train, x_val, y_val, x_test, y_test = Data_Preprocess()

# Define necessary functions
def Weights_Initialization(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size):
    weights = {}
    prev_size = input_size
    hidden_size = num_nodes_hidden_layers[0] if isinstance(num_nodes_hidden_layers, list) else num_nodes_hidden_layers

    for i in range(num_hidden_layer):
        if weight == 'random':
            weights[f'W{i+1}'] = np.random.randn(prev_size, hidden_size) * 0.01
        elif weight == 'xavier':
            weights[f'W{i+1}'] = np.random.randn(prev_size, hidden_size) * np.sqrt(1 / prev_size)
        weights[f'b{i+1}'] = np.zeros((1, hidden_size))
        prev_size = hidden_size

    if weight == 'random':
        weights['W_out'] = np.random.randn(prev_size, output_size) * 0.01
    elif weight == 'xavier':
        weights['W_out'] = np.random.randn(prev_size, output_size) * np.sqrt(1 / prev_size)
    weights['b_out'] = np.zeros((1, output_size))

    return weights

def Activation_Function(Z, activation):
    if activation == 'sigmoid':
        return 1 / (1 + np.exp(-Z))
    elif activation == 'tanh':
        return np.tanh(Z)
    elif activation == 'relu':
        return np.maximum(0, Z)
    elif activation == 'softmax':
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return expZ / np.sum(expZ, axis=1, keepdims=True)

def Derrivative_Activation(Z, activation):
    if activation == 'sigmoid':
        sig = Activation_Function(Z, 'sigmoid')
        return sig * (1 - sig)
    elif activation == 'tanh':
        return 1 - np.tanh(Z)**2
    elif activation == 'relu':
        return (Z > 0).astype(float)

def Cross_Entropy_Loss(y_actual, y_pred):
    return -np.mean(y_actual * np.log(y_pred + 1e-9))


def MSE_Loss(y_actual, y_pred):
    return np.mean((y_actual - y_pred) ** 2)


def Forward_Propogation(X, weights, num_hidden_layer, activation):
    A = X.reshape(X.shape[0], -1)
    cache = {'A0': A}

    for i in range(num_hidden_layer):
        Z = np.dot(A, weights[f'W{i+1}']) + weights[f'b{i+1}']
        A = Activation_Function(Z, activation)
        cache[f'Z{i+1}'] = Z
        cache[f'A{i+1}'] = A

    Z_out = np.dot(A, weights['W_out']) + weights['b_out']
    A_out = Activation_Function(Z_out, 'softmax')

    cache['Z_out'] = Z_out
    cache['A_out'] = A_out

    return A_out, cache

def Back_Propogation(X, y_actual, weights, cache, num_hidden_layer, activation):
    gradients = {}
    m = X.shape[0]

    dZ_out = cache['A_out'] - y_actual
    gradients['dW_out'] = np.dot(cache[f'A{num_hidden_layer}'].T, dZ_out) / m
    gradients['db_out'] = np.sum(dZ_out, axis=0, keepdims=True) / m

    dA = np.dot(dZ_out, weights['W_out'].T)

    for i in range(num_hidden_layer, 0, -1):
        dZ = dA * Derrivative_Activation(cache[f'Z{i}'], activation)
        gradients[f'dW{i}'] = np.dot(cache[f'A{i-1}'].T, dZ) / m
        gradients[f'db{i}'] = np.sum(dZ, axis=0, keepdims=True) / m
        dA = np.dot(dZ, weights[f'W{i}'].T)

    return gradients

def Calculate_Accuracy(X, y_actual, weights, num_hidden_layer, activation):
    y_pred, _ = Forward_Propogation(X, weights, num_hidden_layer, activation)
    return np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_actual, axis=1))

def Stochastic_GD(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, loss_function='cross_entropy'):
    wandb.init(project="stochastic")
    weights = Weights_Initialization(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]

            y_pred, cache = Forward_Propogation(X_batch, weights, num_hidden_layer, activation)
            gradients = Back_Propogation(X_batch, y_batch, weights, cache, num_hidden_layer, activation)

            for key in weights:
                weights[key] -= lr * gradients[f'd{key}']

        train_acc = Calculate_Accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = Calculate_Accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = Cross_Entropy_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = Cross_Entropy_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = MSE_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = MSE_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights




# # Set parameters and train
# num_hidden_layer = 4
# num_nodes_hidden_layers = [128]
# weight = 'random'
# input_size = 28 * 28  # Flattened image size
# output_size = 10  # Number of classes
# lr = 0.01
# batch_size = 64
# epochs = 5
# activation = 'sigmoid'

# trained_weights = Stochastic_GD(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, loss_function='cross_entropy')
# wandb.finish()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
# Modify Momentum_GD to use either cross-entropy or MSE loss

def Momentum_GD(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, momentum=0.9, loss_function='cross_entropy'):
    wandb.init(project="momentum")
    weights = Weights_Initialization(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    velocity = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache = Forward_Propogation(X_batch, weights, num_hidden_layer, activation)
            gradients = Back_Propogation(X_batch, y_batch, weights, cache, num_hidden_layer, activation)

            for key in weights:
                velocity[key] = momentum * velocity[key] - lr * gradients[f'd{key}']
                weights[key] += velocity[key]

        train_acc = Calculate_Accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = Calculate_Accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = Cross_Entropy_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = Cross_Entropy_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = MSE_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = MSE_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage


# Set parameters and train
# num_hidden_layer = 3
# num_nodes_hidden_layers = [128]
# weight = 'random'
# input_size = 28 * 28  # Flattened image size
# output_size = 10  # Number of classes
# lr = 0.01
# batch_size = 64
# epochs = 1
# activation = 'sigmoid'
# trained_weights1 = Momentum_GD(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, momentum=0.9, loss_function='cross_entropy')
# wandb.finish()


In [8]:
def Nesterov_GD(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, momentum=0.9, loss_function='cross_entropy'):
    wandb.init(project="nesterov")
    weights = Weights_Initialization(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    velocity = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            temp_weights = {key: weights[key] + momentum * velocity[key] for key in weights}
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache = Forward_Propogation(X_batch, temp_weights, num_hidden_layer, activation)
            gradients = Back_Propogation(X_batch, y_batch, temp_weights, cache, num_hidden_layer, activation)

            for key in weights:
                velocity[key] = momentum * velocity[key] - lr * gradients[f'd{key}']
                weights[key] += velocity[key]

        train_acc = Calculate_Accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = Calculate_Accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = Cross_Entropy_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = Cross_Entropy_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = MSE_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = MSE_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage


# # Set parameters and train
# num_hidden_layer = 5
# num_nodes_hidden_layers = [128]
# weight = 'random'
# input_size = 28 * 28  # Flattened image size
# output_size = 10  # Number of classes
# lr = 0.01
# batch_size = 64
# epochs = 1
# activation = 'sigmoid'

# trained_weights2 = Nesterov_GD(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, momentum=0.9, loss_function='cross_entropy')
# wandb.finish()


In [9]:
def RMS_Opt(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta=0.9, epsilon=1e-8, loss_function='cross_entropy'):
    wandb.init(project="rmsprop")
    weights = Weights_Initialization(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    cache = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache_forward = Forward_Propogation(X_batch, weights, num_hidden_layer, activation)
            gradients = Back_Propogation(X_batch, y_batch, weights, cache_forward, num_hidden_layer, activation)

            for key in weights:
                cache[key] = beta * cache[key] + (1 - beta) * gradients[f'd{key}']**2
                weights[key] -= lr * gradients[f'd{key}'] / (np.sqrt(cache[key]) + epsilon)

        train_acc = Calculate_Accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = Calculate_Accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = Cross_Entropy_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = Cross_Entropy_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = MSE_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = MSE_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage

# # Set parameters and train
# num_hidden_layer = 4
# num_nodes_hidden_layers = [128]
# weight = 'random'
# input_size = 28 * 28  # Flattened image size
# output_size = 10  # Number of classes
# lr = 0.01
# batch_size = 64
# epochs = 1
# activation = 'sigmoid'


# trained_weights3 = RMS_Opt(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta=0.9, epsilon=1e-8, loss_function='cross_entropy')
# wandb.finish()

In [10]:
def Adam_Opt(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy'):
    wandb.init(project="adam")
    weights = Weights_Initialization(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    m = {key: np.zeros_like(value) for key, value in weights.items()}
    v = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache_forward = Forward_Propogation(X_batch, weights, num_hidden_layer, activation)
            gradients = Back_Propogation(X_batch, y_batch, weights, cache_forward, num_hidden_layer, activation)

            for key in weights:
                m[key] = beta1 * m[key] + (1 - beta1) * gradients[f'd{key}']
                v[key] = beta2 * v[key] + (1 - beta2) * (gradients[f'd{key}'] ** 2)
                m_hat = m[key] / (1 - beta1 ** (epoch + 1))
                v_hat = v[key] / (1 - beta2 ** (epoch + 1))
                weights[key] -= lr * m_hat / (np.sqrt(v_hat) + epsilon)

        train_acc = Calculate_Accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = Calculate_Accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = Cross_Entropy_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = Cross_Entropy_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = MSE_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = MSE_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage

# Set parameters and train
# num_hidden_layer = 4
# num_nodes_hidden_layers = [128]
# weight = 'random'
# input_size = 28 * 28  # Flattened image size
# output_size = 10  # Number of classes
# lr = 0.01
# batch_size = 64
# epochs = 1
# activation = 'sigmoid'


# trained_weights4 = Adam_Opt(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy')
# wandb.finish()


In [11]:
def NAdam_Opt(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy'):
    wandb.init(project="nadam")
    weights = Weights_Initialization(num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size)
    m = {key: np.zeros_like(value) for key, value in weights.items()}
    v = {key: np.zeros_like(value) for key, value in weights.items()}

    for epoch in range(epochs):
        for i in range(0, x_train.shape[0], batch_size):
            X_batch = x_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            y_pred, cache_forward = Forward_Propogation(X_batch, weights, num_hidden_layer, activation)
            gradients = Back_Propogation(X_batch, y_batch, weights, cache_forward, num_hidden_layer, activation)

            for key in weights:
                m[key] = beta1 * m[key] + (1 - beta1) * gradients[f'd{key}']
                v[key] = beta2 * v[key] + (1 - beta2) * (gradients[f'd{key}'] ** 2)
                m_hat = m[key] / (1 - beta1 ** (epoch + 1))
                v_hat = v[key] / (1 - beta2 ** (epoch + 1))
                nadam_update = beta1 * m_hat + (1 - beta1) * gradients[f'd{key}']
                weights[key] -= lr * nadam_update / (np.sqrt(v_hat) + epsilon)

        train_acc = Calculate_Accuracy(x_train, y_train, weights, num_hidden_layer, activation)
        val_acc = Calculate_Accuracy(x_val, y_val, weights, num_hidden_layer, activation)

        # Select loss function dynamically
        if loss_function == 'cross_entropy':
            train_loss = Cross_Entropy_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = Cross_Entropy_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])
        elif loss_function == 'mse':
            train_loss = MSE_Loss(y_train, Forward_Propogation(x_train, weights, num_hidden_layer, activation)[0])
            val_loss = MSE_Loss(y_val, Forward_Propogation(x_val, weights, num_hidden_layer, activation)[0])

        print(f"Epoch {epoch+1}: Train Acc={train_acc:.4f}, Train Loss={train_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")
        wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, "epoch": epoch + 1})

    return weights

# Example usage

# Set parameters and train
# num_hidden_layer = 4
# num_nodes_hidden_layers = [128]
# weight = 'random'
# input_size = 28 * 28  # Flattened image size
# output_size = 10  # Number of classes
# lr = 0.01
# batch_size = 64
# epochs = 4
# activation = 'sigmoid'

# trained_weights5 = NAdam_Opt(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy')
# wandb.finish()


#Question - 4 , 5 , 6

In [26]:

def main():
    sweep_config = {
        'method': 'bayes',
        'metric': {'name': 'accuracy', 'goal': 'maximize'},
        'parameters': {
            'epochs': {'values': [5, 10]},
            'num_layers': {'values': [3, 4, 5]},
            'hidden_size': {'values': [32, 64, 128]},
            'weight_decay': {'values': [0, 0.0005, 0.5]},
            'learning_rate': {'values': [1e-3, 1e-4]},
            'optimizer': {'values': ['stochastic', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']},
            'batch_size': {'values': [16, 32, 64]},
            'weight_init': {'values': ['random', 'xavier']},
            'activation': {'values': ['sigmoid', 'tanh', 'relu']},
        }
    }
    sweep_id = wandb.sweep(sweep_config, project="Vinod_Assgnment1_Question456")
    wandb.agent(sweep_id, function=train, count=350)

def train():
    wandb.init(project="Vinod_Assignment1_Question456")
    # wandb.init()
    config = wandb.config
    run_name = f"Opt-{config.optimizer}_Layers-{config.num_layers}_HS-{config.hidden_size}_LR-{config.learning_rate}_Batch-{config.batch_size}_Act-{config.activation}"
    wandb.run.name = run_name

    # x_train, y_train, x_val, y_val, _, _ = Data_Preprocess()

    optimizer = config.optimizer

    if optimizer == 'stochastic':
        trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
        # trained_weights = Stochastic_GD(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size)
    elif optimizer == 'momentum':
        trained_weights = Momentum_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'nag':
        trained_weights = Nesterov_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'rmsprop':
        trained_weights = RMS_Opt(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'adam':
        trained_weights = Adam_Opt(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'nadam':
        trained_weights = NAdam_Opt(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')

    #wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss})

    wandb.finish()

if __name__ == "__main__":
    main()


Create sweep with ID: bfl29i0j
Sweep URL: https://wandb.ai/viinod9-iitm/Vinod_Assgnment1_Question456/sweeps/bfl29i0j


[34m[1mwandb[0m: Agent Starting Run: j9osbt9i with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8521, Train Loss=0.0421, Val Acc=0.8534, Val Loss=0.0427
Epoch 2: Train Acc=0.8649, Train Loss=0.0378, Val Acc=0.8624, Val Loss=0.0395
Epoch 3: Train Acc=0.8744, Train Loss=0.0350, Val Acc=0.8658, Val Loss=0.0371
Epoch 4: Train Acc=0.8797, Train Loss=0.0336, Val Acc=0.8714, Val Loss=0.0363
Epoch 5: Train Acc=0.8827, Train Loss=0.0329, Val Acc=0.8750, Val Loss=0.0359


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▅▇█
val_loss,█▅▂▁▁

0,1
epoch,5.0
train_acc,0.88269
train_loss,0.03288
val_acc,0.875
val_loss,0.03594


[34m[1mwandb[0m: Agent Starting Run: v63d2ms6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▅▄▄▃▃▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▄▅▅▆▇▇██

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: b86s39u7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.8453, Train Loss=0.0453, Val Acc=0.8442, Val Loss=0.0462
Epoch 2: Train Acc=0.8583, Train Loss=0.0404, Val Acc=0.8568, Val Loss=0.0425
Epoch 3: Train Acc=0.8618, Train Loss=0.0396, Val Acc=0.8566, Val Loss=0.0426
Epoch 4: Train Acc=0.8783, Train Loss=0.0344, Val Acc=0.8714, Val Loss=0.0380
Epoch 5: Train Acc=0.8836, Train Loss=0.0330, Val Acc=0.8696, Val Loss=0.0375


0,1
epoch,▁▃▅▆█
train_acc,▁▃▄▇█
train_loss,█▅▅▂▁
val_acc,▁▄▄██
val_loss,█▅▅▁▁

0,1
epoch,5.0
train_acc,0.88358
train_loss,0.03303
val_acc,0.8696
val_loss,0.03747


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lmntwap5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2077, Train Loss=0.1673, Val Acc=0.2052, Val Loss=0.1671
Epoch 2: Train Acc=0.4006, Train Loss=0.1321, Val Acc=0.4080, Val Loss=0.1319
Epoch 3: Train Acc=0.5163, Train Loss=0.1129, Val Acc=0.5186, Val Loss=0.1127
Epoch 4: Train Acc=0.6755, Train Loss=0.0871, Val Acc=0.6828, Val Loss=0.0867
Epoch 5: Train Acc=0.7569, Train Loss=0.0696, Val Acc=0.7642, Val Loss=0.0696


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▇█
train_loss,█▅▄▂▁
val_acc,▁▄▅▇█
val_loss,█▅▄▂▁

0,1
epoch,5.0
train_acc,0.75687
train_loss,0.06959
val_acc,0.7642
val_loss,0.06963


[34m[1mwandb[0m: Agent Starting Run: kdsuu6dh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 2: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 3: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 4: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 5: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23051
val_acc,0.0976
val_loss,0.2306


[34m[1mwandb[0m: Agent Starting Run: lrcqdjid with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8325, Train Loss=0.0478, Val Acc=0.8354, Val Loss=0.0470
Epoch 2: Train Acc=0.8515, Train Loss=0.0414, Val Acc=0.8558, Val Loss=0.0408
Epoch 3: Train Acc=0.8612, Train Loss=0.0386, Val Acc=0.8618, Val Loss=0.0385
Epoch 4: Train Acc=0.8671, Train Loss=0.0368, Val Acc=0.8666, Val Loss=0.0371
Epoch 5: Train Acc=0.8725, Train Loss=0.0355, Val Acc=0.8712, Val Loss=0.0362


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.87251
train_loss,0.03545
val_acc,0.8712
val_loss,0.03621


[34m[1mwandb[0m: Agent Starting Run: od3goask with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23032
val_acc,0.0976
val_loss,0.23039


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rd7mzocu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁█████████
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁█████████

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: 6cfnxrxi with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8590, Train Loss=0.0394, Val Acc=0.8566, Val Loss=0.0395
Epoch 2: Train Acc=0.8725, Train Loss=0.0351, Val Acc=0.8684, Val Loss=0.0363
Epoch 3: Train Acc=0.8803, Train Loss=0.0328, Val Acc=0.8718, Val Loss=0.0349
Epoch 4: Train Acc=0.8853, Train Loss=0.0313, Val Acc=0.8756, Val Loss=0.0339
Epoch 5: Train Acc=0.8893, Train Loss=0.0301, Val Acc=0.8790, Val Loss=0.0332
Epoch 6: Train Acc=0.8935, Train Loss=0.0291, Val Acc=0.8838, Val Loss=0.0327
Epoch 7: Train Acc=0.8956, Train Loss=0.0284, Val Acc=0.8832, Val Loss=0.0324
Epoch 8: Train Acc=0.8969, Train Loss=0.0279, Val Acc=0.8852, Val Loss=0.0326
Epoch 9: Train Acc=0.8990, Train Loss=0.0274, Val Acc=0.8872, Val Loss=0.0326
Epoch 10: Train Acc=0.9009, Train Loss=0.0267, Val Acc=0.8880, Val Loss=0.0325


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▅▆▇▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▄▄▅▆▇▇▇██
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.90095
train_loss,0.02674
val_acc,0.888
val_loss,0.03245


[34m[1mwandb[0m: Agent Starting Run: v0ersv0x with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.8055, Train Loss=0.0533, Val Acc=0.8070, Val Loss=0.0541
Epoch 2: Train Acc=0.8441, Train Loss=0.0446, Val Acc=0.8402, Val Loss=0.0452
Epoch 3: Train Acc=0.8584, Train Loss=0.0404, Val Acc=0.8562, Val Loss=0.0414
Epoch 4: Train Acc=0.8692, Train Loss=0.0370, Val Acc=0.8646, Val Loss=0.0387
Epoch 5: Train Acc=0.8770, Train Loss=0.0348, Val Acc=0.8724, Val Loss=0.0372


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.87704
train_loss,0.03477
val_acc,0.8724
val_loss,0.03721


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9n8k29nh with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5811, Train Loss=0.1024, Val Acc=0.5914, Val Loss=0.1021
Epoch 2: Train Acc=0.6166, Train Loss=0.0969, Val Acc=0.6252, Val Loss=0.0966
Epoch 3: Train Acc=0.6450, Train Loss=0.0926, Val Acc=0.6542, Val Loss=0.0924
Epoch 4: Train Acc=0.6624, Train Loss=0.0883, Val Acc=0.6716, Val Loss=0.0882
Epoch 5: Train Acc=0.6705, Train Loss=0.0852, Val Acc=0.6782, Val Loss=0.0852


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▆▄▂▁
val_acc,▁▄▆▇█
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_acc,0.67053
train_loss,0.0852
val_acc,0.6782
val_loss,0.08516


[34m[1mwandb[0m: Agent Starting Run: nhy5c0b2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7785, Train Loss=0.0743, Val Acc=0.7806, Val Loss=0.0733
Epoch 2: Train Acc=0.8179, Train Loss=0.0539, Val Acc=0.8264, Val Loss=0.0522
Epoch 3: Train Acc=0.8369, Train Loss=0.0474, Val Acc=0.8430, Val Loss=0.0461
Epoch 4: Train Acc=0.8459, Train Loss=0.0442, Val Acc=0.8476, Val Loss=0.0434
Epoch 5: Train Acc=0.8533, Train Loss=0.0421, Val Acc=0.8518, Val Loss=0.0417


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▂▁▁
val_acc,▁▆▇██
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,0.85325
train_loss,0.04207
val_acc,0.8518
val_loss,0.04174


[34m[1mwandb[0m: Agent Starting Run: yz7ela4b with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 2: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 3: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 4: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 5: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23051
val_acc,0.0976
val_loss,0.2306


[34m[1mwandb[0m: Agent Starting Run: u26ht99d with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7697, Train Loss=0.0671, Val Acc=0.7718, Val Loss=0.0666
Epoch 2: Train Acc=0.7988, Train Loss=0.0591, Val Acc=0.7980, Val Loss=0.0585
Epoch 3: Train Acc=0.8111, Train Loss=0.0551, Val Acc=0.8114, Val Loss=0.0545
Epoch 4: Train Acc=0.8189, Train Loss=0.0526, Val Acc=0.8186, Val Loss=0.0520
Epoch 5: Train Acc=0.8246, Train Loss=0.0507, Val Acc=0.8278, Val Loss=0.0501
Epoch 6: Train Acc=0.8290, Train Loss=0.0493, Val Acc=0.8306, Val Loss=0.0487
Epoch 7: Train Acc=0.8323, Train Loss=0.0481, Val Acc=0.8366, Val Loss=0.0476
Epoch 8: Train Acc=0.8346, Train Loss=0.0471, Val Acc=0.8398, Val Loss=0.0467
Epoch 9: Train Acc=0.8369, Train Loss=0.0463, Val Acc=0.8444, Val Loss=0.0459
Epoch 10: Train Acc=0.8391, Train Loss=0.0456, Val Acc=0.8466, Val Loss=0.0452


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▃▅▅▆▇▇▇██
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.83911
train_loss,0.04561
val_acc,0.8466
val_loss,0.04517


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ku9jyuno with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.2415, Train Loss=0.2143, Val Acc=0.2310, Val Loss=0.2147
Epoch 2: Train Acc=0.4506, Train Loss=0.1664, Val Acc=0.4494, Val Loss=0.1662
Epoch 3: Train Acc=0.6243, Train Loss=0.1025, Val Acc=0.6332, Val Loss=0.1017
Epoch 4: Train Acc=0.6878, Train Loss=0.0839, Val Acc=0.6948, Val Loss=0.0833
Epoch 5: Train Acc=0.7205, Train Loss=0.0753, Val Acc=0.7260, Val Loss=0.0747


0,1
epoch,▁▃▅▆█
train_acc,▁▄▇██
train_loss,█▆▂▁▁
val_acc,▁▄▇██
val_loss,█▆▂▁▁

0,1
epoch,5.0
train_acc,0.72049
train_loss,0.07527
val_acc,0.726
val_loss,0.07475


[34m[1mwandb[0m: Agent Starting Run: usv5lpy5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4429, Train Loss=0.2000, Val Acc=0.4392, Val Loss=0.1995
Epoch 2: Train Acc=0.5259, Train Loss=0.1781, Val Acc=0.5216, Val Loss=0.1770
Epoch 3: Train Acc=0.5755, Train Loss=0.1605, Val Acc=0.5748, Val Loss=0.1591
Epoch 4: Train Acc=0.6099, Train Loss=0.1468, Val Acc=0.6102, Val Loss=0.1454
Epoch 5: Train Acc=0.6338, Train Loss=0.1361, Val Acc=0.6378, Val Loss=0.1347
Epoch 6: Train Acc=0.6520, Train Loss=0.1275, Val Acc=0.6548, Val Loss=0.1261
Epoch 7: Train Acc=0.6657, Train Loss=0.1203, Val Acc=0.6698, Val Loss=0.1190
Epoch 8: Train Acc=0.6769, Train Loss=0.1143, Val Acc=0.6818, Val Loss=0.1130
Epoch 9: Train Acc=0.6852, Train Loss=0.1091, Val Acc=0.6894, Val Loss=0.1078
Epoch 10: Train Acc=0.6923, Train Loss=0.1045, Val Acc=0.6980, Val Loss=0.1033


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▆▆▇▇███
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▃▅▆▆▇▇███
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.69231
train_loss,0.10452
val_acc,0.698
val_loss,0.10333


[34m[1mwandb[0m: Agent Starting Run: fjanz1i1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1733, Train Loss=0.2297, Val Acc=0.1682, Val Loss=0.2298
Epoch 2: Train Acc=0.2328, Train Loss=0.2291, Val Acc=0.2326, Val Loss=0.2292
Epoch 3: Train Acc=0.2829, Train Loss=0.2283, Val Acc=0.2776, Val Loss=0.2283
Epoch 4: Train Acc=0.3162, Train Loss=0.2268, Val Acc=0.3106, Val Loss=0.2267
Epoch 5: Train Acc=0.3466, Train Loss=0.2237, Val Acc=0.3484, Val Loss=0.2237


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▇█
train_loss,█▇▆▅▁
val_acc,▁▄▅▇█
val_loss,█▇▆▅▁

0,1
epoch,5.0
train_acc,0.34656
train_loss,0.22374
val_acc,0.3484
val_loss,0.22368


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xsb0pmk9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8571, Train Loss=0.0410, Val Acc=0.8554, Val Loss=0.0406
Epoch 2: Train Acc=0.8709, Train Loss=0.0364, Val Acc=0.8688, Val Loss=0.0371
Epoch 3: Train Acc=0.8786, Train Loss=0.0341, Val Acc=0.8748, Val Loss=0.0354
Epoch 4: Train Acc=0.8841, Train Loss=0.0324, Val Acc=0.8768, Val Loss=0.0343
Epoch 5: Train Acc=0.8887, Train Loss=0.0312, Val Acc=0.8774, Val Loss=0.0336


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▇██
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.88871
train_loss,0.0312
val_acc,0.8774
val_loss,0.03356


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fyh494w4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8618, Train Loss=0.0381, Val Acc=0.8592, Val Loss=0.0387
Epoch 2: Train Acc=0.8762, Train Loss=0.0341, Val Acc=0.8712, Val Loss=0.0355
Epoch 3: Train Acc=0.8831, Train Loss=0.0321, Val Acc=0.8756, Val Loss=0.0341
Epoch 4: Train Acc=0.8884, Train Loss=0.0308, Val Acc=0.8796, Val Loss=0.0332
Epoch 5: Train Acc=0.8917, Train Loss=0.0297, Val Acc=0.8818, Val Loss=0.0325


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.89167
train_loss,0.02971
val_acc,0.8818
val_loss,0.03254


[34m[1mwandb[0m: Agent Starting Run: 7ieh3mfh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2003, Train Loss=0.1681, Val Acc=0.1988, Val Loss=0.1679
Epoch 2: Train Acc=0.2018, Train Loss=0.1671, Val Acc=0.2000, Val Loss=0.1668
Epoch 3: Train Acc=0.2128, Train Loss=0.1659, Val Acc=0.2244, Val Loss=0.1658
Epoch 4: Train Acc=0.2270, Train Loss=0.1627, Val Acc=0.2398, Val Loss=0.1625
Epoch 5: Train Acc=0.3675, Train Loss=0.1552, Val Acc=0.3808, Val Loss=0.1550
Epoch 6: Train Acc=0.3924, Train Loss=0.1455, Val Acc=0.4012, Val Loss=0.1452
Epoch 7: Train Acc=0.4001, Train Loss=0.1366, Val Acc=0.4074, Val Loss=0.1363
Epoch 8: Train Acc=0.4103, Train Loss=0.1290, Val Acc=0.4182, Val Loss=0.1287
Epoch 9: Train Acc=0.4266, Train Loss=0.1233, Val Acc=0.4348, Val Loss=0.1230
Epoch 10: Train Acc=0.4372, Train Loss=0.1194, Val Acc=0.4454, Val Loss=0.1191


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▂▆▇▇▇██
train_loss,███▇▆▅▃▂▂▁
val_acc,▁▁▂▂▆▇▇▇██
val_loss,███▇▆▅▃▂▂▁

0,1
epoch,10.0
train_acc,0.43724
train_loss,0.11942
val_acc,0.4454
val_loss,0.11915


[34m[1mwandb[0m: Agent Starting Run: 5dk06zin with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▆▅▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.2303
val_acc,0.0976
val_loss,0.23035


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tnhfyzdm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▅▄▃▂▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▃▄▅▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h38qjlqz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6291, Train Loss=0.0974, Val Acc=0.6332, Val Loss=0.0973
Epoch 2: Train Acc=0.6747, Train Loss=0.0859, Val Acc=0.6788, Val Loss=0.0861
Epoch 3: Train Acc=0.6961, Train Loss=0.0815, Val Acc=0.6990, Val Loss=0.0817
Epoch 4: Train Acc=0.7297, Train Loss=0.0773, Val Acc=0.7318, Val Loss=0.0774
Epoch 5: Train Acc=0.7513, Train Loss=0.0711, Val Acc=0.7526, Val Loss=0.0716
Epoch 6: Train Acc=0.7591, Train Loss=0.0683, Val Acc=0.7608, Val Loss=0.0689
Epoch 7: Train Acc=0.7666, Train Loss=0.0661, Val Acc=0.7680, Val Loss=0.0667
Epoch 8: Train Acc=0.7758, Train Loss=0.0638, Val Acc=0.7732, Val Loss=0.0643
Epoch 9: Train Acc=0.7854, Train Loss=0.0614, Val Acc=0.7834, Val Loss=0.0619
Epoch 10: Train Acc=0.7982, Train Loss=0.0580, Val Acc=0.7968, Val Loss=0.0585


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇▇█
train_loss,█▆▅▄▃▃▂▂▂▁
val_acc,▁▃▄▅▆▆▇▇▇█
val_loss,█▆▅▄▃▃▂▂▂▁

0,1
epoch,10.0
train_acc,0.79824
train_loss,0.05804
val_acc,0.7968
val_loss,0.05852


[34m[1mwandb[0m: Agent Starting Run: sioudpxi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8459, Train Loss=0.0430, Val Acc=0.8534, Val Loss=0.0424
Epoch 2: Train Acc=0.8594, Train Loss=0.0390, Val Acc=0.8614, Val Loss=0.0392
Epoch 3: Train Acc=0.8678, Train Loss=0.0366, Val Acc=0.8650, Val Loss=0.0375
Epoch 4: Train Acc=0.8751, Train Loss=0.0347, Val Acc=0.8688, Val Loss=0.0362
Epoch 5: Train Acc=0.8807, Train Loss=0.0332, Val Acc=0.8714, Val Loss=0.0352


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.88065
train_loss,0.03323
val_acc,0.8714
val_loss,0.03517


[34m[1mwandb[0m: Agent Starting Run: hbcdh6lc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4552, Train Loss=0.2227, Val Acc=0.4698, Val Loss=0.2225
Epoch 2: Train Acc=0.3615, Train Loss=0.2078, Val Acc=0.3732, Val Loss=0.2075
Epoch 3: Train Acc=0.3032, Train Loss=0.1941, Val Acc=0.3108, Val Loss=0.1938
Epoch 4: Train Acc=0.2842, Train Loss=0.1847, Val Acc=0.2976, Val Loss=0.1844
Epoch 5: Train Acc=0.2966, Train Loss=0.1780, Val Acc=0.3094, Val Loss=0.1778


0,1
epoch,▁▃▅▆█
train_acc,█▄▂▁▂
train_loss,█▆▄▂▁
val_acc,█▄▂▁▁
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_acc,0.29664
train_loss,0.17802
val_acc,0.3094
val_loss,0.17777


[34m[1mwandb[0m: Agent Starting Run: 48og2mj6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▄▂▂▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▃▅▆▆▇▇███

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: kp9cg229 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.3201, Train Loss=0.2219, Val Acc=0.3304, Val Loss=0.2217
Epoch 2: Train Acc=0.4580, Train Loss=0.1743, Val Acc=0.4732, Val Loss=0.1734
Epoch 3: Train Acc=0.6271, Train Loss=0.1108, Val Acc=0.6308, Val Loss=0.1099
Epoch 4: Train Acc=0.6823, Train Loss=0.0874, Val Acc=0.6832, Val Loss=0.0869
Epoch 5: Train Acc=0.7139, Train Loss=0.0788, Val Acc=0.7160, Val Loss=0.0785
Epoch 6: Train Acc=0.7343, Train Loss=0.0736, Val Acc=0.7334, Val Loss=0.0733
Epoch 7: Train Acc=0.7490, Train Loss=0.0700, Val Acc=0.7496, Val Loss=0.0696
Epoch 8: Train Acc=0.7598, Train Loss=0.0669, Val Acc=0.7618, Val Loss=0.0665
Epoch 9: Train Acc=0.7698, Train Loss=0.0645, Val Acc=0.7730, Val Loss=0.0639
Epoch 10: Train Acc=0.7794, Train Loss=0.0622, Val Acc=0.7832, Val Loss=0.0617


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▆▇▇▇████
train_loss,█▆▃▂▂▂▁▁▁▁
val_acc,▁▃▆▆▇▇▇███
val_loss,█▆▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.77936
train_loss,0.06223
val_acc,0.7832
val_loss,0.06166


[34m[1mwandb[0m: Agent Starting Run: uy9g1zyk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1001, Train Loss=0.2306, Val Acc=0.0986, Val Loss=0.2305
Epoch 2: Train Acc=0.1208, Train Loss=0.2300, Val Acc=0.1184, Val Loss=0.2300
Epoch 3: Train Acc=0.2153, Train Loss=0.2299, Val Acc=0.2144, Val Loss=0.2299
Epoch 4: Train Acc=0.2262, Train Loss=0.2298, Val Acc=0.2192, Val Loss=0.2298
Epoch 5: Train Acc=0.2593, Train Loss=0.2297, Val Acc=0.2556, Val Loss=0.2297
Epoch 6: Train Acc=0.2822, Train Loss=0.2296, Val Acc=0.2796, Val Loss=0.2296
Epoch 7: Train Acc=0.2919, Train Loss=0.2295, Val Acc=0.2854, Val Loss=0.2295
Epoch 8: Train Acc=0.2967, Train Loss=0.2294, Val Acc=0.2914, Val Loss=0.2294
Epoch 9: Train Acc=0.2991, Train Loss=0.2293, Val Acc=0.2944, Val Loss=0.2293
Epoch 10: Train Acc=0.2997, Train Loss=0.2291, Val Acc=0.2926, Val Loss=0.2291


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▅▅▇▇████
train_loss,█▅▅▄▄▃▃▂▂▁
val_acc,▁▂▅▅▇▇████
val_loss,█▅▅▅▄▄▃▂▂▁

0,1
epoch,10.0
train_acc,0.29975
train_loss,0.22913
val_acc,0.2926
val_loss,0.22913


[34m[1mwandb[0m: Agent Starting Run: azxd27gz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7357, Train Loss=0.0730, Val Acc=0.7402, Val Loss=0.0727
Epoch 2: Train Acc=0.7947, Train Loss=0.0562, Val Acc=0.8040, Val Loss=0.0552
Epoch 3: Train Acc=0.8162, Train Loss=0.0486, Val Acc=0.8204, Val Loss=0.0484
Epoch 4: Train Acc=0.8405, Train Loss=0.0444, Val Acc=0.8384, Val Loss=0.0448
Epoch 5: Train Acc=0.8591, Train Loss=0.0406, Val Acc=0.8560, Val Loss=0.0413


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.85905
train_loss,0.04055
val_acc,0.856
val_loss,0.04128


[34m[1mwandb[0m: Agent Starting Run: ohd1qlt9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8063, Train Loss=0.0540, Val Acc=0.8136, Val Loss=0.0537
Epoch 2: Train Acc=0.8254, Train Loss=0.0489, Val Acc=0.8304, Val Loss=0.0486
Epoch 3: Train Acc=0.8354, Train Loss=0.0462, Val Acc=0.8408, Val Loss=0.0459
Epoch 4: Train Acc=0.8423, Train Loss=0.0443, Val Acc=0.8456, Val Loss=0.0441
Epoch 5: Train Acc=0.8475, Train Loss=0.0428, Val Acc=0.8528, Val Loss=0.0427


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.84751
train_loss,0.04282
val_acc,0.8528
val_loss,0.04273


[34m[1mwandb[0m: Agent Starting Run: ohkhfjh4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1995, Train Loss=0.1702, Val Acc=0.1982, Val Loss=0.1701
Epoch 2: Train Acc=0.2003, Train Loss=0.1689, Val Acc=0.1988, Val Loss=0.1687
Epoch 3: Train Acc=0.2009, Train Loss=0.1681, Val Acc=0.2000, Val Loss=0.1678
Epoch 4: Train Acc=0.2070, Train Loss=0.1674, Val Acc=0.2190, Val Loss=0.1671
Epoch 5: Train Acc=0.2029, Train Loss=0.1668, Val Acc=0.2168, Val Loss=0.1666
Epoch 6: Train Acc=0.2037, Train Loss=0.1662, Val Acc=0.2164, Val Loss=0.1660
Epoch 7: Train Acc=0.2053, Train Loss=0.1653, Val Acc=0.2190, Val Loss=0.1651
Epoch 8: Train Acc=0.2144, Train Loss=0.1638, Val Acc=0.2268, Val Loss=0.1636
Epoch 9: Train Acc=0.2778, Train Loss=0.1614, Val Acc=0.2910, Val Loss=0.1612
Epoch 10: Train Acc=0.2959, Train Loss=0.1578, Val Acc=0.3072, Val Loss=0.1576


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▂▁▁▁▂▇█
train_loss,█▇▇▆▆▆▅▄▃▁
val_acc,▁▁▁▂▂▂▂▃▇█
val_loss,█▇▇▆▆▆▅▄▃▁

0,1
epoch,10.0
train_acc,0.29595
train_loss,0.15775
val_acc,0.3072
val_loss,0.15764


[34m[1mwandb[0m: Agent Starting Run: ob5h66ms with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5172, Train Loss=0.1566, Val Acc=0.5086, Val Loss=0.1567
Epoch 2: Train Acc=0.6576, Train Loss=0.1091, Val Acc=0.6632, Val Loss=0.1093
Epoch 3: Train Acc=0.7321, Train Loss=0.0825, Val Acc=0.7406, Val Loss=0.0826
Epoch 4: Train Acc=0.7615, Train Loss=0.0683, Val Acc=0.7674, Val Loss=0.0681
Epoch 5: Train Acc=0.7802, Train Loss=0.0608, Val Acc=0.7858, Val Loss=0.0607


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇██
train_loss,█▅▃▂▁
val_acc,▁▅▇██
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.7802
train_loss,0.06085
val_acc,0.7858
val_loss,0.06067


[34m[1mwandb[0m: Agent Starting Run: qmssvvjm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4073, Train Loss=0.2034, Val Acc=0.4192, Val Loss=0.2025
Epoch 2: Train Acc=0.4948, Train Loss=0.1807, Val Acc=0.5172, Val Loss=0.1795
Epoch 3: Train Acc=0.5653, Train Loss=0.1628, Val Acc=0.5814, Val Loss=0.1615
Epoch 4: Train Acc=0.6004, Train Loss=0.1490, Val Acc=0.6162, Val Loss=0.1475
Epoch 5: Train Acc=0.6197, Train Loss=0.1379, Val Acc=0.6338, Val Loss=0.1365


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▆▄▂▁
val_acc,▁▄▆▇█
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_acc,0.61967
train_loss,0.13791
val_acc,0.6338
val_loss,0.13652


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5008owcg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,█▁▁▁▁▁▁▁▁▁
train_loss,█▂▂▂▂▂▁▁▁▁
val_acc,▁█████████
val_loss,█▂▂▂▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: v3v82242 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7149, Train Loss=0.0957, Val Acc=0.7216, Val Loss=0.0953
Epoch 2: Train Acc=0.7657, Train Loss=0.0806, Val Acc=0.7698, Val Loss=0.0801
Epoch 3: Train Acc=0.7879, Train Loss=0.0722, Val Acc=0.7932, Val Loss=0.0716
Epoch 4: Train Acc=0.8012, Train Loss=0.0666, Val Acc=0.8032, Val Loss=0.0660
Epoch 5: Train Acc=0.8086, Train Loss=0.0626, Val Acc=0.8114, Val Loss=0.0619
Epoch 6: Train Acc=0.8140, Train Loss=0.0596, Val Acc=0.8180, Val Loss=0.0588
Epoch 7: Train Acc=0.8181, Train Loss=0.0572, Val Acc=0.8218, Val Loss=0.0563
Epoch 8: Train Acc=0.8221, Train Loss=0.0552, Val Acc=0.8284, Val Loss=0.0542
Epoch 9: Train Acc=0.8253, Train Loss=0.0535, Val Acc=0.8316, Val Loss=0.0526
Epoch 10: Train Acc=0.8283, Train Loss=0.0521, Val Acc=0.8342, Val Loss=0.0511


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▆▇▇▇███
train_loss,█▆▄▃▃▂▂▁▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▆▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.82829
train_loss,0.05212
val_acc,0.8342
val_loss,0.05113


[34m[1mwandb[0m: Agent Starting Run: b334tm4h with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7320, Train Loss=0.0817, Val Acc=0.7418, Val Loss=0.0809
Epoch 2: Train Acc=0.7818, Train Loss=0.0632, Val Acc=0.7890, Val Loss=0.0625
Epoch 3: Train Acc=0.8082, Train Loss=0.0552, Val Acc=0.8152, Val Loss=0.0545
Epoch 4: Train Acc=0.8214, Train Loss=0.0507, Val Acc=0.8286, Val Loss=0.0500
Epoch 5: Train Acc=0.8307, Train Loss=0.0479, Val Acc=0.8342, Val Loss=0.0473


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇██
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.83065
train_loss,0.04793
val_acc,0.8342
val_loss,0.04734


[34m[1mwandb[0m: Agent Starting Run: w0pr2w4e with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6637, Train Loss=0.1070, Val Acc=0.6740, Val Loss=0.1062
Epoch 2: Train Acc=0.7255, Train Loss=0.0808, Val Acc=0.7288, Val Loss=0.0805
Epoch 3: Train Acc=0.7713, Train Loss=0.0695, Val Acc=0.7732, Val Loss=0.0692
Epoch 4: Train Acc=0.7962, Train Loss=0.0623, Val Acc=0.7984, Val Loss=0.0619
Epoch 5: Train Acc=0.8101, Train Loss=0.0572, Val Acc=0.8130, Val Loss=0.0569


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.81005
train_loss,0.05721
val_acc,0.813
val_loss,0.05689


[34m[1mwandb[0m: Agent Starting Run: 8grxycdt with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5855, Train Loss=0.0990, Val Acc=0.5914, Val Loss=0.0987
Epoch 2: Train Acc=0.6250, Train Loss=0.0934, Val Acc=0.6302, Val Loss=0.0932
Epoch 3: Train Acc=0.6686, Train Loss=0.0873, Val Acc=0.6720, Val Loss=0.0872
Epoch 4: Train Acc=0.6883, Train Loss=0.0818, Val Acc=0.6940, Val Loss=0.0818
Epoch 5: Train Acc=0.6965, Train Loss=0.0793, Val Acc=0.7026, Val Loss=0.0793
Epoch 6: Train Acc=0.7033, Train Loss=0.0777, Val Acc=0.7084, Val Loss=0.0777
Epoch 7: Train Acc=0.7088, Train Loss=0.0763, Val Acc=0.7170, Val Loss=0.0763
Epoch 8: Train Acc=0.7135, Train Loss=0.0749, Val Acc=0.7240, Val Loss=0.0749
Epoch 9: Train Acc=0.7247, Train Loss=0.0735, Val Acc=0.7316, Val Loss=0.0733
Epoch 10: Train Acc=0.7366, Train Loss=0.0719, Val Acc=0.7432, Val Loss=0.0717


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▆▆▆▇▇▇█
train_loss,█▇▅▄▃▂▂▂▁▁
val_acc,▁▃▅▆▆▆▇▇▇█
val_loss,█▇▅▄▃▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.73658
train_loss,0.07189
val_acc,0.7432
val_loss,0.07166


[34m[1mwandb[0m: Agent Starting Run: i563y712 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8609, Train Loss=0.0385, Val Acc=0.8602, Val Loss=0.0384
Epoch 2: Train Acc=0.8778, Train Loss=0.0333, Val Acc=0.8756, Val Loss=0.0348
Epoch 3: Train Acc=0.8861, Train Loss=0.0310, Val Acc=0.8790, Val Loss=0.0336
Epoch 4: Train Acc=0.8921, Train Loss=0.0291, Val Acc=0.8808, Val Loss=0.0325
Epoch 5: Train Acc=0.8953, Train Loss=0.0280, Val Acc=0.8862, Val Loss=0.0323


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.89533
train_loss,0.02798
val_acc,0.8862
val_loss,0.0323


[34m[1mwandb[0m: Agent Starting Run: kjc458sh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2320, Train Loss=0.1653, Val Acc=0.2258, Val Loss=0.1652
Epoch 2: Train Acc=0.4895, Train Loss=0.1187, Val Acc=0.4900, Val Loss=0.1189
Epoch 3: Train Acc=0.5715, Train Loss=0.1011, Val Acc=0.5746, Val Loss=0.1011
Epoch 4: Train Acc=0.6452, Train Loss=0.0902, Val Acc=0.6534, Val Loss=0.0903
Epoch 5: Train Acc=0.6826, Train Loss=0.0836, Val Acc=0.6872, Val Loss=0.0837


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▂▂▁
val_acc,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.6826
train_loss,0.08364
val_acc,0.6872
val_loss,0.08373


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p9yb9ztx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.4493, Train Loss=0.1198, Val Acc=0.4526, Val Loss=0.1196
Epoch 2: Train Acc=0.6218, Train Loss=0.0909, Val Acc=0.6230, Val Loss=0.0901
Epoch 3: Train Acc=0.6514, Train Loss=0.0777, Val Acc=0.6544, Val Loss=0.0771
Epoch 4: Train Acc=0.7239, Train Loss=0.0699, Val Acc=0.7260, Val Loss=0.0696
Epoch 5: Train Acc=0.7329, Train Loss=0.0645, Val Acc=0.7338, Val Loss=0.0647


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆██
train_loss,█▄▃▂▁
val_acc,▁▅▆██
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.73293
train_loss,0.06454
val_acc,0.7338
val_loss,0.0647


[34m[1mwandb[0m: Agent Starting Run: s53o4us4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁▇███
val_acc,▁▁▁▁▁
val_loss,▁▇███

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: hqfi718d with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8562, Train Loss=0.0397, Val Acc=0.8516, Val Loss=0.0400
Epoch 2: Train Acc=0.8747, Train Loss=0.0346, Val Acc=0.8712, Val Loss=0.0359
Epoch 3: Train Acc=0.8833, Train Loss=0.0320, Val Acc=0.8756, Val Loss=0.0341
Epoch 4: Train Acc=0.8893, Train Loss=0.0303, Val Acc=0.8816, Val Loss=0.0331
Epoch 5: Train Acc=0.8933, Train Loss=0.0289, Val Acc=0.8810, Val Loss=0.0325


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▆▇██
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.89331
train_loss,0.02893
val_acc,0.881
val_loss,0.03248


[34m[1mwandb[0m: Agent Starting Run: rktvjyg4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0999, Train Loss=0.2286, Val Acc=0.1012, Val Loss=0.2286
Epoch 2: Train Acc=0.1985, Train Loss=0.2155, Val Acc=0.1894, Val Loss=0.2155
Epoch 3: Train Acc=0.1995, Train Loss=0.1953, Val Acc=0.1918, Val Loss=0.1953
Epoch 4: Train Acc=0.2005, Train Loss=0.1823, Val Acc=0.1924, Val Loss=0.1822
Epoch 5: Train Acc=0.2013, Train Loss=0.1757, Val Acc=0.1930, Val Loss=0.1756


0,1
epoch,▁▃▅▆█
train_acc,▁████
train_loss,█▆▄▂▁
val_acc,▁████
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_acc,0.20129
train_loss,0.17569
val_acc,0.193
val_loss,0.17562


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c00366ok with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7367, Train Loss=0.0728, Val Acc=0.7408, Val Loss=0.0727
Epoch 2: Train Acc=0.7845, Train Loss=0.0599, Val Acc=0.7964, Val Loss=0.0587
Epoch 3: Train Acc=0.8098, Train Loss=0.0537, Val Acc=0.8182, Val Loss=0.0531
Epoch 4: Train Acc=0.8236, Train Loss=0.0503, Val Acc=0.8278, Val Loss=0.0503
Epoch 5: Train Acc=0.8339, Train Loss=0.0479, Val Acc=0.8344, Val Loss=0.0482


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇██
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.83385
train_loss,0.04785
val_acc,0.8344
val_loss,0.04821


[34m[1mwandb[0m: Agent Starting Run: 4jzu3n8z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▂▆▇█▇█▇▇▇
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▅▇▇██████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 892ua5tf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁█████████
train_loss,█▁▁▁▁▁▁▁▁▁
val_acc,█▁▁▁▁▁▁▁▁▁
val_loss,▁▆▇███████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: nlntrfbi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8099, Train Loss=0.0544, Val Acc=0.8128, Val Loss=0.0533
Epoch 2: Train Acc=0.8381, Train Loss=0.0455, Val Acc=0.8448, Val Loss=0.0448
Epoch 3: Train Acc=0.8506, Train Loss=0.0417, Val Acc=0.8536, Val Loss=0.0415
Epoch 4: Train Acc=0.8588, Train Loss=0.0394, Val Acc=0.8592, Val Loss=0.0397
Epoch 5: Train Acc=0.8642, Train Loss=0.0378, Val Acc=0.8624, Val Loss=0.0385


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▆▇██
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.86424
train_loss,0.03784
val_acc,0.8624
val_loss,0.03847


[34m[1mwandb[0m: Agent Starting Run: 6nnhi8lg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 6: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 7: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 8: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 9: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 10: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,█▁▁▁▁▁▁▁▁▁
train_loss,█▇▇▆▆▅▄▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,██▇▇▆▆▅▄▃▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23021
val_acc,0.0914
val_loss,0.23022


[34m[1mwandb[0m: Agent Starting Run: h39egqh4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2005, Train Loss=0.1679, Val Acc=0.1994, Val Loss=0.1677
Epoch 2: Train Acc=0.2032, Train Loss=0.1669, Val Acc=0.2014, Val Loss=0.1666
Epoch 3: Train Acc=0.2136, Train Loss=0.1651, Val Acc=0.2246, Val Loss=0.1649
Epoch 4: Train Acc=0.2803, Train Loss=0.1598, Val Acc=0.2912, Val Loss=0.1597
Epoch 5: Train Acc=0.3732, Train Loss=0.1504, Val Acc=0.3858, Val Loss=0.1502


0,1
epoch,▁▃▅▆█
train_acc,▁▁▂▄█
train_loss,██▇▅▁
val_acc,▁▁▂▄█
val_loss,██▇▅▁

0,1
epoch,5.0
train_acc,0.37316
train_loss,0.15042
val_acc,0.3858
val_loss,0.15021


[34m[1mwandb[0m: Agent Starting Run: 0xorcfe7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8023, Train Loss=0.0602, Val Acc=0.8070, Val Loss=0.0595
Epoch 2: Train Acc=0.8237, Train Loss=0.0518, Val Acc=0.8276, Val Loss=0.0509
Epoch 3: Train Acc=0.8328, Train Loss=0.0481, Val Acc=0.8372, Val Loss=0.0472
Epoch 4: Train Acc=0.8388, Train Loss=0.0459, Val Acc=0.8444, Val Loss=0.0450
Epoch 5: Train Acc=0.8440, Train Loss=0.0444, Val Acc=0.8490, Val Loss=0.0436


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.84396
train_loss,0.04439
val_acc,0.849
val_loss,0.04355


[34m[1mwandb[0m: Agent Starting Run: 8zy6saix with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6843, Train Loss=0.0919, Val Acc=0.6902, Val Loss=0.0912
Epoch 2: Train Acc=0.7492, Train Loss=0.0715, Val Acc=0.7586, Val Loss=0.0708
Epoch 3: Train Acc=0.7824, Train Loss=0.0618, Val Acc=0.7904, Val Loss=0.0610
Epoch 4: Train Acc=0.8039, Train Loss=0.0558, Val Acc=0.8088, Val Loss=0.0550
Epoch 5: Train Acc=0.8178, Train Loss=0.0518, Val Acc=0.8238, Val Loss=0.0510
Epoch 6: Train Acc=0.8266, Train Loss=0.0491, Val Acc=0.8356, Val Loss=0.0483
Epoch 7: Train Acc=0.8336, Train Loss=0.0470, Val Acc=0.8420, Val Loss=0.0464
Epoch 8: Train Acc=0.8385, Train Loss=0.0455, Val Acc=0.8440, Val Loss=0.0450
Epoch 9: Train Acc=0.8428, Train Loss=0.0442, Val Acc=0.8480, Val Loss=0.0438
Epoch 10: Train Acc=0.8461, Train Loss=0.0430, Val Acc=0.8504, Val Loss=0.0429


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▅▆▇▇████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.84609
train_loss,0.04303
val_acc,0.8504
val_loss,0.04289


[34m[1mwandb[0m: Agent Starting Run: 11fbubyd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2001, Train Loss=0.1854, Val Acc=0.1932, Val Loss=0.1853
Epoch 2: Train Acc=0.2032, Train Loss=0.1712, Val Acc=0.1966, Val Loss=0.1710
Epoch 3: Train Acc=0.2073, Train Loss=0.1685, Val Acc=0.2066, Val Loss=0.1682
Epoch 4: Train Acc=0.2172, Train Loss=0.1671, Val Acc=0.2162, Val Loss=0.1668
Epoch 5: Train Acc=0.2938, Train Loss=0.1618, Val Acc=0.2868, Val Loss=0.1617


0,1
epoch,▁▃▅▆█
train_acc,▁▁▂▂█
train_loss,█▄▃▃▁
val_acc,▁▁▂▃█
val_loss,█▄▃▃▁

0,1
epoch,5.0
train_acc,0.2938
train_loss,0.1618
val_acc,0.2868
val_loss,0.16175


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p6lqsrlg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7108, Train Loss=0.0797, Val Acc=0.7184, Val Loss=0.0788
Epoch 2: Train Acc=0.7808, Train Loss=0.0622, Val Acc=0.7856, Val Loss=0.0620
Epoch 3: Train Acc=0.8123, Train Loss=0.0545, Val Acc=0.8170, Val Loss=0.0543
Epoch 4: Train Acc=0.8248, Train Loss=0.0506, Val Acc=0.8300, Val Loss=0.0504
Epoch 5: Train Acc=0.8326, Train Loss=0.0481, Val Acc=0.8366, Val Loss=0.0480
Epoch 6: Train Acc=0.8384, Train Loss=0.0463, Val Acc=0.8422, Val Loss=0.0463
Epoch 7: Train Acc=0.8433, Train Loss=0.0449, Val Acc=0.8488, Val Loss=0.0449
Epoch 8: Train Acc=0.8473, Train Loss=0.0437, Val Acc=0.8496, Val Loss=0.0438
Epoch 9: Train Acc=0.8503, Train Loss=0.0427, Val Acc=0.8542, Val Loss=0.0429
Epoch 10: Train Acc=0.8529, Train Loss=0.0418, Val Acc=0.8550, Val Loss=0.0420


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▇▇▇████
train_loss,█▅▃▃▂▂▂▁▁▁
val_acc,▁▄▆▇▇▇████
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.85291
train_loss,0.04181
val_acc,0.855
val_loss,0.04205


[34m[1mwandb[0m: Agent Starting Run: sfr3qasx with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4861, Train Loss=0.1491, Val Acc=0.4980, Val Loss=0.1482
Epoch 2: Train Acc=0.7240, Train Loss=0.0789, Val Acc=0.7338, Val Loss=0.0781
Epoch 3: Train Acc=0.7766, Train Loss=0.0653, Val Acc=0.7832, Val Loss=0.0649
Epoch 4: Train Acc=0.7989, Train Loss=0.0592, Val Acc=0.8046, Val Loss=0.0589
Epoch 5: Train Acc=0.8101, Train Loss=0.0553, Val Acc=0.8164, Val Loss=0.0551


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇██
train_loss,█▃▂▁▁
val_acc,▁▆▇██
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,0.81005
train_loss,0.05528
val_acc,0.8164
val_loss,0.05513


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gxd8t8u1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▂▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: m491rfol with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▄▃▃▂▂▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▄▅▅▆▇▇██

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: ljn97y47 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▆▅▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23031
val_acc,0.0976
val_loss,0.23036


[34m[1mwandb[0m: Agent Starting Run: o7qx9s7t with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▅███▇▆▅▄▃▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▇████▇▇▇▆

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x118850a with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8584, Train Loss=0.0395, Val Acc=0.8582, Val Loss=0.0396
Epoch 2: Train Acc=0.8739, Train Loss=0.0348, Val Acc=0.8738, Val Loss=0.0358
Epoch 3: Train Acc=0.8821, Train Loss=0.0323, Val Acc=0.8792, Val Loss=0.0342
Epoch 4: Train Acc=0.8875, Train Loss=0.0306, Val Acc=0.8788, Val Loss=0.0333
Epoch 5: Train Acc=0.8923, Train Loss=0.0293, Val Acc=0.8816, Val Loss=0.0326
Epoch 6: Train Acc=0.8957, Train Loss=0.0282, Val Acc=0.8828, Val Loss=0.0322
Epoch 7: Train Acc=0.8987, Train Loss=0.0273, Val Acc=0.8826, Val Loss=0.0319
Epoch 8: Train Acc=0.9012, Train Loss=0.0266, Val Acc=0.8838, Val Loss=0.0318
Epoch 9: Train Acc=0.9035, Train Loss=0.0260, Val Acc=0.8842, Val Loss=0.0318
Epoch 10: Train Acc=0.9060, Train Loss=0.0254, Val Acc=0.8860, Val Loss=0.0318


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▅▆▆▇▇▇▇██
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.90602
train_loss,0.02535
val_acc,0.886
val_loss,0.03182


[34m[1mwandb[0m: Agent Starting Run: ffgkq35w with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8401, Train Loss=0.0465, Val Acc=0.8490, Val Loss=0.0452
Epoch 2: Train Acc=0.8574, Train Loss=0.0408, Val Acc=0.8574, Val Loss=0.0408
Epoch 3: Train Acc=0.8668, Train Loss=0.0374, Val Acc=0.8600, Val Loss=0.0384
Epoch 4: Train Acc=0.8731, Train Loss=0.0350, Val Acc=0.8644, Val Loss=0.0367
Epoch 5: Train Acc=0.8763, Train Loss=0.0336, Val Acc=0.8666, Val Loss=0.0358


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▅▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.87631
train_loss,0.03357
val_acc,0.8666
val_loss,0.03581


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0i2zcjmt with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6384, Train Loss=0.0903, Val Acc=0.6440, Val Loss=0.0898
Epoch 2: Train Acc=0.7182, Train Loss=0.0721, Val Acc=0.7232, Val Loss=0.0717
Epoch 3: Train Acc=0.7333, Train Loss=0.0648, Val Acc=0.7392, Val Loss=0.0641
Epoch 4: Train Acc=0.7499, Train Loss=0.0597, Val Acc=0.7536, Val Loss=0.0595
Epoch 5: Train Acc=0.8074, Train Loss=0.0521, Val Acc=0.8074, Val Loss=0.0525
Epoch 6: Train Acc=0.8467, Train Loss=0.0451, Val Acc=0.8442, Val Loss=0.0469
Epoch 7: Train Acc=0.8606, Train Loss=0.0415, Val Acc=0.8576, Val Loss=0.0441
Epoch 8: Train Acc=0.8690, Train Loss=0.0390, Val Acc=0.8660, Val Loss=0.0421
Epoch 9: Train Acc=0.8742, Train Loss=0.0374, Val Acc=0.8676, Val Loss=0.0412
Epoch 10: Train Acc=0.8785, Train Loss=0.0363, Val Acc=0.8706, Val Loss=0.0406


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▄▆▇▇███
train_loss,█▆▅▄▃▂▂▁▁▁
val_acc,▁▃▄▄▆▇████
val_loss,█▅▄▄▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.87845
train_loss,0.03626
val_acc,0.8706
val_loss,0.04059


[34m[1mwandb[0m: Agent Starting Run: g466s461 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▄▃▃▂▂▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▄▅▅▆▇▇██

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: hci5dc6t with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1796, Train Loss=0.2296, Val Acc=0.1772, Val Loss=0.2296
Epoch 2: Train Acc=0.1981, Train Loss=0.2287, Val Acc=0.1974, Val Loss=0.2288
Epoch 3: Train Acc=0.2059, Train Loss=0.2272, Val Acc=0.2060, Val Loss=0.2272
Epoch 4: Train Acc=0.2650, Train Loss=0.2240, Val Acc=0.2616, Val Loss=0.2239
Epoch 5: Train Acc=0.2751, Train Loss=0.2152, Val Acc=0.2690, Val Loss=0.2150
Epoch 6: Train Acc=0.3474, Train Loss=0.1941, Val Acc=0.3420, Val Loss=0.1937
Epoch 7: Train Acc=0.3741, Train Loss=0.1743, Val Acc=0.3698, Val Loss=0.1739
Epoch 8: Train Acc=0.4583, Train Loss=0.1633, Val Acc=0.4626, Val Loss=0.1629
Epoch 9: Train Acc=0.4682, Train Loss=0.1535, Val Acc=0.4728, Val Loss=0.1531
Epoch 10: Train Acc=0.4963, Train Loss=0.1434, Val Acc=0.5050, Val Loss=0.1429


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▂▃▃▅▅▇▇█
train_loss,████▇▅▄▃▂▁
val_acc,▁▁▂▃▃▅▅▇▇█
val_loss,████▇▅▄▃▂▁

0,1
epoch,10.0
train_acc,0.49625
train_loss,0.14342
val_acc,0.505
val_loss,0.14292


[34m[1mwandb[0m: Agent Starting Run: qpmp1d5y with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▂▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hulytw7m with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▆▅▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23032
val_acc,0.0976
val_loss,0.2304


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kjdy17n5 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1000, Train Loss=0.2332, Val Acc=0.1002, Val Loss=0.2335
Epoch 2: Train Acc=0.1000, Train Loss=0.2327, Val Acc=0.1002, Val Loss=0.2330
Epoch 3: Train Acc=0.1000, Train Loss=0.2323, Val Acc=0.1002, Val Loss=0.2326
Epoch 4: Train Acc=0.1000, Train Loss=0.2320, Val Acc=0.1002, Val Loss=0.2322
Epoch 5: Train Acc=0.1000, Train Loss=0.2317, Val Acc=0.1002, Val Loss=0.2319
Epoch 6: Train Acc=0.1000, Train Loss=0.2315, Val Acc=0.1002, Val Loss=0.2317
Epoch 7: Train Acc=0.1000, Train Loss=0.2313, Val Acc=0.1002, Val Loss=0.2315
Epoch 8: Train Acc=0.1000, Train Loss=0.2311, Val Acc=0.1002, Val Loss=0.2313
Epoch 9: Train Acc=0.1000, Train Loss=0.2310, Val Acc=0.1002, Val Loss=0.2312
Epoch 10: Train Acc=0.1000, Train Loss=0.2309, Val Acc=0.1002, Val Loss=0.2310


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▅▄▃▃▂▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▅▄▄▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.09998
train_loss,0.23086
val_acc,0.1002
val_loss,0.23104


[34m[1mwandb[0m: Agent Starting Run: eb4zz1g7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.2014, Train Loss=0.1696, Val Acc=0.1998, Val Loss=0.1695
Epoch 5: Train Acc=0.2051, Train Loss=0.1675, Val Acc=0.2030, Val Loss=0.1674
Epoch 6: Train Acc=0.2176, Train Loss=0.1666, Val Acc=0.2152, Val Loss=0.1664
Epoch 7: Train Acc=0.3816, Train Loss=0.1490, Val Acc=0.3872, Val Loss=0.1495
Epoch 8: Train Acc=0.4113, Train Loss=0.1306, Val Acc=0.4120, Val Loss=0.1307
Epoch 9: Train Acc=0.4756, Train Loss=0.1196, Val Acc=0.4806, Val Loss=0.1196
Epoch 10: Train Acc=0.5039, Train Loss=0.1121, Val Acc=0.5072, Val Loss=0.1121


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▃▃▃▆▆██
train_loss,███▄▄▄▃▂▁▁
val_acc,▁▁▁▃▃▃▆▆██
val_loss,███▄▄▄▃▂▁▁

0,1
epoch,10.0
train_acc,0.50385
train_loss,0.11212
val_acc,0.5072
val_loss,0.11213


[34m[1mwandb[0m: Agent Starting Run: wsj5s2fc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6482, Train Loss=0.1198, Val Acc=0.6566, Val Loss=0.1185
Epoch 2: Train Acc=0.7070, Train Loss=0.0906, Val Acc=0.7110, Val Loss=0.0895
Epoch 3: Train Acc=0.7395, Train Loss=0.0778, Val Acc=0.7454, Val Loss=0.0768
Epoch 4: Train Acc=0.7631, Train Loss=0.0700, Val Acc=0.7648, Val Loss=0.0690
Epoch 5: Train Acc=0.7788, Train Loss=0.0645, Val Acc=0.7868, Val Loss=0.0634


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.7788
train_loss,0.06451
val_acc,0.7868
val_loss,0.06343


[34m[1mwandb[0m: Agent Starting Run: 2toub3bx with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8169, Train Loss=0.0535, Val Acc=0.8230, Val Loss=0.0530
Epoch 2: Train Acc=0.8320, Train Loss=0.0481, Val Acc=0.8384, Val Loss=0.0476
Epoch 3: Train Acc=0.8402, Train Loss=0.0455, Val Acc=0.8480, Val Loss=0.0450
Epoch 4: Train Acc=0.8465, Train Loss=0.0439, Val Acc=0.8526, Val Loss=0.0435
Epoch 5: Train Acc=0.8493, Train Loss=0.0427, Val Acc=0.8552, Val Loss=0.0424


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.84935
train_loss,0.0427
val_acc,0.8552
val_loss,0.04236


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xo306pb0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7951, Train Loss=0.0575, Val Acc=0.7972, Val Loss=0.0569
Epoch 2: Train Acc=0.8331, Train Loss=0.0472, Val Acc=0.8368, Val Loss=0.0470
Epoch 3: Train Acc=0.8425, Train Loss=0.0439, Val Acc=0.8456, Val Loss=0.0444
Epoch 4: Train Acc=0.8509, Train Loss=0.0411, Val Acc=0.8518, Val Loss=0.0422
Epoch 5: Train Acc=0.8611, Train Loss=0.0384, Val Acc=0.8566, Val Loss=0.0400


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▆▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.86111
train_loss,0.03838
val_acc,0.8566
val_loss,0.03996


[34m[1mwandb[0m: Agent Starting Run: ujhg8o0m with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁████
val_acc,▁▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: m2zctc0h with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.0999, Train Loss=0.2333, Val Acc=0.1008, Val Loss=0.2336
Epoch 2: Train Acc=0.0999, Train Loss=0.2317, Val Acc=0.1008, Val Loss=0.2319
Epoch 3: Train Acc=0.0999, Train Loss=0.2310, Val Acc=0.1008, Val Loss=0.2311
Epoch 4: Train Acc=0.0999, Train Loss=0.2306, Val Acc=0.1008, Val Loss=0.2307
Epoch 5: Train Acc=0.0999, Train Loss=0.2304, Val Acc=0.1008, Val Loss=0.2305
Epoch 6: Train Acc=0.0999, Train Loss=0.2304, Val Acc=0.1008, Val Loss=0.2304
Epoch 7: Train Acc=0.0954, Train Loss=0.2303, Val Acc=0.0966, Val Loss=0.2304
Epoch 8: Train Acc=0.1013, Train Loss=0.2303, Val Acc=0.0920, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▆▆▆▆▆▆▁█▇▇
train_loss,█▄▃▂▁▁▁▁▁▁
val_acc,██████▅▁▁▁
val_loss,█▄▃▂▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23028
val_acc,0.0914
val_loss,0.23031


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wqesjt4a with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2524, Train Loss=0.1793, Val Acc=0.2594, Val Loss=0.1788
Epoch 2: Train Acc=0.4277, Train Loss=0.1379, Val Acc=0.4264, Val Loss=0.1376
Epoch 3: Train Acc=0.4798, Train Loss=0.1200, Val Acc=0.4790, Val Loss=0.1199
Epoch 4: Train Acc=0.5461, Train Loss=0.1074, Val Acc=0.5490, Val Loss=0.1071
Epoch 5: Train Acc=0.6165, Train Loss=0.0999, Val Acc=0.6214, Val Loss=0.0997


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▇█
train_loss,█▄▃▂▁
val_acc,▁▄▅▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.61647
train_loss,0.09993
val_acc,0.6214
val_loss,0.09973


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6cpoqkbt with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8365, Train Loss=0.0475, Val Acc=0.8372, Val Loss=0.0468
Epoch 2: Train Acc=0.8570, Train Loss=0.0398, Val Acc=0.8556, Val Loss=0.0397
Epoch 3: Train Acc=0.8677, Train Loss=0.0366, Val Acc=0.8660, Val Loss=0.0376
Epoch 4: Train Acc=0.8739, Train Loss=0.0344, Val Acc=0.8692, Val Loss=0.0363
Epoch 5: Train Acc=0.8780, Train Loss=0.0332, Val Acc=0.8686, Val Loss=0.0358


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇██
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,0.87798
train_loss,0.03321
val_acc,0.8686
val_loss,0.0358


[34m[1mwandb[0m: Agent Starting Run: wwmhm6ad with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7631, Train Loss=0.0649, Val Acc=0.7672, Val Loss=0.0647
Epoch 2: Train Acc=0.7962, Train Loss=0.0577, Val Acc=0.8010, Val Loss=0.0579
Epoch 3: Train Acc=0.8088, Train Loss=0.0542, Val Acc=0.8134, Val Loss=0.0546
Epoch 4: Train Acc=0.8181, Train Loss=0.0517, Val Acc=0.8232, Val Loss=0.0523
Epoch 5: Train Acc=0.8264, Train Loss=0.0497, Val Acc=0.8276, Val Loss=0.0505
Epoch 6: Train Acc=0.8323, Train Loss=0.0480, Val Acc=0.8336, Val Loss=0.0489
Epoch 7: Train Acc=0.8371, Train Loss=0.0465, Val Acc=0.8362, Val Loss=0.0476
Epoch 8: Train Acc=0.8414, Train Loss=0.0451, Val Acc=0.8396, Val Loss=0.0464
Epoch 9: Train Acc=0.8446, Train Loss=0.0439, Val Acc=0.8424, Val Loss=0.0455
Epoch 10: Train Acc=0.8484, Train Loss=0.0428, Val Acc=0.8452, Val Loss=0.0444


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▄▅▆▆▇▇▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.84842
train_loss,0.04277
val_acc,0.8452
val_loss,0.04444


[34m[1mwandb[0m: Agent Starting Run: vysu0iis with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2012, Train Loss=0.1758, Val Acc=0.1964, Val Loss=0.1759
Epoch 2: Train Acc=0.2927, Train Loss=0.1507, Val Acc=0.2876, Val Loss=0.1519
Epoch 3: Train Acc=0.3675, Train Loss=0.1448, Val Acc=0.3632, Val Loss=0.1459
Epoch 4: Train Acc=0.5015, Train Loss=0.1182, Val Acc=0.5090, Val Loss=0.1186
Epoch 5: Train Acc=0.6429, Train Loss=0.0996, Val Acc=0.6490, Val Loss=0.0994
Epoch 6: Train Acc=0.6599, Train Loss=0.0902, Val Acc=0.6634, Val Loss=0.0903
Epoch 7: Train Acc=0.7168, Train Loss=0.0840, Val Acc=0.7246, Val Loss=0.0844
Epoch 8: Train Acc=0.7651, Train Loss=0.0784, Val Acc=0.7634, Val Loss=0.0791
Epoch 9: Train Acc=0.7800, Train Loss=0.0746, Val Acc=0.7794, Val Loss=0.0757
Epoch 10: Train Acc=0.7824, Train Loss=0.0719, Val Acc=0.7794, Val Loss=0.0736


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▃▅▆▇▇███
train_loss,█▆▆▄▃▂▂▁▁▁
val_acc,▁▂▃▅▆▇▇███
val_loss,█▆▆▄▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.78244
train_loss,0.07194
val_acc,0.7794
val_loss,0.07362


[34m[1mwandb[0m: Agent Starting Run: b0hnvq2b with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2298, Val Acc=0.0914, Val Loss=0.2299
Epoch 2: Train Acc=0.1995, Train Loss=0.2116, Val Acc=0.2006, Val Loss=0.2116
Epoch 3: Train Acc=0.1998, Train Loss=0.1867, Val Acc=0.2012, Val Loss=0.1866
Epoch 4: Train Acc=0.2000, Train Loss=0.1761, Val Acc=0.2008, Val Loss=0.1760
Epoch 5: Train Acc=0.2013, Train Loss=0.1719, Val Acc=0.1994, Val Loss=0.1717


0,1
epoch,▁▃▅▆█
train_acc,▁████
train_loss,█▆▃▂▁
val_acc,▁████
val_loss,█▆▃▂▁

0,1
epoch,5.0
train_acc,0.20129
train_loss,0.17186
val_acc,0.1994
val_loss,0.17173


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gfpnqh7e with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5845, Train Loss=0.0979, Val Acc=0.5974, Val Loss=0.0963
Epoch 2: Train Acc=0.7736, Train Loss=0.0564, Val Acc=0.7846, Val Loss=0.0559
Epoch 3: Train Acc=0.7808, Train Loss=0.0618, Val Acc=0.7826, Val Loss=0.0624
Epoch 4: Train Acc=0.8285, Train Loss=0.0468, Val Acc=0.8258, Val Loss=0.0484
Epoch 5: Train Acc=0.8617, Train Loss=0.0384, Val Acc=0.8608, Val Loss=0.0414


0,1
epoch,▁▃▅▆█
train_acc,▁▆▆▇█
train_loss,█▃▄▂▁
val_acc,▁▆▆▇█
val_loss,█▃▄▂▁

0,1
epoch,5.0
train_acc,0.86167
train_loss,0.03842
val_acc,0.8608
val_loss,0.04142


[34m[1mwandb[0m: Agent Starting Run: hao4p39o with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8178, Train Loss=0.0520, Val Acc=0.8200, Val Loss=0.0511
Epoch 2: Train Acc=0.8454, Train Loss=0.0435, Val Acc=0.8492, Val Loss=0.0433
Epoch 3: Train Acc=0.8575, Train Loss=0.0400, Val Acc=0.8580, Val Loss=0.0405
Epoch 4: Train Acc=0.8650, Train Loss=0.0378, Val Acc=0.8622, Val Loss=0.0390
Epoch 5: Train Acc=0.8705, Train Loss=0.0363, Val Acc=0.8636, Val Loss=0.0380
Epoch 6: Train Acc=0.8743, Train Loss=0.0351, Val Acc=0.8676, Val Loss=0.0373
Epoch 7: Train Acc=0.8777, Train Loss=0.0342, Val Acc=0.8688, Val Loss=0.0367
Epoch 8: Train Acc=0.8807, Train Loss=0.0334, Val Acc=0.8710, Val Loss=0.0362
Epoch 9: Train Acc=0.8829, Train Loss=0.0326, Val Acc=0.8738, Val Loss=0.0358
Epoch 10: Train Acc=0.8854, Train Loss=0.0320, Val Acc=0.8742, Val Loss=0.0354


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_acc,▁▅▆▆▇▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.88538
train_loss,0.03196
val_acc,0.8742
val_loss,0.03542


[34m[1mwandb[0m: Agent Starting Run: r53sgfvr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0983, Train Loss=0.2303, Val Acc=0.0884, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 10: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁█████████
train_loss,█▇▇▆▅▄▄▃▂▁
val_acc,▁█████████
val_loss,██▇▇▆▅▄▃▂▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23023
val_acc,0.0914
val_loss,0.23025


[34m[1mwandb[0m: Agent Starting Run: xi7n7ha9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6223, Train Loss=0.0909, Val Acc=0.6278, Val Loss=0.0908
Epoch 2: Train Acc=0.7713, Train Loss=0.0675, Val Acc=0.7794, Val Loss=0.0676
Epoch 3: Train Acc=0.8018, Train Loss=0.0558, Val Acc=0.8022, Val Loss=0.0567
Epoch 4: Train Acc=0.8404, Train Loss=0.0487, Val Acc=0.8342, Val Loss=0.0509
Epoch 5: Train Acc=0.8502, Train Loss=0.0453, Val Acc=0.8444, Val Loss=0.0485


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇██
train_loss,█▄▃▂▁
val_acc,▁▆▇██
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.85022
train_loss,0.04528
val_acc,0.8444
val_loss,0.04851


[34m[1mwandb[0m: Agent Starting Run: 9g93vhvo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▅▄▄▃▂▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▄▅▆▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: btqmfm00 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▅▃▂▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▇█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: 1580bjet with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.4977, Train Loss=0.1183, Val Acc=0.5064, Val Loss=0.1174
Epoch 2: Train Acc=0.5874, Train Loss=0.1008, Val Acc=0.6012, Val Loss=0.1003
Epoch 3: Train Acc=0.6195, Train Loss=0.0960, Val Acc=0.6278, Val Loss=0.0958
Epoch 4: Train Acc=0.6415, Train Loss=0.0928, Val Acc=0.6458, Val Loss=0.0927
Epoch 5: Train Acc=0.6591, Train Loss=0.0903, Val Acc=0.6648, Val Loss=0.0902
Epoch 6: Train Acc=0.6739, Train Loss=0.0881, Val Acc=0.6804, Val Loss=0.0881
Epoch 7: Train Acc=0.6880, Train Loss=0.0858, Val Acc=0.6938, Val Loss=0.0859
Epoch 8: Train Acc=0.7021, Train Loss=0.0834, Val Acc=0.7104, Val Loss=0.0835
Epoch 9: Train Acc=0.7102, Train Loss=0.0815, Val Acc=0.7178, Val Loss=0.0817
Epoch 10: Train Acc=0.7171, Train Loss=0.0798, Val Acc=0.7254, Val Loss=0.0800


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▃▂▂▁▁
val_acc,▁▄▅▅▆▇▇███
val_loss,█▅▄▃▃▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.71711
train_loss,0.07981
val_acc,0.7254
val_loss,0.08003


[34m[1mwandb[0m: Agent Starting Run: lsvv23km with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6190, Train Loss=0.1042, Val Acc=0.6302, Val Loss=0.1030
Epoch 2: Train Acc=0.7143, Train Loss=0.0744, Val Acc=0.7260, Val Loss=0.0736
Epoch 3: Train Acc=0.7737, Train Loss=0.0628, Val Acc=0.7808, Val Loss=0.0622
Epoch 4: Train Acc=0.8010, Train Loss=0.0563, Val Acc=0.8062, Val Loss=0.0557
Epoch 5: Train Acc=0.8172, Train Loss=0.0523, Val Acc=0.8196, Val Loss=0.0517


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▂▂▁
val_acc,▁▅▇██
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.81718
train_loss,0.05233
val_acc,0.8196
val_loss,0.05171


[34m[1mwandb[0m: Agent Starting Run: ml545nm3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2991, Train Loss=0.1520, Val Acc=0.2968, Val Loss=0.1524
Epoch 2: Train Acc=0.3872, Train Loss=0.1388, Val Acc=0.3972, Val Loss=0.1392
Epoch 3: Train Acc=0.4469, Train Loss=0.1216, Val Acc=0.4526, Val Loss=0.1217
Epoch 4: Train Acc=0.5208, Train Loss=0.1136, Val Acc=0.5228, Val Loss=0.1136
Epoch 5: Train Acc=0.5639, Train Loss=0.1033, Val Acc=0.5668, Val Loss=0.1033
Epoch 6: Train Acc=0.6129, Train Loss=0.0948, Val Acc=0.6164, Val Loss=0.0948
Epoch 7: Train Acc=0.6664, Train Loss=0.0887, Val Acc=0.6690, Val Loss=0.0885
Epoch 8: Train Acc=0.6918, Train Loss=0.0841, Val Acc=0.6946, Val Loss=0.0837
Epoch 9: Train Acc=0.7044, Train Loss=0.0806, Val Acc=0.7090, Val Loss=0.0801
Epoch 10: Train Acc=0.7127, Train Loss=0.0776, Val Acc=0.7216, Val Loss=0.0771


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▄▅▅▆▇███
train_loss,█▇▅▄▃▃▂▂▁▁
val_acc,▁▃▄▅▅▆▇███
val_loss,█▇▅▄▃▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.71271
train_loss,0.07759
val_acc,0.7216
val_loss,0.07706


[34m[1mwandb[0m: Agent Starting Run: sf8j5ohu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2048, Train Loss=0.1706, Val Acc=0.2016, Val Loss=0.1706
Epoch 2: Train Acc=0.2235, Train Loss=0.1668, Val Acc=0.2204, Val Loss=0.1668
Epoch 3: Train Acc=0.3765, Train Loss=0.1537, Val Acc=0.3774, Val Loss=0.1540
Epoch 4: Train Acc=0.3994, Train Loss=0.1397, Val Acc=0.4020, Val Loss=0.1398
Epoch 5: Train Acc=0.4296, Train Loss=0.1303, Val Acc=0.4324, Val Loss=0.1305


0,1
epoch,▁▃▅▆█
train_acc,▁▂▆▇█
train_loss,█▇▅▃▁
val_acc,▁▂▆▇█
val_loss,█▇▅▃▁

0,1
epoch,5.0
train_acc,0.42962
train_loss,0.13035
val_acc,0.4324
val_loss,0.13051


[34m[1mwandb[0m: Agent Starting Run: x5qatbr6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▄▂▁▁
val_acc,▁▁▁▁▁
val_loss,▁▄▆▇█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: nbdxtv6k with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7451, Train Loss=0.0721, Val Acc=0.7536, Val Loss=0.0712
Epoch 2: Train Acc=0.7856, Train Loss=0.0634, Val Acc=0.7910, Val Loss=0.0626
Epoch 3: Train Acc=0.8029, Train Loss=0.0586, Val Acc=0.8088, Val Loss=0.0578
Epoch 4: Train Acc=0.8134, Train Loss=0.0553, Val Acc=0.8174, Val Loss=0.0545
Epoch 5: Train Acc=0.8209, Train Loss=0.0530, Val Acc=0.8250, Val Loss=0.0522
Epoch 6: Train Acc=0.8264, Train Loss=0.0512, Val Acc=0.8324, Val Loss=0.0504
Epoch 7: Train Acc=0.8309, Train Loss=0.0497, Val Acc=0.8374, Val Loss=0.0489
Epoch 8: Train Acc=0.8343, Train Loss=0.0485, Val Acc=0.8414, Val Loss=0.0477
Epoch 9: Train Acc=0.8377, Train Loss=0.0474, Val Acc=0.8446, Val Loss=0.0466
Epoch 10: Train Acc=0.8404, Train Loss=0.0465, Val Acc=0.8474, Val Loss=0.0458


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▆▄▃▃▂▂▂▁▁
val_acc,▁▄▅▆▆▇▇███
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.84042
train_loss,0.04653
val_acc,0.8474
val_loss,0.04577


[34m[1mwandb[0m: Agent Starting Run: 22pyrfpi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▂▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: l79di4s1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7189, Train Loss=0.0765, Val Acc=0.7246, Val Loss=0.0764
Epoch 2: Train Acc=0.7668, Train Loss=0.0650, Val Acc=0.7708, Val Loss=0.0653
Epoch 3: Train Acc=0.7920, Train Loss=0.0592, Val Acc=0.7968, Val Loss=0.0598
Epoch 4: Train Acc=0.8052, Train Loss=0.0558, Val Acc=0.8112, Val Loss=0.0565
Epoch 5: Train Acc=0.8162, Train Loss=0.0530, Val Acc=0.8230, Val Loss=0.0538


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.8162
train_loss,0.05301
val_acc,0.823
val_loss,0.05377


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ljjdkvjq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.0999, Train Loss=0.2316, Val Acc=0.1008, Val Loss=0.2314
Epoch 2: Train Acc=0.0999, Train Loss=0.2305, Val Acc=0.1008, Val Loss=0.2304
Epoch 3: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 4: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 5: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁█████
train_loss,█▂▁▁▁▁▁▁▁▁
val_acc,█████▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: tmj06jb5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7981, Train Loss=0.0609, Val Acc=0.8054, Val Loss=0.0598
Epoch 2: Train Acc=0.8185, Train Loss=0.0530, Val Acc=0.8254, Val Loss=0.0517
Epoch 3: Train Acc=0.8270, Train Loss=0.0495, Val Acc=0.8326, Val Loss=0.0483
Epoch 4: Train Acc=0.8331, Train Loss=0.0474, Val Acc=0.8408, Val Loss=0.0462
Epoch 5: Train Acc=0.8375, Train Loss=0.0459, Val Acc=0.8456, Val Loss=0.0447


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.83745
train_loss,0.04593
val_acc,0.8456
val_loss,0.04473


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ohysqdml with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1182, Train Loss=0.2299, Val Acc=0.1138, Val Loss=0.2300
Epoch 2: Train Acc=0.1906, Train Loss=0.2294, Val Acc=0.1904, Val Loss=0.2294
Epoch 3: Train Acc=0.1975, Train Loss=0.2285, Val Acc=0.1962, Val Loss=0.2286
Epoch 4: Train Acc=0.2719, Train Loss=0.2269, Val Acc=0.2716, Val Loss=0.2269
Epoch 5: Train Acc=0.3298, Train Loss=0.2231, Val Acc=0.3222, Val Loss=0.2230
Epoch 6: Train Acc=0.3289, Train Loss=0.2124, Val Acc=0.3380, Val Loss=0.2121
Epoch 7: Train Acc=0.2832, Train Loss=0.1905, Val Acc=0.2902, Val Loss=0.1900
Epoch 8: Train Acc=0.3237, Train Loss=0.1740, Val Acc=0.3278, Val Loss=0.1735
Epoch 9: Train Acc=0.3521, Train Loss=0.1649, Val Acc=0.3556, Val Loss=0.1645
Epoch 10: Train Acc=0.3741, Train Loss=0.1573, Val Acc=0.3770, Val Loss=0.1569


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▃▅▇▇▆▇▇█
train_loss,████▇▆▄▃▂▁
val_acc,▁▃▃▅▇▇▆▇▇█
val_loss,████▇▆▄▃▂▁

0,1
epoch,10.0
train_acc,0.37405
train_loss,0.15731
val_acc,0.377
val_loss,0.15689


[34m[1mwandb[0m: Agent Starting Run: w041hb8i with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▁▁▁▁
val_acc,▁▁▁▁▁
val_loss,█▁▃▃▁

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: 9f3mxr3j with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▄▄▃▂▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▄▄▅▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zaglipqf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6510, Train Loss=0.0777, Val Acc=0.6584, Val Loss=0.0771
Epoch 2: Train Acc=0.7564, Train Loss=0.0630, Val Acc=0.7632, Val Loss=0.0629
Epoch 3: Train Acc=0.7929, Train Loss=0.0566, Val Acc=0.7990, Val Loss=0.0566
Epoch 4: Train Acc=0.8069, Train Loss=0.0515, Val Acc=0.8104, Val Loss=0.0519
Epoch 5: Train Acc=0.8294, Train Loss=0.0477, Val Acc=0.8294, Val Loss=0.0484


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▅▃▂▁
val_acc,▁▅▇▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.82938
train_loss,0.04766
val_acc,0.8294
val_loss,0.04843


[34m[1mwandb[0m: Agent Starting Run: c98jv61y with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.0900, Train Loss=0.2304, Val Acc=0.0870, Val Loss=0.2304
Epoch 2: Train Acc=0.0992, Train Loss=0.2303, Val Acc=0.0970, Val Loss=0.2303
Epoch 3: Train Acc=0.1016, Train Loss=0.2302, Val Acc=0.1008, Val Loss=0.2303
Epoch 4: Train Acc=0.1040, Train Loss=0.2302, Val Acc=0.1032, Val Loss=0.2302
Epoch 5: Train Acc=0.1062, Train Loss=0.2301, Val Acc=0.1052, Val Loss=0.2301


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▆▄▃▁
val_acc,▁▅▆▇█
val_loss,█▆▄▃▁

0,1
epoch,5.0
train_acc,0.10618
train_loss,0.23012
val_acc,0.1052
val_loss,0.23013


[34m[1mwandb[0m: Agent Starting Run: daz6pzns with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.3706, Train Loss=0.2041, Val Acc=0.3690, Val Loss=0.2039
Epoch 2: Train Acc=0.6000, Train Loss=0.1218, Val Acc=0.6132, Val Loss=0.1205
Epoch 3: Train Acc=0.6756, Train Loss=0.0883, Val Acc=0.6790, Val Loss=0.0873
Epoch 4: Train Acc=0.7089, Train Loss=0.0775, Val Acc=0.7150, Val Loss=0.0767
Epoch 5: Train Acc=0.7405, Train Loss=0.0714, Val Acc=0.7448, Val Loss=0.0706
Epoch 6: Train Acc=0.7673, Train Loss=0.0664, Val Acc=0.7694, Val Loss=0.0657
Epoch 7: Train Acc=0.7859, Train Loss=0.0623, Val Acc=0.7900, Val Loss=0.0617
Epoch 8: Train Acc=0.7987, Train Loss=0.0589, Val Acc=0.8018, Val Loss=0.0584
Epoch 9: Train Acc=0.8084, Train Loss=0.0561, Val Acc=0.8126, Val Loss=0.0557
Epoch 10: Train Acc=0.8160, Train Loss=0.0539, Val Acc=0.8172, Val Loss=0.0535


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▆▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_acc,▁▅▆▆▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.81604
train_loss,0.05392
val_acc,0.8172
val_loss,0.0535


[34m[1mwandb[0m: Agent Starting Run: ayyswu3b with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2100, Train Loss=0.1671, Val Acc=0.2082, Val Loss=0.1669
Epoch 2: Train Acc=0.4381, Train Loss=0.1199, Val Acc=0.4470, Val Loss=0.1200
Epoch 3: Train Acc=0.6317, Train Loss=0.0920, Val Acc=0.6312, Val Loss=0.0915
Epoch 4: Train Acc=0.7034, Train Loss=0.0781, Val Acc=0.7024, Val Loss=0.0775
Epoch 5: Train Acc=0.7753, Train Loss=0.0669, Val Acc=0.7782, Val Loss=0.0670


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.77529
train_loss,0.0669
val_acc,0.7782
val_loss,0.06699


[34m[1mwandb[0m: Agent Starting Run: xdxbghy7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█████▇▇▆▅▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▇█████▇▇▅▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23023
val_acc,0.0914
val_loss,0.23025


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t46yd69v with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8114, Train Loss=0.0555, Val Acc=0.8168, Val Loss=0.0546
Epoch 2: Train Acc=0.8312, Train Loss=0.0484, Val Acc=0.8380, Val Loss=0.0474
Epoch 3: Train Acc=0.8404, Train Loss=0.0452, Val Acc=0.8478, Val Loss=0.0444
Epoch 4: Train Acc=0.8465, Train Loss=0.0433, Val Acc=0.8530, Val Loss=0.0426
Epoch 5: Train Acc=0.8514, Train Loss=0.0419, Val Acc=0.8576, Val Loss=0.0413
Epoch 6: Train Acc=0.8549, Train Loss=0.0408, Val Acc=0.8578, Val Loss=0.0404
Epoch 7: Train Acc=0.8582, Train Loss=0.0399, Val Acc=0.8594, Val Loss=0.0397
Epoch 8: Train Acc=0.8609, Train Loss=0.0392, Val Acc=0.8620, Val Loss=0.0391
Epoch 9: Train Acc=0.8633, Train Loss=0.0385, Val Acc=0.8622, Val Loss=0.0385
Epoch 10: Train Acc=0.8653, Train Loss=0.0379, Val Acc=0.8632, Val Loss=0.0381


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▄▆▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.86525
train_loss,0.03786
val_acc,0.8632
val_loss,0.03805


[34m[1mwandb[0m: Agent Starting Run: i8qnwidi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1717, Train Loss=0.2211, Val Acc=0.1754, Val Loss=0.2210
Epoch 2: Train Acc=0.2855, Train Loss=0.2099, Val Acc=0.2854, Val Loss=0.2097
Epoch 3: Train Acc=0.4045, Train Loss=0.1994, Val Acc=0.4042, Val Loss=0.1991
Epoch 4: Train Acc=0.4903, Train Loss=0.1895, Val Acc=0.4896, Val Loss=0.1890
Epoch 5: Train Acc=0.5294, Train Loss=0.1800, Val Acc=0.5294, Val Loss=0.1794


0,1
epoch,▁▃▅▆█
train_acc,▁▃▆▇█
train_loss,█▆▄▃▁
val_acc,▁▃▆▇█
val_loss,█▆▄▃▁

0,1
epoch,5.0
train_acc,0.52942
train_loss,0.17998
val_acc,0.5294
val_loss,0.17937


[34m[1mwandb[0m: Agent Starting Run: nufnshau with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5463, Train Loss=0.1969, Val Acc=0.5588, Val Loss=0.1968
Epoch 2: Train Acc=0.6192, Train Loss=0.1445, Val Acc=0.6266, Val Loss=0.1434
Epoch 3: Train Acc=0.6562, Train Loss=0.1090, Val Acc=0.6584, Val Loss=0.1077
Epoch 4: Train Acc=0.6787, Train Loss=0.0932, Val Acc=0.6804, Val Loss=0.0922
Epoch 5: Train Acc=0.6989, Train Loss=0.0845, Val Acc=0.7008, Val Loss=0.0836


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▂▂▁

0,1
epoch,5.0
train_acc,0.69891
train_loss,0.08452
val_acc,0.7008
val_loss,0.0836


[34m[1mwandb[0m: Agent Starting Run: f7vwyz5r with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2220, Train Loss=0.1667, Val Acc=0.2186, Val Loss=0.1667
Epoch 2: Train Acc=0.4075, Train Loss=0.1311, Val Acc=0.4126, Val Loss=0.1313
Epoch 3: Train Acc=0.5210, Train Loss=0.1153, Val Acc=0.5214, Val Loss=0.1154
Epoch 4: Train Acc=0.5816, Train Loss=0.1025, Val Acc=0.5882, Val Loss=0.1025
Epoch 5: Train Acc=0.6118, Train Loss=0.0937, Val Acc=0.6164, Val Loss=0.0934


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆██
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.61182
train_loss,0.09371
val_acc,0.6164
val_loss,0.09344


[34m[1mwandb[0m: Agent Starting Run: 7yv4yuqx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7237, Train Loss=0.1011, Val Acc=0.7306, Val Loss=0.1001
Epoch 2: Train Acc=0.7661, Train Loss=0.0829, Val Acc=0.7694, Val Loss=0.0821
Epoch 3: Train Acc=0.7842, Train Loss=0.0734, Val Acc=0.7868, Val Loss=0.0727
Epoch 4: Train Acc=0.7959, Train Loss=0.0673, Val Acc=0.7970, Val Loss=0.0667
Epoch 5: Train Acc=0.8050, Train Loss=0.0630, Val Acc=0.8072, Val Loss=0.0624
Epoch 6: Train Acc=0.8114, Train Loss=0.0597, Val Acc=0.8124, Val Loss=0.0592
Epoch 7: Train Acc=0.8166, Train Loss=0.0571, Val Acc=0.8156, Val Loss=0.0566
Epoch 8: Train Acc=0.8206, Train Loss=0.0549, Val Acc=0.8204, Val Loss=0.0545
Epoch 9: Train Acc=0.8248, Train Loss=0.0532, Val Acc=0.8242, Val Loss=0.0528
Epoch 10: Train Acc=0.8279, Train Loss=0.0517, Val Acc=0.8288, Val Loss=0.0513


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_acc,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.82785
train_loss,0.0517
val_acc,0.8288
val_loss,0.05135


[34m[1mwandb[0m: Agent Starting Run: n3htczyn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7569, Train Loss=0.0754, Val Acc=0.7584, Val Loss=0.0749
Epoch 2: Train Acc=0.8093, Train Loss=0.0568, Val Acc=0.8132, Val Loss=0.0563
Epoch 3: Train Acc=0.8261, Train Loss=0.0500, Val Acc=0.8302, Val Loss=0.0497
Epoch 4: Train Acc=0.8374, Train Loss=0.0464, Val Acc=0.8408, Val Loss=0.0466
Epoch 5: Train Acc=0.8454, Train Loss=0.0440, Val Acc=0.8480, Val Loss=0.0446
Epoch 6: Train Acc=0.8512, Train Loss=0.0421, Val Acc=0.8534, Val Loss=0.0430
Epoch 7: Train Acc=0.8562, Train Loss=0.0405, Val Acc=0.8558, Val Loss=0.0417
Epoch 8: Train Acc=0.8616, Train Loss=0.0391, Val Acc=0.8564, Val Loss=0.0406
Epoch 9: Train Acc=0.8658, Train Loss=0.0380, Val Acc=0.8576, Val Loss=0.0397
Epoch 10: Train Acc=0.8690, Train Loss=0.0370, Val Acc=0.8616, Val Loss=0.0390


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_acc,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.86902
train_loss,0.03699
val_acc,0.8616
val_loss,0.03895


[34m[1mwandb[0m: Agent Starting Run: 68jfwpgd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁████
val_acc,▁▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: stbzsjt7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1001, Train Loss=0.2312, Val Acc=0.0986, Val Loss=0.2313
Epoch 2: Train Acc=0.0803, Train Loss=0.2303, Val Acc=0.0814, Val Loss=0.2303
Epoch 3: Train Acc=0.0975, Train Loss=0.2300, Val Acc=0.0906, Val Loss=0.2301
Epoch 4: Train Acc=0.1251, Train Loss=0.2300, Val Acc=0.1144, Val Loss=0.2300
Epoch 5: Train Acc=0.1683, Train Loss=0.2299, Val Acc=0.1610, Val Loss=0.2299
Epoch 6: Train Acc=0.1925, Train Loss=0.2298, Val Acc=0.1866, Val Loss=0.2298
Epoch 7: Train Acc=0.2053, Train Loss=0.2298, Val Acc=0.1998, Val Loss=0.2298
Epoch 8: Train Acc=0.2134, Train Loss=0.2297, Val Acc=0.2086, Val Loss=0.2297
Epoch 9: Train Acc=0.2203, Train Loss=0.2296, Val Acc=0.2166, Val Loss=0.2296
Epoch 10: Train Acc=0.2271, Train Loss=0.2296, Val Acc=0.2260, Val Loss=0.2296


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▂▁▂▃▅▆▇▇██
train_loss,█▄▃▃▂▂▂▂▁▁
val_acc,▂▁▁▃▅▆▇▇██
val_loss,█▄▃▃▂▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.22713
train_loss,0.22956
val_acc,0.226
val_loss,0.22957


[34m[1mwandb[0m: Agent Starting Run: o3usj5yi with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.0947, Train Loss=0.2297, Val Acc=0.0962, Val Loss=0.2296
Epoch 2: Train Acc=0.1046, Train Loss=0.2284, Val Acc=0.1024, Val Loss=0.2282
Epoch 3: Train Acc=0.1215, Train Loss=0.2273, Val Acc=0.1210, Val Loss=0.2271
Epoch 4: Train Acc=0.1387, Train Loss=0.2263, Val Acc=0.1398, Val Loss=0.2261
Epoch 5: Train Acc=0.1582, Train Loss=0.2252, Val Acc=0.1568, Val Loss=0.2250
Epoch 6: Train Acc=0.1810, Train Loss=0.2241, Val Acc=0.1800, Val Loss=0.2239
Epoch 7: Train Acc=0.2054, Train Loss=0.2229, Val Acc=0.2070, Val Loss=0.2227
Epoch 8: Train Acc=0.2321, Train Loss=0.2216, Val Acc=0.2362, Val Loss=0.2214
Epoch 9: Train Acc=0.2633, Train Loss=0.2202, Val Acc=0.2628, Val Loss=0.2200
Epoch 10: Train Acc=0.2980, Train Loss=0.2186, Val Acc=0.2984, Val Loss=0.2184


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▂▃▃▄▅▆▇█
train_loss,█▇▆▆▅▄▄▃▂▁
val_acc,▁▁▂▃▃▄▅▆▇█
val_loss,█▇▆▆▅▄▄▃▂▁

0,1
epoch,10.0
train_acc,0.29804
train_loss,0.21859
val_acc,0.2984
val_loss,0.21839


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 89lezrq0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7421, Train Loss=0.0803, Val Acc=0.7472, Val Loss=0.0793
Epoch 2: Train Acc=0.7875, Train Loss=0.0633, Val Acc=0.7918, Val Loss=0.0623
Epoch 3: Train Acc=0.8070, Train Loss=0.0560, Val Acc=0.8158, Val Loss=0.0550
Epoch 4: Train Acc=0.8179, Train Loss=0.0520, Val Acc=0.8258, Val Loss=0.0510
Epoch 5: Train Acc=0.8254, Train Loss=0.0495, Val Acc=0.8320, Val Loss=0.0485


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▂▂▁
val_acc,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.8254
train_loss,0.04946
val_acc,0.832
val_loss,0.04851


[34m[1mwandb[0m: Agent Starting Run: n0x14i2m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 5: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 6: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 7: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 8: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 9: Train Acc=0.1008, Train Loss=0.2301, Val Acc=0.0914, Val Loss=0.2301
Epoch 10: Train Acc=0.1008, Train Loss=0.2301, Val Acc=0.0914, Val Loss=0.2301


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,███▇▇▆▆▅▄▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,████▇▇▆▅▄▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23005
val_acc,0.0914
val_loss,0.23007


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: z1irq9hv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5874, Train Loss=0.1587, Val Acc=0.5900, Val Loss=0.1586
Epoch 2: Train Acc=0.6611, Train Loss=0.0995, Val Acc=0.6702, Val Loss=0.0986
Epoch 3: Train Acc=0.6934, Train Loss=0.0831, Val Acc=0.7064, Val Loss=0.0824
Epoch 4: Train Acc=0.7210, Train Loss=0.0756, Val Acc=0.7292, Val Loss=0.0750
Epoch 5: Train Acc=0.7457, Train Loss=0.0707, Val Acc=0.7528, Val Loss=0.0702


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▃▂▁▁
val_acc,▁▄▆▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,0.74575
train_loss,0.0707
val_acc,0.7528
val_loss,0.07022


[34m[1mwandb[0m: Agent Starting Run: pm67qcpz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.3822, Train Loss=0.1466, Val Acc=0.3904, Val Loss=0.1471
Epoch 2: Train Acc=0.4084, Train Loss=0.1184, Val Acc=0.4158, Val Loss=0.1187
Epoch 3: Train Acc=0.4772, Train Loss=0.1090, Val Acc=0.4770, Val Loss=0.1092
Epoch 4: Train Acc=0.4845, Train Loss=0.1039, Val Acc=0.4822, Val Loss=0.1044
Epoch 5: Train Acc=0.5069, Train Loss=0.1014, Val Acc=0.5038, Val Loss=0.1020


0,1
epoch,▁▃▅▆█
train_acc,▁▂▆▇█
train_loss,█▄▂▁▁
val_acc,▁▃▆▇█
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.50695
train_loss,0.1014
val_acc,0.5038
val_loss,0.10195


[34m[1mwandb[0m: Agent Starting Run: au3tm2hh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0990, Train Loss=0.2303, Val Acc=0.1112, Val Loss=0.2302
Epoch 2: Train Acc=0.0990, Train Loss=0.2303, Val Acc=0.1112, Val Loss=0.2302
Epoch 3: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1012, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▅██
train_loss,█▃▂▁▁
val_acc,██▄▁▁
val_loss,▁▃▆▇█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: pbnlapea with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7454, Train Loss=0.0716, Val Acc=0.7528, Val Loss=0.0718
Epoch 2: Train Acc=0.8062, Train Loss=0.0531, Val Acc=0.8146, Val Loss=0.0529
Epoch 3: Train Acc=0.8299, Train Loss=0.0477, Val Acc=0.8330, Val Loss=0.0481
Epoch 4: Train Acc=0.8478, Train Loss=0.0439, Val Acc=0.8474, Val Loss=0.0451
Epoch 5: Train Acc=0.8570, Train Loss=0.0412, Val Acc=0.8540, Val Loss=0.0433


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▂▂▁
val_acc,▁▅▇██
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,0.85704
train_loss,0.04118
val_acc,0.854
val_loss,0.04332


[34m[1mwandb[0m: Agent Starting Run: ick0luz3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7449, Train Loss=0.0720, Val Acc=0.7522, Val Loss=0.0713
Epoch 2: Train Acc=0.7916, Train Loss=0.0625, Val Acc=0.7942, Val Loss=0.0618
Epoch 3: Train Acc=0.8072, Train Loss=0.0576, Val Acc=0.8112, Val Loss=0.0568
Epoch 4: Train Acc=0.8158, Train Loss=0.0546, Val Acc=0.8198, Val Loss=0.0537
Epoch 5: Train Acc=0.8217, Train Loss=0.0524, Val Acc=0.8266, Val Loss=0.0515


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▅▃▂▁
val_acc,▁▅▇▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.82169
train_loss,0.05243
val_acc,0.8266
val_loss,0.05147


[34m[1mwandb[0m: Agent Starting Run: h85pm74n with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6291, Train Loss=0.0922, Val Acc=0.6400, Val Loss=0.0913
Epoch 2: Train Acc=0.6772, Train Loss=0.0853, Val Acc=0.6862, Val Loss=0.0847
Epoch 3: Train Acc=0.7048, Train Loss=0.0802, Val Acc=0.7114, Val Loss=0.0798
Epoch 4: Train Acc=0.7226, Train Loss=0.0764, Val Acc=0.7300, Val Loss=0.0761
Epoch 5: Train Acc=0.7361, Train Loss=0.0729, Val Acc=0.7384, Val Loss=0.0729
Epoch 6: Train Acc=0.7478, Train Loss=0.0697, Val Acc=0.7506, Val Loss=0.0699
Epoch 7: Train Acc=0.7562, Train Loss=0.0669, Val Acc=0.7590, Val Loss=0.0671
Epoch 8: Train Acc=0.7622, Train Loss=0.0647, Val Acc=0.7652, Val Loss=0.0649
Epoch 9: Train Acc=0.7688, Train Loss=0.0626, Val Acc=0.7734, Val Loss=0.0628
Epoch 10: Train Acc=0.7751, Train Loss=0.0605, Val Acc=0.7804, Val Loss=0.0609


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▅▆▇▇▇██
train_loss,█▆▅▅▄▃▂▂▁▁
val_acc,▁▃▅▅▆▇▇▇██
val_loss,█▆▅▄▄▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.77511
train_loss,0.06049
val_acc,0.7804
val_loss,0.06093


[34m[1mwandb[0m: Agent Starting Run: rhqeq3o2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.4797, Train Loss=0.1152, Val Acc=0.4820, Val Loss=0.1152
Epoch 2: Train Acc=0.5814, Train Loss=0.1010, Val Acc=0.5814, Val Loss=0.1006
Epoch 3: Train Acc=0.6257, Train Loss=0.0866, Val Acc=0.6296, Val Loss=0.0862
Epoch 4: Train Acc=0.6523, Train Loss=0.0799, Val Acc=0.6582, Val Loss=0.0794
Epoch 5: Train Acc=0.6999, Train Loss=0.0750, Val Acc=0.7020, Val Loss=0.0745
Epoch 6: Train Acc=0.7179, Train Loss=0.0707, Val Acc=0.7214, Val Loss=0.0702
Epoch 7: Train Acc=0.7254, Train Loss=0.0675, Val Acc=0.7296, Val Loss=0.0671
Epoch 8: Train Acc=0.7304, Train Loss=0.0652, Val Acc=0.7378, Val Loss=0.0648
Epoch 9: Train Acc=0.7675, Train Loss=0.0630, Val Acc=0.7696, Val Loss=0.0627
Epoch 10: Train Acc=0.7817, Train Loss=0.0605, Val Acc=0.7902, Val Loss=0.0602


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_acc,▁▃▄▅▆▆▇▇██
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.78165
train_loss,0.06049
val_acc,0.7902
val_loss,0.06021


[34m[1mwandb[0m: Agent Starting Run: a40mteb3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7332, Train Loss=0.0707, Val Acc=0.7360, Val Loss=0.0704
Epoch 2: Train Acc=0.8446, Train Loss=0.0433, Val Acc=0.8466, Val Loss=0.0444
Epoch 3: Train Acc=0.8597, Train Loss=0.0393, Val Acc=0.8622, Val Loss=0.0420
Epoch 4: Train Acc=0.8737, Train Loss=0.0354, Val Acc=0.8728, Val Loss=0.0394
Epoch 5: Train Acc=0.8797, Train Loss=0.0353, Val Acc=0.8740, Val Loss=0.0415
Epoch 6: Train Acc=0.8781, Train Loss=0.0356, Val Acc=0.8722, Val Loss=0.0430
Epoch 7: Train Acc=0.8733, Train Loss=0.0375, Val Acc=0.8670, Val Loss=0.0471
Epoch 8: Train Acc=0.8768, Train Loss=0.0377, Val Acc=0.8722, Val Loss=0.0465
Epoch 9: Train Acc=0.8884, Train Loss=0.0327, Val Acc=0.8764, Val Loss=0.0409
Epoch 10: Train Acc=0.8767, Train Loss=0.0380, Val Acc=0.8702, Val Loss=0.0463


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██▇▇█▇
train_loss,█▃▂▂▁▂▂▂▁▂
val_acc,▁▇▇███████
val_loss,█▂▂▁▁▂▃▃▁▃

0,1
epoch,10.0
train_acc,0.87671
train_loss,0.03798
val_acc,0.8702
val_loss,0.04632


[34m[1mwandb[0m: Agent Starting Run: e1d3c4b8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8413, Train Loss=0.0456, Val Acc=0.8462, Val Loss=0.0449
Epoch 2: Train Acc=0.8595, Train Loss=0.0392, Val Acc=0.8604, Val Loss=0.0394
Epoch 3: Train Acc=0.8706, Train Loss=0.0360, Val Acc=0.8674, Val Loss=0.0370
Epoch 4: Train Acc=0.8777, Train Loss=0.0339, Val Acc=0.8736, Val Loss=0.0359
Epoch 5: Train Acc=0.8823, Train Loss=0.0325, Val Acc=0.8780, Val Loss=0.0354


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.88233
train_loss,0.03254
val_acc,0.878
val_loss,0.03539


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s9kuxo5z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5188, Train Loss=0.1065, Val Acc=0.5252, Val Loss=0.1063
Epoch 2: Train Acc=0.5634, Train Loss=0.1022, Val Acc=0.5700, Val Loss=0.1020
Epoch 3: Train Acc=0.5795, Train Loss=0.1002, Val Acc=0.5894, Val Loss=0.1000
Epoch 4: Train Acc=0.5892, Train Loss=0.0988, Val Acc=0.5982, Val Loss=0.0986
Epoch 5: Train Acc=0.5985, Train Loss=0.0976, Val Acc=0.6080, Val Loss=0.0974


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.59855
train_loss,0.0976
val_acc,0.608
val_loss,0.09743


[34m[1mwandb[0m: Agent Starting Run: x6q8kuq4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5459, Train Loss=0.1076, Val Acc=0.5566, Val Loss=0.1073
Epoch 2: Train Acc=0.5695, Train Loss=0.1035, Val Acc=0.5770, Val Loss=0.1032
Epoch 3: Train Acc=0.5987, Train Loss=0.0984, Val Acc=0.6018, Val Loss=0.0983
Epoch 4: Train Acc=0.6398, Train Loss=0.0916, Val Acc=0.6432, Val Loss=0.0915
Epoch 5: Train Acc=0.6635, Train Loss=0.0878, Val Acc=0.6666, Val Loss=0.0878


0,1
epoch,▁▃▅▆█
train_acc,▁▂▄▇█
train_loss,█▇▅▂▁
val_acc,▁▂▄▇█
val_loss,█▇▅▂▁

0,1
epoch,5.0
train_acc,0.66345
train_loss,0.08782
val_acc,0.6666
val_loss,0.08784


[34m[1mwandb[0m: Agent Starting Run: 296r9xup with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▄▃▃▂▂▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▄▅▅▆▇▇██

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: i1djkwcp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▅▄▄▃▂▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▄▅▅▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: hvmugcoe with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8635, Train Loss=0.0373, Val Acc=0.8616, Val Loss=0.0374
Epoch 2: Train Acc=0.8795, Train Loss=0.0330, Val Acc=0.8762, Val Loss=0.0345
Epoch 3: Train Acc=0.8886, Train Loss=0.0302, Val Acc=0.8796, Val Loss=0.0329
Epoch 4: Train Acc=0.8948, Train Loss=0.0282, Val Acc=0.8834, Val Loss=0.0319
Epoch 5: Train Acc=0.9001, Train Loss=0.0266, Val Acc=0.8866, Val Loss=0.0314


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.90007
train_loss,0.02657
val_acc,0.8866
val_loss,0.03138


[34m[1mwandb[0m: Agent Starting Run: fbb8tww0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8180, Train Loss=0.0514, Val Acc=0.8244, Val Loss=0.0512
Epoch 2: Train Acc=0.8563, Train Loss=0.0409, Val Acc=0.8524, Val Loss=0.0413
Epoch 3: Train Acc=0.8698, Train Loss=0.0370, Val Acc=0.8632, Val Loss=0.0381
Epoch 4: Train Acc=0.8770, Train Loss=0.0347, Val Acc=0.8700, Val Loss=0.0363
Epoch 5: Train Acc=0.8820, Train Loss=0.0330, Val Acc=0.8738, Val Loss=0.0351


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▄▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.88202
train_loss,0.03296
val_acc,0.8738
val_loss,0.03507


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pmkipoal with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8391, Train Loss=0.0458, Val Acc=0.8412, Val Loss=0.0455
Epoch 2: Train Acc=0.8485, Train Loss=0.0425, Val Acc=0.8518, Val Loss=0.0426
Epoch 3: Train Acc=0.8583, Train Loss=0.0401, Val Acc=0.8600, Val Loss=0.0407
Epoch 4: Train Acc=0.8649, Train Loss=0.0382, Val Acc=0.8638, Val Loss=0.0392
Epoch 5: Train Acc=0.8691, Train Loss=0.0368, Val Acc=0.8690, Val Loss=0.0382


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▇█
train_loss,█▅▄▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.86911
train_loss,0.0368
val_acc,0.869
val_loss,0.03821


[34m[1mwandb[0m: Agent Starting Run: pc8xxtim with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 2: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 3: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 4: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 5: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2307


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▄▃▁

0,1
epoch,5.0
train_acc,0.10018
train_loss,0.23064
val_acc,0.098
val_loss,0.23065


[34m[1mwandb[0m: Agent Starting Run: v0yhh5x1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 2: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 3: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 4: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 5: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 6: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2307
Epoch 7: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2306
Epoch 8: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2306
Epoch 9: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2306
Epoch 10: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2306


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▅▄▃▂▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▅▅▄▃▂▂▁

0,1
epoch,10.0
train_acc,0.10018
train_loss,0.23059
val_acc,0.098
val_loss,0.23062


[34m[1mwandb[0m: Agent Starting Run: h337ijn4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁████
val_acc,▁▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: 7qe2rwt7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.3423, Train Loss=0.2228, Val Acc=0.3486, Val Loss=0.2226
Epoch 2: Train Acc=0.4246, Train Loss=0.1591, Val Acc=0.4322, Val Loss=0.1585
Epoch 3: Train Acc=0.5663, Train Loss=0.1204, Val Acc=0.5684, Val Loss=0.1195
Epoch 4: Train Acc=0.6079, Train Loss=0.1003, Val Acc=0.6160, Val Loss=0.0994
Epoch 5: Train Acc=0.6873, Train Loss=0.0887, Val Acc=0.6918, Val Loss=0.0880
Epoch 6: Train Acc=0.7113, Train Loss=0.0811, Val Acc=0.7174, Val Loss=0.0806
Epoch 7: Train Acc=0.7288, Train Loss=0.0757, Val Acc=0.7332, Val Loss=0.0753
Epoch 8: Train Acc=0.7424, Train Loss=0.0716, Val Acc=0.7464, Val Loss=0.0712
Epoch 9: Train Acc=0.7557, Train Loss=0.0682, Val Acc=0.7608, Val Loss=0.0676
Epoch 10: Train Acc=0.7653, Train Loss=0.0651, Val Acc=0.7724, Val Loss=0.0643


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▅▅▇▇▇███
train_loss,█▅▃▃▂▂▁▁▁▁
val_acc,▁▂▅▅▇▇▇███
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.76527
train_loss,0.0651
val_acc,0.7724
val_loss,0.06435


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cn0n1yb3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁█▇▇▇▆▆▆▆▆
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁████▇▇▇▆▆

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23027
val_acc,0.0976
val_loss,0.23031


[34m[1mwandb[0m: Agent Starting Run: iqrrf8au with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5257, Train Loss=0.1116, Val Acc=0.5350, Val Loss=0.1112
Epoch 2: Train Acc=0.5583, Train Loss=0.1035, Val Acc=0.5716, Val Loss=0.1033
Epoch 3: Train Acc=0.5705, Train Loss=0.1009, Val Acc=0.5824, Val Loss=0.1008
Epoch 4: Train Acc=0.5802, Train Loss=0.0993, Val Acc=0.5934, Val Loss=0.0992
Epoch 5: Train Acc=0.5887, Train Loss=0.0981, Val Acc=0.6002, Val Loss=0.0980
Epoch 6: Train Acc=0.5957, Train Loss=0.0970, Val Acc=0.6072, Val Loss=0.0969
Epoch 7: Train Acc=0.6018, Train Loss=0.0961, Val Acc=0.6130, Val Loss=0.0960
Epoch 8: Train Acc=0.6071, Train Loss=0.0953, Val Acc=0.6174, Val Loss=0.0952
Epoch 9: Train Acc=0.6113, Train Loss=0.0946, Val Acc=0.6190, Val Loss=0.0945
Epoch 10: Train Acc=0.6160, Train Loss=0.0940, Val Acc=0.6222, Val Loss=0.0939


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▄▅▆▆▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.616
train_loss,0.09398
val_acc,0.6222
val_loss,0.09391


[34m[1mwandb[0m: Agent Starting Run: 5d4g8p4w with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7113, Train Loss=0.0977, Val Acc=0.7048, Val Loss=0.0972
Epoch 2: Train Acc=0.7534, Train Loss=0.0821, Val Acc=0.7506, Val Loss=0.0817
Epoch 3: Train Acc=0.7795, Train Loss=0.0735, Val Acc=0.7808, Val Loss=0.0731
Epoch 4: Train Acc=0.7959, Train Loss=0.0676, Val Acc=0.7964, Val Loss=0.0672
Epoch 5: Train Acc=0.8064, Train Loss=0.0632, Val Acc=0.8066, Val Loss=0.0629
Epoch 6: Train Acc=0.8138, Train Loss=0.0599, Val Acc=0.8172, Val Loss=0.0595
Epoch 7: Train Acc=0.8199, Train Loss=0.0572, Val Acc=0.8218, Val Loss=0.0568
Epoch 8: Train Acc=0.8238, Train Loss=0.0550, Val Acc=0.8250, Val Loss=0.0546
Epoch 9: Train Acc=0.8275, Train Loss=0.0532, Val Acc=0.8274, Val Loss=0.0528
Epoch 10: Train Acc=0.8305, Train Loss=0.0517, Val Acc=0.8310, Val Loss=0.0512


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▆▇▇▇███
train_loss,█▆▄▃▃▂▂▂▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.83045
train_loss,0.05166
val_acc,0.831
val_loss,0.05125


[34m[1mwandb[0m: Agent Starting Run: l2408mt8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5563, Train Loss=0.1031, Val Acc=0.5666, Val Loss=0.1030
Epoch 2: Train Acc=0.5889, Train Loss=0.0991, Val Acc=0.5992, Val Loss=0.0989
Epoch 3: Train Acc=0.6008, Train Loss=0.0971, Val Acc=0.6126, Val Loss=0.0969
Epoch 4: Train Acc=0.6103, Train Loss=0.0956, Val Acc=0.6200, Val Loss=0.0954
Epoch 5: Train Acc=0.6179, Train Loss=0.0944, Val Acc=0.6252, Val Loss=0.0942


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.61791
train_loss,0.09443
val_acc,0.6252
val_loss,0.09418


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 19daiarg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▇▅▃▁
val_acc,▁▁▁▁▁
val_loss,█▇▅▃▁

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23028
val_acc,0.0976
val_loss,0.23032


[34m[1mwandb[0m: Agent Starting Run: 5wj839jn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8585, Train Loss=0.0391, Val Acc=0.8580, Val Loss=0.0394
Epoch 2: Train Acc=0.8718, Train Loss=0.0349, Val Acc=0.8680, Val Loss=0.0361
Epoch 3: Train Acc=0.8815, Train Loss=0.0322, Val Acc=0.8736, Val Loss=0.0341
Epoch 4: Train Acc=0.8880, Train Loss=0.0303, Val Acc=0.8762, Val Loss=0.0329
Epoch 5: Train Acc=0.8929, Train Loss=0.0290, Val Acc=0.8812, Val Loss=0.0321
Epoch 6: Train Acc=0.8964, Train Loss=0.0279, Val Acc=0.8826, Val Loss=0.0316
Epoch 7: Train Acc=0.8992, Train Loss=0.0270, Val Acc=0.8836, Val Loss=0.0312
Epoch 8: Train Acc=0.9013, Train Loss=0.0262, Val Acc=0.8864, Val Loss=0.0310
Epoch 9: Train Acc=0.9042, Train Loss=0.0255, Val Acc=0.8866, Val Loss=0.0308
Epoch 10: Train Acc=0.9069, Train Loss=0.0248, Val Acc=0.8870, Val Loss=0.0307


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_acc,▁▃▅▅▇▇▇███
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.90689
train_loss,0.02485
val_acc,0.887
val_loss,0.03073


[34m[1mwandb[0m: Agent Starting Run: 3uihup1a with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▃▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: dqsq6y0z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8234, Train Loss=0.0499, Val Acc=0.8258, Val Loss=0.0493
Epoch 2: Train Acc=0.8482, Train Loss=0.0442, Val Acc=0.8464, Val Loss=0.0440
Epoch 3: Train Acc=0.8574, Train Loss=0.0413, Val Acc=0.8576, Val Loss=0.0415
Epoch 4: Train Acc=0.8620, Train Loss=0.0397, Val Acc=0.8612, Val Loss=0.0401
Epoch 5: Train Acc=0.8655, Train Loss=0.0385, Val Acc=0.8634, Val Loss=0.0393
Epoch 6: Train Acc=0.8684, Train Loss=0.0375, Val Acc=0.8660, Val Loss=0.0386
Epoch 7: Train Acc=0.8704, Train Loss=0.0367, Val Acc=0.8676, Val Loss=0.0382
Epoch 8: Train Acc=0.8724, Train Loss=0.0361, Val Acc=0.8676, Val Loss=0.0378
Epoch 9: Train Acc=0.8744, Train Loss=0.0353, Val Acc=0.8686, Val Loss=0.0372
Epoch 10: Train Acc=0.8759, Train Loss=0.0348, Val Acc=0.8682, Val Loss=0.0369


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▆▇▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▄▆▇▇█████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.87593
train_loss,0.03477
val_acc,0.8682
val_loss,0.03689


[34m[1mwandb[0m: Agent Starting Run: ivd1i6pb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6139, Train Loss=0.0944, Val Acc=0.6182, Val Loss=0.0946
Epoch 2: Train Acc=0.6512, Train Loss=0.0909, Val Acc=0.6554, Val Loss=0.0911
Epoch 3: Train Acc=0.6705, Train Loss=0.0876, Val Acc=0.6790, Val Loss=0.0878
Epoch 4: Train Acc=0.6852, Train Loss=0.0852, Val Acc=0.6940, Val Loss=0.0854
Epoch 5: Train Acc=0.6975, Train Loss=0.0829, Val Acc=0.7052, Val Loss=0.0832


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▆▄▂▁
val_acc,▁▄▆▇█
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_acc,0.69747
train_loss,0.08294
val_acc,0.7052
val_loss,0.08319


[34m[1mwandb[0m: Agent Starting Run: 5y8nijfw with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4270, Train Loss=0.1581, Val Acc=0.4390, Val Loss=0.1576
Epoch 2: Train Acc=0.5174, Train Loss=0.1228, Val Acc=0.5234, Val Loss=0.1224
Epoch 3: Train Acc=0.6088, Train Loss=0.1058, Val Acc=0.6092, Val Loss=0.1058
Epoch 4: Train Acc=0.6880, Train Loss=0.0934, Val Acc=0.6882, Val Loss=0.0933
Epoch 5: Train Acc=0.7117, Train Loss=0.0824, Val Acc=0.7124, Val Loss=0.0823
Epoch 6: Train Acc=0.7324, Train Loss=0.0732, Val Acc=0.7354, Val Loss=0.0731
Epoch 7: Train Acc=0.7502, Train Loss=0.0668, Val Acc=0.7578, Val Loss=0.0665
Epoch 8: Train Acc=0.7634, Train Loss=0.0626, Val Acc=0.7720, Val Loss=0.0621
Epoch 9: Train Acc=0.7774, Train Loss=0.0595, Val Acc=0.7892, Val Loss=0.0589
Epoch 10: Train Acc=0.7929, Train Loss=0.0566, Val Acc=0.8014, Val Loss=0.0558


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▆▆▇▇▇██
train_loss,█▆▄▄▃▂▂▁▁▁
val_acc,▁▃▄▆▆▇▇▇██
val_loss,█▆▄▄▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.79295
train_loss,0.05655
val_acc,0.8014
val_loss,0.05581


[34m[1mwandb[0m: Agent Starting Run: qamy5mox with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7085, Train Loss=0.0763, Val Acc=0.7096, Val Loss=0.0757
Epoch 2: Train Acc=0.7413, Train Loss=0.0598, Val Acc=0.7420, Val Loss=0.0597
Epoch 3: Train Acc=0.7937, Train Loss=0.0529, Val Acc=0.7978, Val Loss=0.0527
Epoch 4: Train Acc=0.8430, Train Loss=0.0448, Val Acc=0.8460, Val Loss=0.0449
Epoch 5: Train Acc=0.8546, Train Loss=0.0413, Val Acc=0.8546, Val Loss=0.0418


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▇█
train_loss,█▅▃▂▁
val_acc,▁▃▅██
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.85462
train_loss,0.04131
val_acc,0.8546
val_loss,0.0418


[34m[1mwandb[0m: Agent Starting Run: up4fuse6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4417, Train Loss=0.1778, Val Acc=0.4356, Val Loss=0.1778
Epoch 2: Train Acc=0.4879, Train Loss=0.1550, Val Acc=0.4830, Val Loss=0.1548
Epoch 3: Train Acc=0.5556, Train Loss=0.1380, Val Acc=0.5548, Val Loss=0.1377
Epoch 4: Train Acc=0.5583, Train Loss=0.1268, Val Acc=0.5584, Val Loss=0.1264
Epoch 5: Train Acc=0.5665, Train Loss=0.1191, Val Acc=0.5684, Val Loss=0.1188


0,1
epoch,▁▃▅▆█
train_acc,▁▄▇██
train_loss,█▅▃▂▁
val_acc,▁▃▇▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.56653
train_loss,0.11906
val_acc,0.5684
val_loss,0.11877


[34m[1mwandb[0m: Agent Starting Run: epu2xgqf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4836, Train Loss=0.1127, Val Acc=0.4814, Val Loss=0.1129
Epoch 2: Train Acc=0.5576, Train Loss=0.1006, Val Acc=0.5596, Val Loss=0.1008
Epoch 3: Train Acc=0.6282, Train Loss=0.0876, Val Acc=0.6294, Val Loss=0.0876
Epoch 4: Train Acc=0.7015, Train Loss=0.0766, Val Acc=0.7032, Val Loss=0.0765
Epoch 5: Train Acc=0.7335, Train Loss=0.0705, Val Acc=0.7340, Val Loss=0.0704
Epoch 6: Train Acc=0.7791, Train Loss=0.0659, Val Acc=0.7814, Val Loss=0.0657
Epoch 7: Train Acc=0.7915, Train Loss=0.0613, Val Acc=0.7928, Val Loss=0.0612
Epoch 8: Train Acc=0.8009, Train Loss=0.0573, Val Acc=0.8012, Val Loss=0.0575
Epoch 9: Train Acc=0.8076, Train Loss=0.0546, Val Acc=0.8082, Val Loss=0.0550
Epoch 10: Train Acc=0.8124, Train Loss=0.0526, Val Acc=0.8114, Val Loss=0.0533


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▆▆▇████
train_loss,█▇▅▄▃▃▂▂▁▁
val_acc,▁▃▄▆▆▇████
val_loss,█▇▅▄▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.81238
train_loss,0.05264
val_acc,0.8114
val_loss,0.05327


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p2g94ofc with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▄▃▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▃▅▆▆▇▇███

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xm4m9nwf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7180, Train Loss=0.0755, Val Acc=0.7272, Val Loss=0.0756
Epoch 2: Train Acc=0.7938, Train Loss=0.0568, Val Acc=0.7988, Val Loss=0.0567
Epoch 3: Train Acc=0.8116, Train Loss=0.0503, Val Acc=0.8136, Val Loss=0.0506
Epoch 4: Train Acc=0.8369, Train Loss=0.0463, Val Acc=0.8312, Val Loss=0.0469
Epoch 5: Train Acc=0.8493, Train Loss=0.0435, Val Acc=0.8444, Val Loss=0.0444
Epoch 6: Train Acc=0.8559, Train Loss=0.0416, Val Acc=0.8490, Val Loss=0.0427
Epoch 7: Train Acc=0.8618, Train Loss=0.0400, Val Acc=0.8556, Val Loss=0.0414
Epoch 8: Train Acc=0.8660, Train Loss=0.0386, Val Acc=0.8602, Val Loss=0.0404
Epoch 9: Train Acc=0.8700, Train Loss=0.0374, Val Acc=0.8630, Val Loss=0.0395
Epoch 10: Train Acc=0.8733, Train Loss=0.0364, Val Acc=0.8658, Val Loss=0.0387


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_acc,▁▅▅▆▇▇▇███
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.87333
train_loss,0.03637
val_acc,0.8658
val_loss,0.03871


[34m[1mwandb[0m: Agent Starting Run: fik8889x with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5780, Train Loss=0.1006, Val Acc=0.5844, Val Loss=0.1002
Epoch 2: Train Acc=0.6041, Train Loss=0.0965, Val Acc=0.6122, Val Loss=0.0961
Epoch 3: Train Acc=0.6227, Train Loss=0.0939, Val Acc=0.6268, Val Loss=0.0935
Epoch 4: Train Acc=0.6367, Train Loss=0.0919, Val Acc=0.6402, Val Loss=0.0914
Epoch 5: Train Acc=0.6484, Train Loss=0.0902, Val Acc=0.6506, Val Loss=0.0897


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▇█
train_loss,█▅▃▂▁
val_acc,▁▄▅▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.64836
train_loss,0.09018
val_acc,0.6506
val_loss,0.08972


[34m[1mwandb[0m: Agent Starting Run: a7xmrak0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5541, Train Loss=0.1673, Val Acc=0.5614, Val Loss=0.1665
Epoch 2: Train Acc=0.6259, Train Loss=0.1392, Val Acc=0.6338, Val Loss=0.1382
Epoch 3: Train Acc=0.6580, Train Loss=0.1214, Val Acc=0.6666, Val Loss=0.1204
Epoch 4: Train Acc=0.6751, Train Loss=0.1088, Val Acc=0.6828, Val Loss=0.1079
Epoch 5: Train Acc=0.6912, Train Loss=0.0997, Val Acc=0.6960, Val Loss=0.0987
Epoch 6: Train Acc=0.7047, Train Loss=0.0928, Val Acc=0.7100, Val Loss=0.0918
Epoch 7: Train Acc=0.7171, Train Loss=0.0874, Val Acc=0.7234, Val Loss=0.0864
Epoch 8: Train Acc=0.7299, Train Loss=0.0830, Val Acc=0.7372, Val Loss=0.0820
Epoch 9: Train Acc=0.7396, Train Loss=0.0793, Val Acc=0.7456, Val Loss=0.0783
Epoch 10: Train Acc=0.7489, Train Loss=0.0762, Val Acc=0.7550, Val Loss=0.0751


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▅▆▆▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▄▅▅▆▆▇▇██
val_loss,█▆▄▄▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.74885
train_loss,0.0762
val_acc,0.755
val_loss,0.07513


[34m[1mwandb[0m: Agent Starting Run: an9bkmis with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1000, Train Loss=0.2334, Val Acc=0.1002, Val Loss=0.2336
Epoch 2: Train Acc=0.1000, Train Loss=0.2312, Val Acc=0.1002, Val Loss=0.2314
Epoch 3: Train Acc=0.0997, Train Loss=0.2306, Val Acc=0.1000, Val Loss=0.2307
Epoch 4: Train Acc=0.1004, Train Loss=0.2304, Val Acc=0.0980, Val Loss=0.2305
Epoch 5: Train Acc=0.1036, Train Loss=0.2304, Val Acc=0.0956, Val Loss=0.2304
Epoch 6: Train Acc=0.1050, Train Loss=0.2303, Val Acc=0.0956, Val Loss=0.2304
Epoch 7: Train Acc=0.0967, Train Loss=0.2303, Val Acc=0.0870, Val Loss=0.2304
Epoch 8: Train Acc=0.0885, Train Loss=0.2303, Val Acc=0.0792, Val Loss=0.2303
Epoch 9: Train Acc=0.0866, Train Loss=0.2303, Val Acc=0.0742, Val Loss=0.2303
Epoch 10: Train Acc=0.0870, Train Loss=0.2303, Val Acc=0.0742, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▆▆▆▆▇█▅▂▁▁
train_loss,█▃▂▁▁▁▁▁▁▁
val_acc,███▇▇▇▄▂▁▁
val_loss,█▃▂▁▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.08698
train_loss,0.2303
val_acc,0.0742
val_loss,0.23032


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5hn8qjxz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8203, Train Loss=0.0519, Val Acc=0.8250, Val Loss=0.0509
Epoch 2: Train Acc=0.8511, Train Loss=0.0433, Val Acc=0.8538, Val Loss=0.0426
Epoch 3: Train Acc=0.8595, Train Loss=0.0401, Val Acc=0.8594, Val Loss=0.0401
Epoch 4: Train Acc=0.8638, Train Loss=0.0385, Val Acc=0.8614, Val Loss=0.0391
Epoch 5: Train Acc=0.8667, Train Loss=0.0377, Val Acc=0.8630, Val Loss=0.0389
Epoch 6: Train Acc=0.8703, Train Loss=0.0363, Val Acc=0.8650, Val Loss=0.0381
Epoch 7: Train Acc=0.8729, Train Loss=0.0354, Val Acc=0.8670, Val Loss=0.0375
Epoch 8: Train Acc=0.8755, Train Loss=0.0346, Val Acc=0.8680, Val Loss=0.0371
Epoch 9: Train Acc=0.8783, Train Loss=0.0337, Val Acc=0.8700, Val Loss=0.0366
Epoch 10: Train Acc=0.8809, Train Loss=0.0327, Val Acc=0.8732, Val Loss=0.0360


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▅▆▆▇▇▇▇██
val_loss,█▄▃▂▂▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.88093
train_loss,0.03274
val_acc,0.8732
val_loss,0.03598


[34m[1mwandb[0m: Agent Starting Run: ae3b2tya with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▄▂▁▁
val_acc,▁▁▁▁▁
val_loss,▁▄▆▇█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: bneyrogm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁█████████
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁█████████

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23029


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ddtr9lxi with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4326, Train Loss=0.1348, Val Acc=0.4346, Val Loss=0.1347
Epoch 2: Train Acc=0.5007, Train Loss=0.1141, Val Acc=0.5054, Val Loss=0.1139
Epoch 3: Train Acc=0.5340, Train Loss=0.1044, Val Acc=0.5386, Val Loss=0.1040
Epoch 4: Train Acc=0.5425, Train Loss=0.0998, Val Acc=0.5518, Val Loss=0.0992
Epoch 5: Train Acc=0.5510, Train Loss=0.0967, Val Acc=0.5592, Val Loss=0.0962


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▄▂▂▁
val_acc,▁▅▇██
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.55104
train_loss,0.09673
val_acc,0.5592
val_loss,0.09621


[34m[1mwandb[0m: Agent Starting Run: 78meprjg with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2118, Train Loss=0.1696, Val Acc=0.2078, Val Loss=0.1696
Epoch 2: Train Acc=0.3960, Train Loss=0.1420, Val Acc=0.3972, Val Loss=0.1421
Epoch 3: Train Acc=0.4734, Train Loss=0.1237, Val Acc=0.4722, Val Loss=0.1238
Epoch 4: Train Acc=0.5447, Train Loss=0.1105, Val Acc=0.5456, Val Loss=0.1106
Epoch 5: Train Acc=0.5837, Train Loss=0.0999, Val Acc=0.5876, Val Loss=0.0997


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.58369
train_loss,0.09986
val_acc,0.5876
val_loss,0.09972


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: spyio7al with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7901, Train Loss=0.0656, Val Acc=0.7948, Val Loss=0.0649
Epoch 2: Train Acc=0.8174, Train Loss=0.0556, Val Acc=0.8226, Val Loss=0.0547
Epoch 3: Train Acc=0.8285, Train Loss=0.0511, Val Acc=0.8334, Val Loss=0.0501
Epoch 4: Train Acc=0.8343, Train Loss=0.0485, Val Acc=0.8400, Val Loss=0.0475
Epoch 5: Train Acc=0.8389, Train Loss=0.0467, Val Acc=0.8440, Val Loss=0.0457
Epoch 6: Train Acc=0.8425, Train Loss=0.0453, Val Acc=0.8480, Val Loss=0.0444
Epoch 7: Train Acc=0.8454, Train Loss=0.0442, Val Acc=0.8514, Val Loss=0.0434
Epoch 8: Train Acc=0.8479, Train Loss=0.0433, Val Acc=0.8544, Val Loss=0.0426
Epoch 9: Train Acc=0.8503, Train Loss=0.0426, Val Acc=0.8576, Val Loss=0.0419
Epoch 10: Train Acc=0.8522, Train Loss=0.0419, Val Acc=0.8594, Val Loss=0.0413


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.85218
train_loss,0.04187
val_acc,0.8594
val_loss,0.04129


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d63bbleo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▅▃▂▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▇█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: a6f3lvvu with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5428, Train Loss=0.1579, Val Acc=0.5444, Val Loss=0.1577
Epoch 2: Train Acc=0.6682, Train Loss=0.0973, Val Acc=0.6796, Val Loss=0.0962
Epoch 3: Train Acc=0.7067, Train Loss=0.0808, Val Acc=0.7160, Val Loss=0.0797
Epoch 4: Train Acc=0.7349, Train Loss=0.0736, Val Acc=0.7442, Val Loss=0.0724
Epoch 5: Train Acc=0.7537, Train Loss=0.0688, Val Acc=0.7664, Val Loss=0.0676


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▃▂▁▁
val_acc,▁▅▆▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,0.75365
train_loss,0.06878
val_acc,0.7664
val_loss,0.06757


[34m[1mwandb[0m: Agent Starting Run: 759t9ppj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8554, Train Loss=0.0393, Val Acc=0.8554, Val Loss=0.0394
Epoch 2: Train Acc=0.8767, Train Loss=0.0336, Val Acc=0.8722, Val Loss=0.0349
Epoch 3: Train Acc=0.8846, Train Loss=0.0314, Val Acc=0.8806, Val Loss=0.0333
Epoch 4: Train Acc=0.8897, Train Loss=0.0300, Val Acc=0.8832, Val Loss=0.0324
Epoch 5: Train Acc=0.8939, Train Loss=0.0288, Val Acc=0.8858, Val Loss=0.0317
Epoch 6: Train Acc=0.8968, Train Loss=0.0279, Val Acc=0.8878, Val Loss=0.0313
Epoch 7: Train Acc=0.8996, Train Loss=0.0272, Val Acc=0.8896, Val Loss=0.0310
Epoch 8: Train Acc=0.9023, Train Loss=0.0266, Val Acc=0.8904, Val Loss=0.0308
Epoch 9: Train Acc=0.9042, Train Loss=0.0260, Val Acc=0.8898, Val Loss=0.0307
Epoch 10: Train Acc=0.9059, Train Loss=0.0255, Val Acc=0.8900, Val Loss=0.0306


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▄▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.90595
train_loss,0.02548
val_acc,0.89
val_loss,0.03059


[34m[1mwandb[0m: Agent Starting Run: liered4p with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7563, Train Loss=0.0695, Val Acc=0.7626, Val Loss=0.0687
Epoch 2: Train Acc=0.8036, Train Loss=0.0556, Val Acc=0.8112, Val Loss=0.0548
Epoch 3: Train Acc=0.8235, Train Loss=0.0504, Val Acc=0.8306, Val Loss=0.0497
Epoch 4: Train Acc=0.8333, Train Loss=0.0476, Val Acc=0.8382, Val Loss=0.0469
Epoch 5: Train Acc=0.8398, Train Loss=0.0456, Val Acc=0.8442, Val Loss=0.0452
Epoch 6: Train Acc=0.8453, Train Loss=0.0442, Val Acc=0.8502, Val Loss=0.0439
Epoch 7: Train Acc=0.8501, Train Loss=0.0430, Val Acc=0.8546, Val Loss=0.0430
Epoch 8: Train Acc=0.8537, Train Loss=0.0421, Val Acc=0.8576, Val Loss=0.0422
Epoch 9: Train Acc=0.8564, Train Loss=0.0413, Val Acc=0.8602, Val Loss=0.0416
Epoch 10: Train Acc=0.8581, Train Loss=0.0405, Val Acc=0.8614, Val Loss=0.0410


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▆▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_acc,▁▄▆▆▇▇████
val_loss,█▄▃▂▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.85807
train_loss,0.04054
val_acc,0.8614
val_loss,0.04098


[34m[1mwandb[0m: Agent Starting Run: 925d9s74 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2301, Val Acc=0.0976, Val Loss=0.2302
Epoch 4: Train Acc=0.1609, Train Loss=0.2300, Val Acc=0.1604, Val Loss=0.2300
Epoch 5: Train Acc=0.1929, Train Loss=0.2297, Val Acc=0.1918, Val Loss=0.2297
Epoch 6: Train Acc=0.1953, Train Loss=0.2291, Val Acc=0.1958, Val Loss=0.2291
Epoch 7: Train Acc=0.1952, Train Loss=0.2274, Val Acc=0.1950, Val Loss=0.2275
Epoch 8: Train Acc=0.1957, Train Loss=0.2198, Val Acc=0.1954, Val Loss=0.2198
Epoch 9: Train Acc=0.2394, Train Loss=0.1851, Val Acc=0.2316, Val Loss=0.1849
Epoch 10: Train Acc=0.2881, Train Loss=0.1687, Val Acc=0.2780, Val Loss=0.1686


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▃▄▅▅▅▆█
train_loss,███████▇▃▁
val_acc,▁▁▁▃▅▅▅▅▆█
val_loss,███████▇▃▁

0,1
epoch,10.0
train_acc,0.28809
train_loss,0.16875
val_acc,0.278
val_loss,0.16863


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: up1scjby with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8002, Train Loss=0.0586, Val Acc=0.8072, Val Loss=0.0576
Epoch 2: Train Acc=0.8218, Train Loss=0.0518, Val Acc=0.8288, Val Loss=0.0509
Epoch 3: Train Acc=0.8315, Train Loss=0.0485, Val Acc=0.8356, Val Loss=0.0476
Epoch 4: Train Acc=0.8382, Train Loss=0.0464, Val Acc=0.8434, Val Loss=0.0456
Epoch 5: Train Acc=0.8429, Train Loss=0.0450, Val Acc=0.8502, Val Loss=0.0443


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.84289
train_loss,0.04501
val_acc,0.8502
val_loss,0.0443


[34m[1mwandb[0m: Agent Starting Run: 3xxf1al9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1942, Train Loss=0.2293, Val Acc=0.1934, Val Loss=0.2294
Epoch 2: Train Acc=0.2428, Train Loss=0.2270, Val Acc=0.2480, Val Loss=0.2270
Epoch 3: Train Acc=0.2682, Train Loss=0.2167, Val Acc=0.2724, Val Loss=0.2165
Epoch 4: Train Acc=0.2749, Train Loss=0.1838, Val Acc=0.2702, Val Loss=0.1834
Epoch 5: Train Acc=0.3307, Train Loss=0.1636, Val Acc=0.3230, Val Loss=0.1633


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▅█
train_loss,██▇▃▁
val_acc,▁▄▅▅█
val_loss,██▇▃▁

0,1
epoch,5.0
train_acc,0.33075
train_loss,0.16363
val_acc,0.323
val_loss,0.16328


[34m[1mwandb[0m: Agent Starting Run: vxnu576e with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8501, Train Loss=0.0420, Val Acc=0.8518, Val Loss=0.0417
Epoch 2: Train Acc=0.8649, Train Loss=0.0378, Val Acc=0.8608, Val Loss=0.0386
Epoch 3: Train Acc=0.8736, Train Loss=0.0355, Val Acc=0.8646, Val Loss=0.0369
Epoch 4: Train Acc=0.8789, Train Loss=0.0338, Val Acc=0.8684, Val Loss=0.0358
Epoch 5: Train Acc=0.8830, Train Loss=0.0326, Val Acc=0.8730, Val Loss=0.0350
Epoch 6: Train Acc=0.8868, Train Loss=0.0315, Val Acc=0.8738, Val Loss=0.0343
Epoch 7: Train Acc=0.8898, Train Loss=0.0305, Val Acc=0.8764, Val Loss=0.0338
Epoch 8: Train Acc=0.8926, Train Loss=0.0297, Val Acc=0.8778, Val Loss=0.0333
Epoch 9: Train Acc=0.8948, Train Loss=0.0289, Val Acc=0.8792, Val Loss=0.0329
Epoch 10: Train Acc=0.8971, Train Loss=0.0281, Val Acc=0.8808, Val Loss=0.0326


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▃▄▅▆▆▇▇██
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.89711
train_loss,0.02812
val_acc,0.8808
val_loss,0.03265


[34m[1mwandb[0m: Agent Starting Run: c1azykhe with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 3: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2301, Val Acc=0.0976, Val Loss=0.2302
Epoch 7: Train Acc=0.1002, Train Loss=0.2301, Val Acc=0.0976, Val Loss=0.2302
Epoch 8: Train Acc=0.1002, Train Loss=0.2300, Val Acc=0.0976, Val Loss=0.2301
Epoch 9: Train Acc=0.1003, Train Loss=0.2300, Val Acc=0.0976, Val Loss=0.2301
Epoch 10: Train Acc=0.1055, Train Loss=0.2299, Val Acc=0.1030, Val Loss=0.2300


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁█
train_loss,█▇▇▆▆▅▄▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁█
val_loss,█▇▇▆▆▅▄▃▂▁

0,1
epoch,10.0
train_acc,0.10555
train_loss,0.22991
val_acc,0.103
val_loss,0.22999


[34m[1mwandb[0m: Agent Starting Run: 3onastib with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7067, Train Loss=0.0798, Val Acc=0.7112, Val Loss=0.0797
Epoch 2: Train Acc=0.7789, Train Loss=0.0617, Val Acc=0.7842, Val Loss=0.0617
Epoch 3: Train Acc=0.7968, Train Loss=0.0555, Val Acc=0.8000, Val Loss=0.0558
Epoch 4: Train Acc=0.8070, Train Loss=0.0522, Val Acc=0.8064, Val Loss=0.0529
Epoch 5: Train Acc=0.8144, Train Loss=0.0497, Val Acc=0.8106, Val Loss=0.0507
Epoch 6: Train Acc=0.8209, Train Loss=0.0477, Val Acc=0.8154, Val Loss=0.0489
Epoch 7: Train Acc=0.8266, Train Loss=0.0459, Val Acc=0.8216, Val Loss=0.0474
Epoch 8: Train Acc=0.8339, Train Loss=0.0441, Val Acc=0.8274, Val Loss=0.0458
Epoch 9: Train Acc=0.8437, Train Loss=0.0423, Val Acc=0.8366, Val Loss=0.0442
Epoch 10: Train Acc=0.8519, Train Loss=0.0407, Val Acc=0.8448, Val Loss=0.0428


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▅▆▆▆▆▇▇██
val_loss,█▅▃▃▂▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.85193
train_loss,0.04068
val_acc,0.8448
val_loss,0.04282


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f5cxsajy with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1994, Train Loss=0.2295, Val Acc=0.1916, Val Loss=0.2295
Epoch 2: Train Acc=0.2012, Train Loss=0.2122, Val Acc=0.2030, Val Loss=0.2122
Epoch 3: Train Acc=0.2009, Train Loss=0.1936, Val Acc=0.2032, Val Loss=0.1937
Epoch 4: Train Acc=0.2010, Train Loss=0.1843, Val Acc=0.2016, Val Loss=0.1845
Epoch 5: Train Acc=0.2015, Train Loss=0.1794, Val Acc=0.2014, Val Loss=0.1798


0,1
epoch,▁▃▅▆█
train_acc,▁▇▆▆█
train_loss,█▆▃▂▁
val_acc,▁██▇▇
val_loss,█▆▃▂▁

0,1
epoch,5.0
train_acc,0.20145
train_loss,0.17944
val_acc,0.2014
val_loss,0.1798


[34m[1mwandb[0m: Agent Starting Run: opzbvle3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 2: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 3: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 4: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 5: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 6: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 7: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 8: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 9: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 10: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▇▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▆▅▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23039
val_acc,0.0976
val_loss,0.23049


[34m[1mwandb[0m: Agent Starting Run: fq4pgprf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁█████████
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁█████████

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: 7h5ecji7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7460, Train Loss=0.0729, Val Acc=0.7522, Val Loss=0.0728
Epoch 2: Train Acc=0.7866, Train Loss=0.0583, Val Acc=0.7904, Val Loss=0.0578
Epoch 3: Train Acc=0.8071, Train Loss=0.0528, Val Acc=0.8118, Val Loss=0.0522
Epoch 4: Train Acc=0.8227, Train Loss=0.0496, Val Acc=0.8278, Val Loss=0.0490
Epoch 5: Train Acc=0.8320, Train Loss=0.0472, Val Acc=0.8368, Val Loss=0.0467
Epoch 6: Train Acc=0.8388, Train Loss=0.0453, Val Acc=0.8416, Val Loss=0.0449
Epoch 7: Train Acc=0.8443, Train Loss=0.0438, Val Acc=0.8452, Val Loss=0.0435
Epoch 8: Train Acc=0.8485, Train Loss=0.0425, Val Acc=0.8490, Val Loss=0.0423
Epoch 9: Train Acc=0.8521, Train Loss=0.0413, Val Acc=0.8522, Val Loss=0.0412
Epoch 10: Train Acc=0.8562, Train Loss=0.0403, Val Acc=0.8554, Val Loss=0.0403


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.8562
train_loss,0.04031
val_acc,0.8554
val_loss,0.0403


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s7q6rmou with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1003, Train Loss=0.2303, Val Acc=0.0984, Val Loss=0.2303
Epoch 3: Train Acc=0.0949, Train Loss=0.2303, Val Acc=0.0880, Val Loss=0.2303
Epoch 4: Train Acc=0.0961, Train Loss=0.2303, Val Acc=0.0884, Val Loss=0.2303
Epoch 5: Train Acc=0.0962, Train Loss=0.2303, Val Acc=0.0886, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,██▁▃▃
train_loss,█▂▂▂▁
val_acc,▇█▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.0962
train_loss,0.23026
val_acc,0.0886
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: 26i9elf9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6945, Train Loss=0.0823, Val Acc=0.6986, Val Loss=0.0821
Epoch 2: Train Acc=0.7216, Train Loss=0.0701, Val Acc=0.7270, Val Loss=0.0694
Epoch 3: Train Acc=0.7347, Train Loss=0.0658, Val Acc=0.7392, Val Loss=0.0652
Epoch 4: Train Acc=0.7966, Train Loss=0.0572, Val Acc=0.8004, Val Loss=0.0573
Epoch 5: Train Acc=0.8331, Train Loss=0.0509, Val Acc=0.8332, Val Loss=0.0521
Epoch 6: Train Acc=0.8515, Train Loss=0.0466, Val Acc=0.8472, Val Loss=0.0486
Epoch 7: Train Acc=0.8611, Train Loss=0.0437, Val Acc=0.8568, Val Loss=0.0461
Epoch 8: Train Acc=0.8669, Train Loss=0.0419, Val Acc=0.8628, Val Loss=0.0446
Epoch 9: Train Acc=0.8715, Train Loss=0.0405, Val Acc=0.8648, Val Loss=0.0438
Epoch 10: Train Acc=0.8769, Train Loss=0.0386, Val Acc=0.8700, Val Loss=0.0423


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▃▅▆▇▇███
train_loss,█▆▅▄▃▂▂▂▁▁
val_acc,▁▂▃▅▆▇▇███
val_loss,█▆▅▄▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.87687
train_loss,0.03865
val_acc,0.87
val_loss,0.04225


[34m[1mwandb[0m: Agent Starting Run: 7q7oyfw4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.3012, Train Loss=0.1591, Val Acc=0.3100, Val Loss=0.1593
Epoch 2: Train Acc=0.3965, Train Loss=0.1226, Val Acc=0.4046, Val Loss=0.1226
Epoch 3: Train Acc=0.4585, Train Loss=0.1133, Val Acc=0.4588, Val Loss=0.1135
Epoch 4: Train Acc=0.4856, Train Loss=0.1055, Val Acc=0.4854, Val Loss=0.1059
Epoch 5: Train Acc=0.4929, Train Loss=0.1019, Val Acc=0.4916, Val Loss=0.1025
Epoch 6: Train Acc=0.5110, Train Loss=0.0994, Val Acc=0.5064, Val Loss=0.1001
Epoch 7: Train Acc=0.5851, Train Loss=0.0951, Val Acc=0.5836, Val Loss=0.0956
Epoch 8: Train Acc=0.6156, Train Loss=0.0913, Val Acc=0.6114, Val Loss=0.0919
Epoch 9: Train Acc=0.6701, Train Loss=0.0854, Val Acc=0.6620, Val Loss=0.0861
Epoch 10: Train Acc=0.7025, Train Loss=0.0764, Val Acc=0.6980, Val Loss=0.0770


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▄▄▅▆▆▇█
train_loss,█▅▄▃▃▃▃▂▂▁
val_acc,▁▃▄▄▄▅▆▆▇█
val_loss,█▅▄▃▃▃▃▂▂▁

0,1
epoch,10.0
train_acc,0.70251
train_loss,0.07637
val_acc,0.698
val_loss,0.07699


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 20ngm7hd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8071, Train Loss=0.0524, Val Acc=0.8126, Val Loss=0.0519
Epoch 2: Train Acc=0.8370, Train Loss=0.0456, Val Acc=0.8418, Val Loss=0.0452
Epoch 3: Train Acc=0.8486, Train Loss=0.0424, Val Acc=0.8506, Val Loss=0.0420
Epoch 4: Train Acc=0.8567, Train Loss=0.0403, Val Acc=0.8562, Val Loss=0.0401
Epoch 5: Train Acc=0.8622, Train Loss=0.0387, Val Acc=0.8604, Val Loss=0.0388


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.86218
train_loss,0.03875
val_acc,0.8604
val_loss,0.03875


[34m[1mwandb[0m: Agent Starting Run: 4ua3bm7q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▅▃▂▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▇█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iwc7o8lr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.3690, Train Loss=0.1596, Val Acc=0.3724, Val Loss=0.1599
Epoch 2: Train Acc=0.4843, Train Loss=0.1208, Val Acc=0.4834, Val Loss=0.1209
Epoch 3: Train Acc=0.5636, Train Loss=0.1040, Val Acc=0.5674, Val Loss=0.1039
Epoch 4: Train Acc=0.6110, Train Loss=0.0929, Val Acc=0.6176, Val Loss=0.0926
Epoch 5: Train Acc=0.6526, Train Loss=0.0864, Val Acc=0.6610, Val Loss=0.0860


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.65264
train_loss,0.08642
val_acc,0.661
val_loss,0.08602


[34m[1mwandb[0m: Agent Starting Run: 1o8y0pp0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 2: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 3: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 4: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 5: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2305


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23049
val_acc,0.0976
val_loss,0.23054


[34m[1mwandb[0m: Agent Starting Run: ec2sq65e with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5712, Train Loss=0.1314, Val Acc=0.5696, Val Loss=0.1311
Epoch 2: Train Acc=0.6102, Train Loss=0.1045, Val Acc=0.6108, Val Loss=0.1044
Epoch 3: Train Acc=0.6729, Train Loss=0.0906, Val Acc=0.6770, Val Loss=0.0906
Epoch 4: Train Acc=0.7052, Train Loss=0.0819, Val Acc=0.7080, Val Loss=0.0820
Epoch 5: Train Acc=0.7265, Train Loss=0.0761, Val Acc=0.7324, Val Loss=0.0762
Epoch 6: Train Acc=0.7417, Train Loss=0.0716, Val Acc=0.7466, Val Loss=0.0717
Epoch 7: Train Acc=0.7531, Train Loss=0.0680, Val Acc=0.7576, Val Loss=0.0680
Epoch 8: Train Acc=0.7629, Train Loss=0.0650, Val Acc=0.7668, Val Loss=0.0649
Epoch 9: Train Acc=0.7698, Train Loss=0.0626, Val Acc=0.7744, Val Loss=0.0624
Epoch 10: Train Acc=0.7758, Train Loss=0.0605, Val Acc=0.7794, Val Loss=0.0602


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▄▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_acc,▁▂▅▆▆▇▇███
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.77584
train_loss,0.06054
val_acc,0.7794
val_loss,0.06021


[34m[1mwandb[0m: Agent Starting Run: 7xdk94pw with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.2325, Train Loss=0.2262, Val Acc=0.2340, Val Loss=0.2262
Epoch 2: Train Acc=0.2037, Train Loss=0.2132, Val Acc=0.2072, Val Loss=0.2132
Epoch 3: Train Acc=0.2052, Train Loss=0.1984, Val Acc=0.2068, Val Loss=0.1985
Epoch 4: Train Acc=0.2048, Train Loss=0.1877, Val Acc=0.2072, Val Loss=0.1878
Epoch 5: Train Acc=0.2053, Train Loss=0.1809, Val Acc=0.2068, Val Loss=0.1810
Epoch 6: Train Acc=0.2057, Train Loss=0.1766, Val Acc=0.2076, Val Loss=0.1767
Epoch 7: Train Acc=0.2067, Train Loss=0.1739, Val Acc=0.2092, Val Loss=0.1739
Epoch 8: Train Acc=0.2099, Train Loss=0.1720, Val Acc=0.2118, Val Loss=0.1720
Epoch 9: Train Acc=0.2169, Train Loss=0.1707, Val Acc=0.2184, Val Loss=0.1706
Epoch 10: Train Acc=0.2216, Train Loss=0.1696, Val Acc=0.2254, Val Loss=0.1695


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,█▁▁▁▁▂▂▃▄▅
train_loss,█▆▅▃▂▂▂▁▁▁
val_acc,█▁▁▁▁▁▂▂▄▆
val_loss,█▆▅▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.22164
train_loss,0.16958
val_acc,0.2254
val_loss,0.16948


[34m[1mwandb[0m: Agent Starting Run: zmgupw2u with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 2: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 3: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 4: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 5: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 6: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 7: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 8: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 9: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305
Epoch 10: Train Acc=0.1002, Train Loss=0.2304, Val Acc=0.0976, Val Loss=0.2305


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▆▅▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.2304
val_acc,0.0976
val_loss,0.23049


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ors2xt6y with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.0990, Train Loss=0.2411, Val Acc=0.1112, Val Loss=0.2408
Epoch 2: Train Acc=0.1052, Train Loss=0.2383, Val Acc=0.1142, Val Loss=0.2380
Epoch 3: Train Acc=0.1225, Train Loss=0.2362, Val Acc=0.1270, Val Loss=0.2360
Epoch 4: Train Acc=0.1046, Train Loss=0.2346, Val Acc=0.0952, Val Loss=0.2344
Epoch 5: Train Acc=0.1004, Train Loss=0.2334, Val Acc=0.0916, Val Loss=0.2333
Epoch 6: Train Acc=0.1006, Train Loss=0.2325, Val Acc=0.0914, Val Loss=0.2324
Epoch 7: Train Acc=0.1007, Train Loss=0.2319, Val Acc=0.0914, Val Loss=0.2318
Epoch 8: Train Acc=0.1008, Train Loss=0.2314, Val Acc=0.0914, Val Loss=0.2313
Epoch 9: Train Acc=0.1008, Train Loss=0.2310, Val Acc=0.0914, Val Loss=0.2310
Epoch 10: Train Acc=0.1008, Train Loss=0.2308, Val Acc=0.0914, Val Loss=0.2307


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃█▃▁▁▂▂▂▂
train_loss,█▆▅▄▃▂▂▁▁▁
val_acc,▅▅█▂▁▁▁▁▁▁
val_loss,█▆▅▄▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23079
val_acc,0.0914
val_loss,0.23074


[34m[1mwandb[0m: Agent Starting Run: y5om8krc with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁▇███
val_acc,▁▁▁▁▁
val_loss,▁▇███

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: 0f84g26y with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5669, Train Loss=0.1036, Val Acc=0.5728, Val Loss=0.1036
Epoch 2: Train Acc=0.6244, Train Loss=0.0957, Val Acc=0.6322, Val Loss=0.0956
Epoch 3: Train Acc=0.6499, Train Loss=0.0913, Val Acc=0.6550, Val Loss=0.0915
Epoch 4: Train Acc=0.6753, Train Loss=0.0876, Val Acc=0.6764, Val Loss=0.0879
Epoch 5: Train Acc=0.6926, Train Loss=0.0844, Val Acc=0.6964, Val Loss=0.0849


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▄▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.69264
train_loss,0.08444
val_acc,0.6964
val_loss,0.08491


[34m[1mwandb[0m: Agent Starting Run: vi5m3hmt with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▅▄▃▂▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▃▄▅▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Agent Starting Run: 9yz1e8nv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.4918, Train Loss=0.1161, Val Acc=0.4982, Val Loss=0.1158
Epoch 2: Train Acc=0.5305, Train Loss=0.1061, Val Acc=0.5414, Val Loss=0.1059
Epoch 3: Train Acc=0.5472, Train Loss=0.1031, Val Acc=0.5604, Val Loss=0.1029
Epoch 4: Train Acc=0.5611, Train Loss=0.1012, Val Acc=0.5762, Val Loss=0.1011
Epoch 5: Train Acc=0.5701, Train Loss=0.0998, Val Acc=0.5836, Val Loss=0.0997


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▂▂▁
val_acc,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.57011
train_loss,0.09985
val_acc,0.5836
val_loss,0.09968


[34m[1mwandb[0m: Agent Starting Run: 5n33yj0p with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7330, Train Loss=0.0734, Val Acc=0.7392, Val Loss=0.0730
Epoch 2: Train Acc=0.7625, Train Loss=0.0629, Val Acc=0.7654, Val Loss=0.0632
Epoch 3: Train Acc=0.7851, Train Loss=0.0562, Val Acc=0.7854, Val Loss=0.0571
Epoch 4: Train Acc=0.7992, Train Loss=0.0518, Val Acc=0.7962, Val Loss=0.0534
Epoch 5: Train Acc=0.8049, Train Loss=0.0493, Val Acc=0.8028, Val Loss=0.0511


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.80491
train_loss,0.04932
val_acc,0.8028
val_loss,0.05112


[34m[1mwandb[0m: Agent Starting Run: rtyh4jsr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1981, Train Loss=0.2287, Val Acc=0.1906, Val Loss=0.2287
Epoch 2: Train Acc=0.2009, Train Loss=0.2222, Val Acc=0.1930, Val Loss=0.2223
Epoch 3: Train Acc=0.2003, Train Loss=0.2083, Val Acc=0.1932, Val Loss=0.2084
Epoch 4: Train Acc=0.2014, Train Loss=0.1931, Val Acc=0.1946, Val Loss=0.1931
Epoch 5: Train Acc=0.2020, Train Loss=0.1825, Val Acc=0.1948, Val Loss=0.1826


0,1
epoch,▁▃▅▆█
train_acc,▁▆▅▇█
train_loss,█▇▅▃▁
val_acc,▁▅▅██
val_loss,█▇▅▃▁

0,1
epoch,5.0
train_acc,0.20202
train_loss,0.18253
val_acc,0.1948
val_loss,0.18259


[34m[1mwandb[0m: Agent Starting Run: x9dl82nr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▄▃▃▂▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▄▄▅▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Agent Starting Run: 11rp6a82 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7703, Train Loss=0.0687, Val Acc=0.7752, Val Loss=0.0680
Epoch 2: Train Acc=0.8176, Train Loss=0.0532, Val Acc=0.8228, Val Loss=0.0523
Epoch 3: Train Acc=0.8333, Train Loss=0.0473, Val Acc=0.8396, Val Loss=0.0466
Epoch 4: Train Acc=0.8436, Train Loss=0.0441, Val Acc=0.8490, Val Loss=0.0436
Epoch 5: Train Acc=0.8513, Train Loss=0.0419, Val Acc=0.8558, Val Loss=0.0418
Epoch 6: Train Acc=0.8569, Train Loss=0.0402, Val Acc=0.8598, Val Loss=0.0404
Epoch 7: Train Acc=0.8607, Train Loss=0.0389, Val Acc=0.8626, Val Loss=0.0393
Epoch 8: Train Acc=0.8647, Train Loss=0.0378, Val Acc=0.8628, Val Loss=0.0385
Epoch 9: Train Acc=0.8679, Train Loss=0.0368, Val Acc=0.8642, Val Loss=0.0378
Epoch 10: Train Acc=0.8709, Train Loss=0.0360, Val Acc=0.8664, Val Loss=0.0372


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_acc,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.87085
train_loss,0.03595
val_acc,0.8664
val_loss,0.03716


[34m[1mwandb[0m: Agent Starting Run: 0k7o9frf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7825, Train Loss=0.0650, Val Acc=0.7874, Val Loss=0.0639
Epoch 2: Train Acc=0.8200, Train Loss=0.0512, Val Acc=0.8270, Val Loss=0.0502
Epoch 3: Train Acc=0.8348, Train Loss=0.0463, Val Acc=0.8406, Val Loss=0.0456
Epoch 4: Train Acc=0.8445, Train Loss=0.0435, Val Acc=0.8486, Val Loss=0.0431
Epoch 5: Train Acc=0.8519, Train Loss=0.0415, Val Acc=0.8516, Val Loss=0.0413
Epoch 6: Train Acc=0.8568, Train Loss=0.0399, Val Acc=0.8560, Val Loss=0.0400
Epoch 7: Train Acc=0.8615, Train Loss=0.0386, Val Acc=0.8592, Val Loss=0.0390
Epoch 8: Train Acc=0.8653, Train Loss=0.0375, Val Acc=0.8628, Val Loss=0.0381
Epoch 9: Train Acc=0.8685, Train Loss=0.0366, Val Acc=0.8642, Val Loss=0.0374
Epoch 10: Train Acc=0.8713, Train Loss=0.0358, Val Acc=0.8670, Val Loss=0.0368


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▆▆▇▇▇███
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.87125
train_loss,0.03576
val_acc,0.867
val_loss,0.03684


[34m[1mwandb[0m: Agent Starting Run: 4p9oclj1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7843, Train Loss=0.0611, Val Acc=0.7910, Val Loss=0.0604
Epoch 2: Train Acc=0.8252, Train Loss=0.0501, Val Acc=0.8290, Val Loss=0.0494
Epoch 3: Train Acc=0.8401, Train Loss=0.0458, Val Acc=0.8472, Val Loss=0.0451
Epoch 4: Train Acc=0.8499, Train Loss=0.0430, Val Acc=0.8542, Val Loss=0.0425
Epoch 5: Train Acc=0.8568, Train Loss=0.0410, Val Acc=0.8602, Val Loss=0.0407
Epoch 6: Train Acc=0.8620, Train Loss=0.0394, Val Acc=0.8616, Val Loss=0.0394
Epoch 7: Train Acc=0.8654, Train Loss=0.0382, Val Acc=0.8632, Val Loss=0.0384
Epoch 8: Train Acc=0.8694, Train Loss=0.0372, Val Acc=0.8658, Val Loss=0.0376
Epoch 9: Train Acc=0.8728, Train Loss=0.0362, Val Acc=0.8684, Val Loss=0.0368
Epoch 10: Train Acc=0.8752, Train Loss=0.0355, Val Acc=0.8718, Val Loss=0.0362


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_acc,▁▄▆▆▇▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.87518
train_loss,0.03546
val_acc,0.8718
val_loss,0.03623


[34m[1mwandb[0m: Agent Starting Run: tz2ios2w with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁▂▆▇█
val_acc,▁▁▁▁▁
val_loss,▁▅▇██

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: llxkeyq4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23032
val_acc,0.0976
val_loss,0.2304


[34m[1mwandb[0m: Agent Starting Run: ftfwfd15 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6404, Train Loss=0.0925, Val Acc=0.6454, Val Loss=0.0925
Epoch 2: Train Acc=0.6872, Train Loss=0.0857, Val Acc=0.6890, Val Loss=0.0859
Epoch 3: Train Acc=0.7141, Train Loss=0.0804, Val Acc=0.7176, Val Loss=0.0806
Epoch 4: Train Acc=0.7381, Train Loss=0.0745, Val Acc=0.7470, Val Loss=0.0749
Epoch 5: Train Acc=0.7528, Train Loss=0.0704, Val Acc=0.7576, Val Loss=0.0710
Epoch 6: Train Acc=0.7594, Train Loss=0.0680, Val Acc=0.7626, Val Loss=0.0687
Epoch 7: Train Acc=0.7647, Train Loss=0.0663, Val Acc=0.7674, Val Loss=0.0671
Epoch 8: Train Acc=0.7686, Train Loss=0.0648, Val Acc=0.7716, Val Loss=0.0656
Epoch 9: Train Acc=0.7734, Train Loss=0.0635, Val Acc=0.7762, Val Loss=0.0644
Epoch 10: Train Acc=0.7781, Train Loss=0.0623, Val Acc=0.7804, Val Loss=0.0632


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▆▇▇▇███
train_loss,█▆▅▄▃▂▂▂▁▁
val_acc,▁▃▅▆▇▇▇███
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.77805
train_loss,0.06226
val_acc,0.7804
val_loss,0.06318


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f99xvzx7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2009, Train Loss=0.2206, Val Acc=0.1958, Val Loss=0.2206
Epoch 2: Train Acc=0.2053, Train Loss=0.1710, Val Acc=0.2002, Val Loss=0.1709
Epoch 3: Train Acc=0.2938, Train Loss=0.1552, Val Acc=0.2880, Val Loss=0.1556
Epoch 4: Train Acc=0.2952, Train Loss=0.1478, Val Acc=0.2890, Val Loss=0.1487
Epoch 5: Train Acc=0.4311, Train Loss=0.1290, Val Acc=0.4316, Val Loss=0.1293


0,1
epoch,▁▃▅▆█
train_acc,▁▁▄▄█
train_loss,█▄▃▂▁
val_acc,▁▁▄▄█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.43111
train_loss,0.12901
val_acc,0.4316
val_loss,0.12926


[34m[1mwandb[0m: Agent Starting Run: 7stlxi2l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8359, Train Loss=0.0456, Val Acc=0.8440, Val Loss=0.0448
Epoch 2: Train Acc=0.8532, Train Loss=0.0408, Val Acc=0.8552, Val Loss=0.0406
Epoch 3: Train Acc=0.8620, Train Loss=0.0382, Val Acc=0.8610, Val Loss=0.0387
Epoch 4: Train Acc=0.8684, Train Loss=0.0364, Val Acc=0.8664, Val Loss=0.0373
Epoch 5: Train Acc=0.8736, Train Loss=0.0349, Val Acc=0.8708, Val Loss=0.0363
Epoch 6: Train Acc=0.8779, Train Loss=0.0336, Val Acc=0.8738, Val Loss=0.0355
Epoch 7: Train Acc=0.8812, Train Loss=0.0325, Val Acc=0.8762, Val Loss=0.0348
Epoch 8: Train Acc=0.8848, Train Loss=0.0316, Val Acc=0.8778, Val Loss=0.0342
Epoch 9: Train Acc=0.8875, Train Loss=0.0307, Val Acc=0.8778, Val Loss=0.0336
Epoch 10: Train Acc=0.8901, Train Loss=0.0299, Val Acc=0.8782, Val Loss=0.0332


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▃▄▆▆▇████
val_loss,█▅▄▄▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.89015
train_loss,0.02993
val_acc,0.8782
val_loss,0.0332


[34m[1mwandb[0m: Agent Starting Run: hb20yhrg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.0732, Train Loss=0.2298, Val Acc=0.0740, Val Loss=0.2297
Epoch 2: Train Acc=0.1001, Train Loss=0.2280, Val Acc=0.1020, Val Loss=0.2280
Epoch 3: Train Acc=0.1285, Train Loss=0.2261, Val Acc=0.1272, Val Loss=0.2261
Epoch 4: Train Acc=0.1587, Train Loss=0.2241, Val Acc=0.1588, Val Loss=0.2241
Epoch 5: Train Acc=0.1813, Train Loss=0.2219, Val Acc=0.1800, Val Loss=0.2219
Epoch 6: Train Acc=0.1941, Train Loss=0.2194, Val Acc=0.1960, Val Loss=0.2193
Epoch 7: Train Acc=0.2011, Train Loss=0.2164, Val Acc=0.2028, Val Loss=0.2163
Epoch 8: Train Acc=0.2083, Train Loss=0.2128, Val Acc=0.2104, Val Loss=0.2128
Epoch 9: Train Acc=0.2118, Train Loss=0.2088, Val Acc=0.2156, Val Loss=0.2086
Epoch 10: Train Acc=0.2088, Train Loss=0.2041, Val Acc=0.2134, Val Loss=0.2039


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▄▅▆▇▇███
train_loss,██▇▆▆▅▄▃▂▁
val_acc,▁▂▄▅▆▇▇███
val_loss,██▇▆▆▅▄▃▂▁

0,1
epoch,10.0
train_acc,0.2088
train_loss,0.20407
val_acc,0.2134
val_loss,0.20391


[34m[1mwandb[0m: Agent Starting Run: rhj30i0g with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 2: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 3: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 4: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 5: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2307


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▄▃▁

0,1
epoch,5.0
train_acc,0.10018
train_loss,0.23065
val_acc,0.098
val_loss,0.23066


[34m[1mwandb[0m: Agent Starting Run: oo8t827u with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7134, Train Loss=0.0775, Val Acc=0.7214, Val Loss=0.0775
Epoch 2: Train Acc=0.7897, Train Loss=0.0591, Val Acc=0.7930, Val Loss=0.0597
Epoch 3: Train Acc=0.8134, Train Loss=0.0509, Val Acc=0.8132, Val Loss=0.0519
Epoch 4: Train Acc=0.8239, Train Loss=0.0474, Val Acc=0.8218, Val Loss=0.0489
Epoch 5: Train Acc=0.8413, Train Loss=0.0445, Val Acc=0.8344, Val Loss=0.0465


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▂▂▁
val_acc,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.84131
train_loss,0.04455
val_acc,0.8344
val_loss,0.04648


[34m[1mwandb[0m: Agent Starting Run: jni17ggr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5695, Train Loss=0.1016, Val Acc=0.5696, Val Loss=0.1015
Epoch 2: Train Acc=0.6554, Train Loss=0.0824, Val Acc=0.6584, Val Loss=0.0826
Epoch 3: Train Acc=0.6829, Train Loss=0.0750, Val Acc=0.6848, Val Loss=0.0748
Epoch 4: Train Acc=0.7221, Train Loss=0.0669, Val Acc=0.7244, Val Loss=0.0663
Epoch 5: Train Acc=0.7790, Train Loss=0.0605, Val Acc=0.7846, Val Loss=0.0601


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▆█
train_loss,█▅▃▂▁
val_acc,▁▄▅▆█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.77902
train_loss,0.06052
val_acc,0.7846
val_loss,0.06012


[34m[1mwandb[0m: Agent Starting Run: u90miv6d with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5460, Train Loss=0.1711, Val Acc=0.5462, Val Loss=0.1709
Epoch 2: Train Acc=0.6159, Train Loss=0.1436, Val Acc=0.6138, Val Loss=0.1433
Epoch 3: Train Acc=0.6501, Train Loss=0.1246, Val Acc=0.6520, Val Loss=0.1242
Epoch 4: Train Acc=0.6810, Train Loss=0.1105, Val Acc=0.6802, Val Loss=0.1101
Epoch 5: Train Acc=0.7008, Train Loss=0.1000, Val Acc=0.7008, Val Loss=0.0995


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.70078
train_loss,0.09996
val_acc,0.7008
val_loss,0.09952


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6d2r7ifd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 2: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 3: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 4: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 5: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.2305
val_acc,0.0976
val_loss,0.23058


[34m[1mwandb[0m: Agent Starting Run: pwxngyz2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 2: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 3: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 4: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 5: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 6: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 7: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 8: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 9: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306
Epoch 10: Train Acc=0.1002, Train Loss=0.2305, Val Acc=0.0976, Val Loss=0.2306


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▆▅▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.2305
val_acc,0.0976
val_loss,0.23059


[34m[1mwandb[0m: Agent Starting Run: bnudjsc2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8013, Train Loss=0.0574, Val Acc=0.8066, Val Loss=0.0566
Epoch 2: Train Acc=0.8215, Train Loss=0.0517, Val Acc=0.8272, Val Loss=0.0509
Epoch 3: Train Acc=0.8317, Train Loss=0.0487, Val Acc=0.8368, Val Loss=0.0480
Epoch 4: Train Acc=0.8373, Train Loss=0.0468, Val Acc=0.8428, Val Loss=0.0461
Epoch 5: Train Acc=0.8419, Train Loss=0.0454, Val Acc=0.8478, Val Loss=0.0448


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.84193
train_loss,0.04538
val_acc,0.8478
val_loss,0.04476


[34m[1mwandb[0m: Agent Starting Run: 1es2tsvf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.8057, Train Loss=0.0527, Val Acc=0.8076, Val Loss=0.0538
Epoch 2: Train Acc=0.8457, Train Loss=0.0437, Val Acc=0.8424, Val Loss=0.0447
Epoch 3: Train Acc=0.8606, Train Loss=0.0386, Val Acc=0.8474, Val Loss=0.0412
Epoch 4: Train Acc=0.8701, Train Loss=0.0361, Val Acc=0.8614, Val Loss=0.0391
Epoch 5: Train Acc=0.8786, Train Loss=0.0338, Val Acc=0.8666, Val Loss=0.0375


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.87858
train_loss,0.03378
val_acc,0.8666
val_loss,0.03752


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rmchhhkj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.3834, Train Loss=0.1461, Val Acc=0.3916, Val Loss=0.1465
Epoch 2: Train Acc=0.4145, Train Loss=0.1179, Val Acc=0.4196, Val Loss=0.1181
Epoch 3: Train Acc=0.4789, Train Loss=0.1088, Val Acc=0.4794, Val Loss=0.1090
Epoch 4: Train Acc=0.4843, Train Loss=0.1045, Val Acc=0.4832, Val Loss=0.1048
Epoch 5: Train Acc=0.5034, Train Loss=0.1013, Val Acc=0.5006, Val Loss=0.1016


0,1
epoch,▁▃▅▆█
train_acc,▁▃▇▇█
train_loss,█▄▂▁▁
val_acc,▁▃▇▇█
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.50338
train_loss,0.10133
val_acc,0.5006
val_loss,0.1016


[34m[1mwandb[0m: Agent Starting Run: m9mf1mcf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1999, Train Loss=0.1690, Val Acc=0.1980, Val Loss=0.1688
Epoch 2: Train Acc=0.2021, Train Loss=0.1671, Val Acc=0.2004, Val Loss=0.1669
Epoch 3: Train Acc=0.3839, Train Loss=0.1464, Val Acc=0.3910, Val Loss=0.1468
Epoch 4: Train Acc=0.3928, Train Loss=0.1236, Val Acc=0.3990, Val Loss=0.1240
Epoch 5: Train Acc=0.4067, Train Loss=0.1179, Val Acc=0.4124, Val Loss=0.1183


0,1
epoch,▁▃▅▆█
train_acc,▁▁▇██
train_loss,██▅▂▁
val_acc,▁▁▇██
val_loss,██▅▂▁

0,1
epoch,5.0
train_acc,0.40675
train_loss,0.11791
val_acc,0.4124
val_loss,0.11825


[34m[1mwandb[0m: Agent Starting Run: 5e0faqg4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.2609, Train Loss=0.2167, Val Acc=0.2656, Val Loss=0.2165
Epoch 2: Train Acc=0.3511, Train Loss=0.2091, Val Acc=0.3536, Val Loss=0.2089
Epoch 3: Train Acc=0.4120, Train Loss=0.2029, Val Acc=0.4214, Val Loss=0.2026
Epoch 4: Train Acc=0.4684, Train Loss=0.1975, Val Acc=0.4762, Val Loss=0.1972
Epoch 5: Train Acc=0.5057, Train Loss=0.1927, Val Acc=0.5118, Val Loss=0.1924


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▇█
train_loss,█▆▄▂▁
val_acc,▁▄▅▇█
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_acc,0.50573
train_loss,0.19273
val_acc,0.5118
val_loss,0.1924


[34m[1mwandb[0m: Agent Starting Run: 4qnva48o with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1486, Train Loss=0.2301, Val Acc=0.1420, Val Loss=0.2302
Epoch 3: Train Acc=0.3115, Train Loss=0.1513, Val Acc=0.3246, Val Loss=0.1507
Epoch 4: Train Acc=0.6354, Train Loss=0.0953, Val Acc=0.6450, Val Loss=0.0951
Epoch 5: Train Acc=0.7230, Train Loss=0.0792, Val Acc=0.7294, Val Loss=0.0793
Epoch 6: Train Acc=0.7715, Train Loss=0.0636, Val Acc=0.7776, Val Loss=0.0644
Epoch 7: Train Acc=0.8037, Train Loss=0.0526, Val Acc=0.8096, Val Loss=0.0529
Epoch 8: Train Acc=0.8367, Train Loss=0.0465, Val Acc=0.8374, Val Loss=0.0474
Epoch 9: Train Acc=0.8507, Train Loss=0.0427, Val Acc=0.8472, Val Loss=0.0442
Epoch 10: Train Acc=0.8597, Train Loss=0.0400, Val Acc=0.8542, Val Loss=0.0420


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▃▆▇▇▇███
train_loss,██▅▃▂▂▁▁▁▁
val_acc,▁▁▃▆▇▇████
val_loss,██▅▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.85969
train_loss,0.03995
val_acc,0.8542
val_loss,0.04202


[34m[1mwandb[0m: Agent Starting Run: ol6riadw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7471, Train Loss=0.0697, Val Acc=0.7578, Val Loss=0.0694
Epoch 2: Train Acc=0.8364, Train Loss=0.0487, Val Acc=0.8392, Val Loss=0.0505
Epoch 3: Train Acc=0.8527, Train Loss=0.0439, Val Acc=0.8496, Val Loss=0.0469
Epoch 4: Train Acc=0.8630, Train Loss=0.0409, Val Acc=0.8556, Val Loss=0.0442
Epoch 5: Train Acc=0.8708, Train Loss=0.0383, Val Acc=0.8628, Val Loss=0.0420
Epoch 6: Train Acc=0.8753, Train Loss=0.0366, Val Acc=0.8642, Val Loss=0.0407
Epoch 7: Train Acc=0.8771, Train Loss=0.0360, Val Acc=0.8664, Val Loss=0.0407
Epoch 8: Train Acc=0.8794, Train Loss=0.0351, Val Acc=0.8674, Val Loss=0.0404
Epoch 9: Train Acc=0.8787, Train Loss=0.0353, Val Acc=0.8652, Val Loss=0.0413
Epoch 10: Train Acc=0.8805, Train Loss=0.0344, Val Acc=0.8678, Val Loss=0.0408


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▄▃▂▂▁▁▁▁▁
val_acc,▁▆▇▇██████
val_loss,█▃▃▂▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.88051
train_loss,0.03443
val_acc,0.8678
val_loss,0.04076


[34m[1mwandb[0m: Agent Starting Run: g9uakq3l with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁████
train_loss,█▁▁▁▁
val_acc,█▁▁▁▁
val_loss,█▂▁▁▁

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: l17oxnxh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2061, Train Loss=0.1702, Val Acc=0.2028, Val Loss=0.1702
Epoch 2: Train Acc=0.2276, Train Loss=0.1666, Val Acc=0.2248, Val Loss=0.1665
Epoch 3: Train Acc=0.3787, Train Loss=0.1516, Val Acc=0.3812, Val Loss=0.1519
Epoch 4: Train Acc=0.3987, Train Loss=0.1364, Val Acc=0.3986, Val Loss=0.1365
Epoch 5: Train Acc=0.4246, Train Loss=0.1283, Val Acc=0.4292, Val Loss=0.1285


0,1
epoch,▁▃▅▆█
train_acc,▁▂▇▇█
train_loss,█▇▅▂▁
val_acc,▁▂▇▇█
val_loss,█▇▅▂▁

0,1
epoch,5.0
train_acc,0.42464
train_loss,0.12828
val_acc,0.4292
val_loss,0.12846


[34m[1mwandb[0m: Agent Starting Run: zimx0zw4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6577, Train Loss=0.1015, Val Acc=0.6582, Val Loss=0.1016
Epoch 2: Train Acc=0.7172, Train Loss=0.0840, Val Acc=0.7208, Val Loss=0.0842
Epoch 3: Train Acc=0.7311, Train Loss=0.0760, Val Acc=0.7354, Val Loss=0.0764
Epoch 4: Train Acc=0.7455, Train Loss=0.0709, Val Acc=0.7494, Val Loss=0.0711
Epoch 5: Train Acc=0.7616, Train Loss=0.0669, Val Acc=0.7634, Val Loss=0.0670
Epoch 6: Train Acc=0.7722, Train Loss=0.0635, Val Acc=0.7736, Val Loss=0.0637
Epoch 7: Train Acc=0.7808, Train Loss=0.0608, Val Acc=0.7838, Val Loss=0.0611
Epoch 8: Train Acc=0.7873, Train Loss=0.0587, Val Acc=0.7896, Val Loss=0.0590
Epoch 9: Train Acc=0.7929, Train Loss=0.0570, Val Acc=0.7944, Val Loss=0.0572
Epoch 10: Train Acc=0.7990, Train Loss=0.0555, Val Acc=0.8000, Val Loss=0.0557


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▅▆▇▇▇██
train_loss,█▅▄▃▃▂▂▁▁▁
val_acc,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.79896
train_loss,0.05546
val_acc,0.8
val_loss,0.05569


[34m[1mwandb[0m: Agent Starting Run: 0aquuadw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.3651, Train Loss=0.2186, Val Acc=0.3720, Val Loss=0.2183
Epoch 2: Train Acc=0.4933, Train Loss=0.2052, Val Acc=0.5020, Val Loss=0.2046
Epoch 3: Train Acc=0.5325, Train Loss=0.1855, Val Acc=0.5446, Val Loss=0.1846
Epoch 4: Train Acc=0.5562, Train Loss=0.1623, Val Acc=0.5706, Val Loss=0.1611
Epoch 5: Train Acc=0.6131, Train Loss=0.1410, Val Acc=0.6218, Val Loss=0.1397


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▆█
train_loss,█▇▅▃▁
val_acc,▁▅▆▇█
val_loss,█▇▅▃▁

0,1
epoch,5.0
train_acc,0.61313
train_loss,0.141
val_acc,0.6218
val_loss,0.13967


[34m[1mwandb[0m: Agent Starting Run: qcwe5hlu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7147, Train Loss=0.0733, Val Acc=0.7174, Val Loss=0.0731
Epoch 2: Train Acc=0.7773, Train Loss=0.0627, Val Acc=0.7848, Val Loss=0.0625
Epoch 3: Train Acc=0.7963, Train Loss=0.0562, Val Acc=0.8016, Val Loss=0.0565
Epoch 4: Train Acc=0.8057, Train Loss=0.0528, Val Acc=0.8096, Val Loss=0.0535
Epoch 5: Train Acc=0.8121, Train Loss=0.0506, Val Acc=0.8144, Val Loss=0.0516


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇██
train_loss,█▅▃▂▁
val_acc,▁▆▇██
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.81211
train_loss,0.0506
val_acc,0.8144
val_loss,0.0516


[34m[1mwandb[0m: Agent Starting Run: lgs4dzhj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1993, Train Loss=0.1705, Val Acc=0.2124, Val Loss=0.1703
Epoch 2: Train Acc=0.1999, Train Loss=0.1691, Val Acc=0.1984, Val Loss=0.1688
Epoch 3: Train Acc=0.2001, Train Loss=0.1684, Val Acc=0.1988, Val Loss=0.1681
Epoch 4: Train Acc=0.2005, Train Loss=0.1680, Val Acc=0.1998, Val Loss=0.1676
Epoch 5: Train Acc=0.2060, Train Loss=0.1676, Val Acc=0.2206, Val Loss=0.1672
Epoch 6: Train Acc=0.2021, Train Loss=0.1672, Val Acc=0.2182, Val Loss=0.1669
Epoch 7: Train Acc=0.2017, Train Loss=0.1668, Val Acc=0.2162, Val Loss=0.1665
Epoch 8: Train Acc=0.2011, Train Loss=0.1665, Val Acc=0.2154, Val Loss=0.1662
Epoch 9: Train Acc=0.2003, Train Loss=0.1660, Val Acc=0.2140, Val Loss=0.1658
Epoch 10: Train Acc=0.2011, Train Loss=0.1654, Val Acc=0.2160, Val Loss=0.1652


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▂▂█▄▃▃▂▃
train_loss,█▆▅▅▄▃▃▂▂▁
val_acc,▅▁▁▁█▇▇▆▆▇
val_loss,█▆▅▄▄▃▃▂▂▁

0,1
epoch,10.0
train_acc,0.20113
train_loss,0.16536
val_acc,0.216
val_loss,0.16516


[34m[1mwandb[0m: Agent Starting Run: ar6wolyz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7676, Train Loss=0.0829, Val Acc=0.7704, Val Loss=0.0823
Epoch 2: Train Acc=0.7920, Train Loss=0.0690, Val Acc=0.7994, Val Loss=0.0683
Epoch 3: Train Acc=0.8068, Train Loss=0.0621, Val Acc=0.8112, Val Loss=0.0614
Epoch 4: Train Acc=0.8161, Train Loss=0.0578, Val Acc=0.8216, Val Loss=0.0571
Epoch 5: Train Acc=0.8228, Train Loss=0.0548, Val Acc=0.8268, Val Loss=0.0541


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.82278
train_loss,0.05483
val_acc,0.8268
val_loss,0.05411


[34m[1mwandb[0m: Agent Starting Run: qiaq11sw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.4035, Train Loss=0.1275, Val Acc=0.4084, Val Loss=0.1279
Epoch 2: Train Acc=0.4316, Train Loss=0.1217, Val Acc=0.4352, Val Loss=0.1220
Epoch 3: Train Acc=0.4558, Train Loss=0.1187, Val Acc=0.4630, Val Loss=0.1189
Epoch 4: Train Acc=0.4732, Train Loss=0.1163, Val Acc=0.4800, Val Loss=0.1164
Epoch 5: Train Acc=0.4873, Train Loss=0.1141, Val Acc=0.4928, Val Loss=0.1141
Epoch 6: Train Acc=0.5081, Train Loss=0.1119, Val Acc=0.5164, Val Loss=0.1118
Epoch 7: Train Acc=0.5474, Train Loss=0.1099, Val Acc=0.5530, Val Loss=0.1096
Epoch 8: Train Acc=0.5853, Train Loss=0.1080, Val Acc=0.5944, Val Loss=0.1076
Epoch 9: Train Acc=0.6051, Train Loss=0.1064, Val Acc=0.6112, Val Loss=0.1059
Epoch 10: Train Acc=0.6158, Train Loss=0.1049, Val Acc=0.6206, Val Loss=0.1043


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▃▃▄▄▆▇██
train_loss,█▆▅▅▄▃▃▂▁▁
val_acc,▁▂▃▃▄▅▆▇██
val_loss,█▆▅▅▄▃▃▂▁▁

0,1
epoch,10.0
train_acc,0.61582
train_loss,0.10492
val_acc,0.6206
val_loss,0.1043


[34m[1mwandb[0m: Agent Starting Run: el3pmh6j with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▁▁▁▁
val_acc,▁▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: s1u2e7tz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1965, Train Loss=0.2269, Val Acc=0.2000, Val Loss=0.2269
Epoch 2: Train Acc=0.1945, Train Loss=0.1980, Val Acc=0.1912, Val Loss=0.1979
Epoch 3: Train Acc=0.1998, Train Loss=0.1741, Val Acc=0.1958, Val Loss=0.1740
Epoch 4: Train Acc=0.2014, Train Loss=0.1696, Val Acc=0.1998, Val Loss=0.1694
Epoch 5: Train Acc=0.2051, Train Loss=0.1679, Val Acc=0.1966, Val Loss=0.1678
Epoch 6: Train Acc=0.2965, Train Loss=0.1620, Val Acc=0.2878, Val Loss=0.1621
Epoch 7: Train Acc=0.3575, Train Loss=0.1517, Val Acc=0.3578, Val Loss=0.1519
Epoch 8: Train Acc=0.3645, Train Loss=0.1421, Val Acc=0.3668, Val Loss=0.1423
Epoch 9: Train Acc=0.3892, Train Loss=0.1348, Val Acc=0.3990, Val Loss=0.1349
Epoch 10: Train Acc=0.3984, Train Loss=0.1293, Val Acc=0.4060, Val Loss=0.1295


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▅▇▇██
train_loss,█▆▄▄▄▃▃▂▁▁
val_acc,▁▁▁▁▁▄▆▇██
val_loss,█▆▄▄▄▃▃▂▁▁

0,1
epoch,10.0
train_acc,0.39844
train_loss,0.12935
val_acc,0.406
val_loss,0.12946


[34m[1mwandb[0m: Agent Starting Run: 3j0ijgzv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2171, Train Loss=0.1827, Val Acc=0.2104, Val Loss=0.1821
Epoch 2: Train Acc=0.2991, Train Loss=0.1690, Val Acc=0.2966, Val Loss=0.1685
Epoch 3: Train Acc=0.2967, Train Loss=0.1537, Val Acc=0.2958, Val Loss=0.1531
Epoch 4: Train Acc=0.4124, Train Loss=0.1343, Val Acc=0.4174, Val Loss=0.1338
Epoch 5: Train Acc=0.5415, Train Loss=0.1156, Val Acc=0.5544, Val Loss=0.1144
Epoch 6: Train Acc=0.6252, Train Loss=0.0950, Val Acc=0.6354, Val Loss=0.0937
Epoch 7: Train Acc=0.6637, Train Loss=0.0824, Val Acc=0.6770, Val Loss=0.0810
Epoch 8: Train Acc=0.7308, Train Loss=0.0691, Val Acc=0.7426, Val Loss=0.0681
Epoch 9: Train Acc=0.7546, Train Loss=0.0640, Val Acc=0.7588, Val Loss=0.0634
Epoch 10: Train Acc=0.8002, Train Loss=0.0594, Val Acc=0.8010, Val Loss=0.0590


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▂▃▅▆▆▇▇█
train_loss,█▇▆▅▄▃▂▂▁▁
val_acc,▁▂▂▃▅▆▇▇▇█
val_loss,█▇▆▅▄▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.80022
train_loss,0.05945
val_acc,0.801
val_loss,0.05905


[34m[1mwandb[0m: Agent Starting Run: ztdii9fh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303
Epoch 2: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303
Epoch 3: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303
Epoch 4: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303
Epoch 5: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303
Epoch 6: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303
Epoch 7: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303
Epoch 8: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303
Epoch 9: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303
Epoch 10: Train Acc=0.1001, Train Loss=0.2303, Val Acc=0.0986, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▅▃▂▂▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▃▁▁▂▃▄▄▅▆

0,1
epoch,10.0
train_acc,0.10013
train_loss,0.23026
val_acc,0.0986
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: lym36xbo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁█████████
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁█████████

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: 65hpwtip with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.2996, Train Loss=0.2251, Val Acc=0.3042, Val Loss=0.2251
Epoch 2: Train Acc=0.4006, Train Loss=0.2163, Val Acc=0.4048, Val Loss=0.2161
Epoch 3: Train Acc=0.4272, Train Loss=0.1895, Val Acc=0.4398, Val Loss=0.1888
Epoch 4: Train Acc=0.5363, Train Loss=0.1382, Val Acc=0.5486, Val Loss=0.1367
Epoch 5: Train Acc=0.6050, Train Loss=0.1067, Val Acc=0.6132, Val Loss=0.1051
Epoch 6: Train Acc=0.6452, Train Loss=0.0943, Val Acc=0.6556, Val Loss=0.0930
Epoch 7: Train Acc=0.6738, Train Loss=0.0875, Val Acc=0.6822, Val Loss=0.0863
Epoch 8: Train Acc=0.6975, Train Loss=0.0824, Val Acc=0.7048, Val Loss=0.0813
Epoch 9: Train Acc=0.7152, Train Loss=0.0781, Val Acc=0.7252, Val Loss=0.0770
Epoch 10: Train Acc=0.7298, Train Loss=0.0744, Val Acc=0.7352, Val Loss=0.0734


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▃▅▆▇▇▇██
train_loss,██▆▄▂▂▂▁▁▁
val_acc,▁▃▃▅▆▇▇███
val_loss,██▆▄▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.72982
train_loss,0.07442
val_acc,0.7352
val_loss,0.07336


[34m[1mwandb[0m: Agent Starting Run: jsmqqgs2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7674, Train Loss=0.0650, Val Acc=0.7720, Val Loss=0.0647
Epoch 2: Train Acc=0.7991, Train Loss=0.0534, Val Acc=0.8060, Val Loss=0.0529
Epoch 3: Train Acc=0.8196, Train Loss=0.0485, Val Acc=0.8242, Val Loss=0.0484
Epoch 4: Train Acc=0.8378, Train Loss=0.0451, Val Acc=0.8404, Val Loss=0.0453
Epoch 5: Train Acc=0.8474, Train Loss=0.0426, Val Acc=0.8498, Val Loss=0.0431


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.84742
train_loss,0.04262
val_acc,0.8498
val_loss,0.0431


[34m[1mwandb[0m: Agent Starting Run: uv5lr5z4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▃▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Agent Starting Run: ujmknx5w with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1998, Train Loss=0.1687, Val Acc=0.1980, Val Loss=0.1685
Epoch 2: Train Acc=0.2049, Train Loss=0.1678, Val Acc=0.2190, Val Loss=0.1675
Epoch 3: Train Acc=0.2022, Train Loss=0.1673, Val Acc=0.2162, Val Loss=0.1670
Epoch 4: Train Acc=0.2034, Train Loss=0.1668, Val Acc=0.2176, Val Loss=0.1665
Epoch 5: Train Acc=0.2064, Train Loss=0.1661, Val Acc=0.2204, Val Loss=0.1659
Epoch 6: Train Acc=0.2194, Train Loss=0.1648, Val Acc=0.2330, Val Loss=0.1646
Epoch 7: Train Acc=0.3489, Train Loss=0.1585, Val Acc=0.3672, Val Loss=0.1584
Epoch 8: Train Acc=0.3966, Train Loss=0.1450, Val Acc=0.4042, Val Loss=0.1451
Epoch 9: Train Acc=0.4053, Train Loss=0.1355, Val Acc=0.4142, Val Loss=0.1356
Epoch 10: Train Acc=0.4278, Train Loss=0.1298, Val Acc=0.4430, Val Loss=0.1300


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▂▆▇▇█
train_loss,█████▇▆▄▂▁
val_acc,▁▂▂▂▂▂▆▇▇█
val_loss,█████▇▆▄▂▁

0,1
epoch,10.0
train_acc,0.42784
train_loss,0.12984
val_acc,0.443
val_loss,0.13


[34m[1mwandb[0m: Agent Starting Run: 3q5j5u10 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1906, Train Loss=0.2274, Val Acc=0.1872, Val Loss=0.2274
Epoch 2: Train Acc=0.3125, Train Loss=0.1855, Val Acc=0.3232, Val Loss=0.1849
Epoch 3: Train Acc=0.3725, Train Loss=0.1546, Val Acc=0.3760, Val Loss=0.1542
Epoch 4: Train Acc=0.4428, Train Loss=0.1360, Val Acc=0.4488, Val Loss=0.1356
Epoch 5: Train Acc=0.4977, Train Loss=0.1234, Val Acc=0.5034, Val Loss=0.1231
Epoch 6: Train Acc=0.5400, Train Loss=0.1136, Val Acc=0.5434, Val Loss=0.1132
Epoch 7: Train Acc=0.5816, Train Loss=0.1043, Val Acc=0.5944, Val Loss=0.1037
Epoch 8: Train Acc=0.6431, Train Loss=0.0956, Val Acc=0.6658, Val Loss=0.0947
Epoch 9: Train Acc=0.6797, Train Loss=0.0873, Val Acc=0.6934, Val Loss=0.0862
Epoch 10: Train Acc=0.7060, Train Loss=0.0799, Val Acc=0.7228, Val Loss=0.0786


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▃▄▅▆▆▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▃▃▄▅▆▆▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.70604
train_loss,0.07992
val_acc,0.7228
val_loss,0.07863


[34m[1mwandb[0m: Agent Starting Run: dt1udh5q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Agent Starting Run: yw806g0p with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2022, Train Loss=0.1672, Val Acc=0.2008, Val Loss=0.1670
Epoch 2: Train Acc=0.2338, Train Loss=0.1662, Val Acc=0.2454, Val Loss=0.1659
Epoch 3: Train Acc=0.2353, Train Loss=0.1642, Val Acc=0.2482, Val Loss=0.1640
Epoch 4: Train Acc=0.3074, Train Loss=0.1578, Val Acc=0.3192, Val Loss=0.1575
Epoch 5: Train Acc=0.4337, Train Loss=0.1309, Val Acc=0.4448, Val Loss=0.1307


0,1
epoch,▁▃▅▆█
train_acc,▁▂▂▄█
train_loss,██▇▆▁
val_acc,▁▂▂▄█
val_loss,██▇▆▁

0,1
epoch,5.0
train_acc,0.43367
train_loss,0.13093
val_acc,0.4448
val_loss,0.13072


[34m[1mwandb[0m: Agent Starting Run: yaryruy6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6813, Train Loss=0.0826, Val Acc=0.6840, Val Loss=0.0826
Epoch 2: Train Acc=0.7287, Train Loss=0.0682, Val Acc=0.7342, Val Loss=0.0678
Epoch 3: Train Acc=0.7654, Train Loss=0.0592, Val Acc=0.7690, Val Loss=0.0594
Epoch 4: Train Acc=0.8407, Train Loss=0.0481, Val Acc=0.8342, Val Loss=0.0500
Epoch 5: Train Acc=0.8567, Train Loss=0.0436, Val Acc=0.8498, Val Loss=0.0462


0,1
epoch,▁▃▅▆█
train_acc,▁▃▄▇█
train_loss,█▅▄▂▁
val_acc,▁▃▅▇█
val_loss,█▅▄▂▁

0,1
epoch,5.0
train_acc,0.85669
train_loss,0.04364
val_acc,0.8498
val_loss,0.04621


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ogj7ncv6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5847, Train Loss=0.1471, Val Acc=0.5978, Val Loss=0.1461
Epoch 2: Train Acc=0.6834, Train Loss=0.0952, Val Acc=0.6888, Val Loss=0.0941
Epoch 3: Train Acc=0.7299, Train Loss=0.0787, Val Acc=0.7348, Val Loss=0.0778
Epoch 4: Train Acc=0.7642, Train Loss=0.0699, Val Acc=0.7690, Val Loss=0.0691
Epoch 5: Train Acc=0.7865, Train Loss=0.0640, Val Acc=0.7896, Val Loss=0.0633
Epoch 6: Train Acc=0.7987, Train Loss=0.0600, Val Acc=0.8050, Val Loss=0.0593
Epoch 7: Train Acc=0.8067, Train Loss=0.0570, Val Acc=0.8150, Val Loss=0.0564
Epoch 8: Train Acc=0.8121, Train Loss=0.0548, Val Acc=0.8192, Val Loss=0.0543
Epoch 9: Train Acc=0.8164, Train Loss=0.0531, Val Acc=0.8254, Val Loss=0.0526
Epoch 10: Train Acc=0.8197, Train Loss=0.0518, Val Acc=0.8272, Val Loss=0.0513


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_acc,▁▄▅▆▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.81969
train_loss,0.05176
val_acc,0.8272
val_loss,0.05128


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: loc60m2b with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▅▄▄▃▂▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▄▅▆▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: 6jppp0jz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7437, Train Loss=0.0942, Val Acc=0.7472, Val Loss=0.0938
Epoch 2: Train Acc=0.7810, Train Loss=0.0777, Val Acc=0.7842, Val Loss=0.0772
Epoch 3: Train Acc=0.7983, Train Loss=0.0693, Val Acc=0.8030, Val Loss=0.0686
Epoch 4: Train Acc=0.8086, Train Loss=0.0638, Val Acc=0.8140, Val Loss=0.0631
Epoch 5: Train Acc=0.8158, Train Loss=0.0600, Val Acc=0.8194, Val Loss=0.0592


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.81584
train_loss,0.06002
val_acc,0.8194
val_loss,0.05923


[34m[1mwandb[0m: Agent Starting Run: e1ejjqff with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7489, Train Loss=0.0704, Val Acc=0.7552, Val Loss=0.0704
Epoch 2: Train Acc=0.7803, Train Loss=0.0603, Val Acc=0.7868, Val Loss=0.0601
Epoch 3: Train Acc=0.7944, Train Loss=0.0553, Val Acc=0.8002, Val Loss=0.0551
Epoch 4: Train Acc=0.8080, Train Loss=0.0516, Val Acc=0.8088, Val Loss=0.0514
Epoch 5: Train Acc=0.8239, Train Loss=0.0480, Val Acc=0.8264, Val Loss=0.0476
Epoch 6: Train Acc=0.8399, Train Loss=0.0448, Val Acc=0.8414, Val Loss=0.0445
Epoch 7: Train Acc=0.8496, Train Loss=0.0424, Val Acc=0.8506, Val Loss=0.0423
Epoch 8: Train Acc=0.8559, Train Loss=0.0407, Val Acc=0.8560, Val Loss=0.0408
Epoch 9: Train Acc=0.8601, Train Loss=0.0393, Val Acc=0.8580, Val Loss=0.0396
Epoch 10: Train Acc=0.8648, Train Loss=0.0381, Val Acc=0.8644, Val Loss=0.0386


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_acc,▁▃▄▄▆▇▇▇██
val_loss,█▆▅▄▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.86484
train_loss,0.03812
val_acc,0.8644
val_loss,0.03856


[34m[1mwandb[0m: Agent Starting Run: h0o9nhej with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▄█▇▇▆▅▄▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁██▇▇▆▆▅▅▄

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: 54sfkixn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8609, Train Loss=0.0379, Val Acc=0.8580, Val Loss=0.0383
Epoch 2: Train Acc=0.8769, Train Loss=0.0330, Val Acc=0.8718, Val Loss=0.0349
Epoch 3: Train Acc=0.8861, Train Loss=0.0305, Val Acc=0.8780, Val Loss=0.0336
Epoch 4: Train Acc=0.8935, Train Loss=0.0284, Val Acc=0.8810, Val Loss=0.0326
Epoch 5: Train Acc=0.8989, Train Loss=0.0267, Val Acc=0.8858, Val Loss=0.0318


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.89889
train_loss,0.0267
val_acc,0.8858
val_loss,0.03177


[34m[1mwandb[0m: Agent Starting Run: oju7yyae with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0980, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁███
train_loss,█▂▁▁▁
val_acc,██▁▁▁
val_loss,█▁▁▁▁

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: 7xkds4nc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7263, Train Loss=0.0836, Val Acc=0.7302, Val Loss=0.0824
Epoch 2: Train Acc=0.7813, Train Loss=0.0655, Val Acc=0.7870, Val Loss=0.0644
Epoch 3: Train Acc=0.8057, Train Loss=0.0575, Val Acc=0.8126, Val Loss=0.0564
Epoch 4: Train Acc=0.8182, Train Loss=0.0529, Val Acc=0.8224, Val Loss=0.0518
Epoch 5: Train Acc=0.8254, Train Loss=0.0500, Val Acc=0.8308, Val Loss=0.0490


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.82538
train_loss,0.05002
val_acc,0.8308
val_loss,0.04897


[34m[1mwandb[0m: Agent Starting Run: hgonntye with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▅▄▃▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▃▄▅▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Agent Starting Run: 4ssi8o8n with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8470, Train Loss=0.0428, Val Acc=0.8474, Val Loss=0.0423
Epoch 2: Train Acc=0.8658, Train Loss=0.0371, Val Acc=0.8642, Val Loss=0.0378
Epoch 3: Train Acc=0.8756, Train Loss=0.0342, Val Acc=0.8726, Val Loss=0.0356
Epoch 4: Train Acc=0.8821, Train Loss=0.0323, Val Acc=0.8750, Val Loss=0.0341
Epoch 5: Train Acc=0.8869, Train Loss=0.0308, Val Acc=0.8792, Val Loss=0.0330


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▇▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.88687
train_loss,0.03076
val_acc,0.8792
val_loss,0.03301


[34m[1mwandb[0m: Agent Starting Run: qbbde908 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.8423, Train Loss=0.0444, Val Acc=0.8464, Val Loss=0.0444
Epoch 2: Train Acc=0.8556, Train Loss=0.0395, Val Acc=0.8514, Val Loss=0.0413
Epoch 3: Train Acc=0.8649, Train Loss=0.0371, Val Acc=0.8600, Val Loss=0.0398
Epoch 4: Train Acc=0.8742, Train Loss=0.0354, Val Acc=0.8646, Val Loss=0.0390
Epoch 5: Train Acc=0.8846, Train Loss=0.0326, Val Acc=0.8720, Val Loss=0.0374


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▆█
train_loss,█▅▄▃▁
val_acc,▁▂▅▆█
val_loss,█▅▃▃▁

0,1
epoch,5.0
train_acc,0.88458
train_loss,0.03259
val_acc,0.872
val_loss,0.0374


[34m[1mwandb[0m: Agent Starting Run: mkamv1al with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6092, Train Loss=0.1038, Val Acc=0.6054, Val Loss=0.1036
Epoch 2: Train Acc=0.7438, Train Loss=0.0711, Val Acc=0.7474, Val Loss=0.0710
Epoch 3: Train Acc=0.7631, Train Loss=0.0610, Val Acc=0.7726, Val Loss=0.0604
Epoch 4: Train Acc=0.7806, Train Loss=0.0562, Val Acc=0.7888, Val Loss=0.0553
Epoch 5: Train Acc=0.7950, Train Loss=0.0528, Val Acc=0.8006, Val Loss=0.0519


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇▇█
train_loss,█▄▂▁▁
val_acc,▁▆▇██
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.795
train_loss,0.05279
val_acc,0.8006
val_loss,0.05192


[34m[1mwandb[0m: Agent Starting Run: wqf41ne4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5786, Train Loss=0.1362, Val Acc=0.5816, Val Loss=0.1356
Epoch 2: Train Acc=0.6172, Train Loss=0.1092, Val Acc=0.6182, Val Loss=0.1090
Epoch 3: Train Acc=0.6595, Train Loss=0.0949, Val Acc=0.6664, Val Loss=0.0948
Epoch 4: Train Acc=0.6963, Train Loss=0.0858, Val Acc=0.7034, Val Loss=0.0858
Epoch 5: Train Acc=0.7175, Train Loss=0.0794, Val Acc=0.7236, Val Loss=0.0794
Epoch 6: Train Acc=0.7361, Train Loss=0.0746, Val Acc=0.7422, Val Loss=0.0745
Epoch 7: Train Acc=0.7503, Train Loss=0.0706, Val Acc=0.7558, Val Loss=0.0704
Epoch 8: Train Acc=0.7604, Train Loss=0.0672, Val Acc=0.7642, Val Loss=0.0670
Epoch 9: Train Acc=0.7672, Train Loss=0.0645, Val Acc=0.7710, Val Loss=0.0641
Epoch 10: Train Acc=0.7737, Train Loss=0.0621, Val Acc=0.7738, Val Loss=0.0617


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▄▅▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_acc,▁▂▄▅▆▇▇███
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.77375
train_loss,0.06214
val_acc,0.7738
val_loss,0.06167


[34m[1mwandb[0m: Agent Starting Run: thtaba78 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5013, Train Loss=0.1711, Val Acc=0.5040, Val Loss=0.1705
Epoch 2: Train Acc=0.5504, Train Loss=0.1449, Val Acc=0.5554, Val Loss=0.1442
Epoch 3: Train Acc=0.5689, Train Loss=0.1304, Val Acc=0.5682, Val Loss=0.1297
Epoch 4: Train Acc=0.5833, Train Loss=0.1202, Val Acc=0.5816, Val Loss=0.1197
Epoch 5: Train Acc=0.5981, Train Loss=0.1125, Val Acc=0.5944, Val Loss=0.1121
Epoch 6: Train Acc=0.6124, Train Loss=0.1062, Val Acc=0.6096, Val Loss=0.1060
Epoch 7: Train Acc=0.6286, Train Loss=0.1009, Val Acc=0.6264, Val Loss=0.1007
Epoch 8: Train Acc=0.6475, Train Loss=0.0962, Val Acc=0.6516, Val Loss=0.0961
Epoch 9: Train Acc=0.6670, Train Loss=0.0919, Val Acc=0.6710, Val Loss=0.0918
Epoch 10: Train Acc=0.6851, Train Loss=0.0878, Val Acc=0.6908, Val Loss=0.0878


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▄▅▅▆▇▇█
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▃▃▄▄▅▆▇▇█
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.68509
train_loss,0.08783
val_acc,0.6908
val_loss,0.08784


[34m[1mwandb[0m: Agent Starting Run: rmpx2ylt with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1987, Train Loss=0.2266, Val Acc=0.1936, Val Loss=0.2267
Epoch 2: Train Acc=0.2004, Train Loss=0.2140, Val Acc=0.1948, Val Loss=0.2141
Epoch 3: Train Acc=0.2044, Train Loss=0.1986, Val Acc=0.1934, Val Loss=0.1986
Epoch 4: Train Acc=0.2030, Train Loss=0.1877, Val Acc=0.1916, Val Loss=0.1877
Epoch 5: Train Acc=0.2044, Train Loss=0.1810, Val Acc=0.1936, Val Loss=0.1810


0,1
epoch,▁▃▅▆█
train_acc,▁▃█▆█
train_loss,█▆▄▂▁
val_acc,▅█▅▁▅
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_acc,0.2044
train_loss,0.18099
val_acc,0.1936
val_loss,0.18097


[34m[1mwandb[0m: Agent Starting Run: 8kdkpxqy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8127, Train Loss=0.0541, Val Acc=0.8180, Val Loss=0.0531
Epoch 2: Train Acc=0.8414, Train Loss=0.0445, Val Acc=0.8440, Val Loss=0.0436
Epoch 3: Train Acc=0.8543, Train Loss=0.0409, Val Acc=0.8574, Val Loss=0.0403
Epoch 4: Train Acc=0.8625, Train Loss=0.0388, Val Acc=0.8614, Val Loss=0.0385
Epoch 5: Train Acc=0.8681, Train Loss=0.0372, Val Acc=0.8672, Val Loss=0.0373
Epoch 6: Train Acc=0.8717, Train Loss=0.0359, Val Acc=0.8706, Val Loss=0.0364
Epoch 7: Train Acc=0.8749, Train Loss=0.0349, Val Acc=0.8736, Val Loss=0.0357
Epoch 8: Train Acc=0.8785, Train Loss=0.0340, Val Acc=0.8740, Val Loss=0.0351
Epoch 9: Train Acc=0.8815, Train Loss=0.0332, Val Acc=0.8746, Val Loss=0.0346
Epoch 10: Train Acc=0.8844, Train Loss=0.0325, Val Acc=0.8764, Val Loss=0.0342


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▁▁▁
val_acc,▁▄▆▆▇▇████
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.88444
train_loss,0.03252
val_acc,0.8764
val_loss,0.03417


[34m[1mwandb[0m: Agent Starting Run: piq4vj8h with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1139, Train Loss=0.2246, Val Acc=0.1146, Val Loss=0.2246
Epoch 2: Train Acc=0.1979, Train Loss=0.1967, Val Acc=0.2012, Val Loss=0.1963
Epoch 3: Train Acc=0.2079, Train Loss=0.1801, Val Acc=0.2016, Val Loss=0.1795
Epoch 4: Train Acc=0.2743, Train Loss=0.1652, Val Acc=0.2836, Val Loss=0.1647
Epoch 5: Train Acc=0.3109, Train Loss=0.1561, Val Acc=0.3196, Val Loss=0.1557


0,1
epoch,▁▃▅▆█
train_acc,▁▄▄▇█
train_loss,█▅▃▂▁
val_acc,▁▄▄▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.31091
train_loss,0.15609
val_acc,0.3196
val_loss,0.15571


[34m[1mwandb[0m: Agent Starting Run: elx0oe94 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8136, Train Loss=0.0542, Val Acc=0.8154, Val Loss=0.0536
Epoch 2: Train Acc=0.8314, Train Loss=0.0489, Val Acc=0.8338, Val Loss=0.0483
Epoch 3: Train Acc=0.8387, Train Loss=0.0462, Val Acc=0.8432, Val Loss=0.0457
Epoch 4: Train Acc=0.8453, Train Loss=0.0445, Val Acc=0.8476, Val Loss=0.0441
Epoch 5: Train Acc=0.8492, Train Loss=0.0432, Val Acc=0.8516, Val Loss=0.0429


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.84924
train_loss,0.0432
val_acc,0.8516
val_loss,0.04292


[34m[1mwandb[0m: Agent Starting Run: 18kznfhd with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8443, Train Loss=0.0418, Val Acc=0.8436, Val Loss=0.0422
Epoch 2: Train Acc=0.8612, Train Loss=0.0371, Val Acc=0.8576, Val Loss=0.0390
Epoch 3: Train Acc=0.8737, Train Loss=0.0335, Val Acc=0.8652, Val Loss=0.0362
Epoch 4: Train Acc=0.8827, Train Loss=0.0309, Val Acc=0.8722, Val Loss=0.0345
Epoch 5: Train Acc=0.8903, Train Loss=0.0289, Val Acc=0.8754, Val Loss=0.0334


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.89025
train_loss,0.02894
val_acc,0.8754
val_loss,0.03337


[34m[1mwandb[0m: Agent Starting Run: ii85yjmj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁▃▆▇█
val_acc,▁▁▁▁▁
val_loss,▁▅▇██

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0y18n0mu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2302
Epoch 8: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2302
Epoch 9: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2302
Epoch 10: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2302


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▇▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▇▆▅▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23016
val_acc,0.0976
val_loss,0.23019


[34m[1mwandb[0m: Agent Starting Run: jrdvjgqq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.2027, Train Loss=0.1796, Val Acc=0.2016, Val Loss=0.1796
Epoch 2: Train Acc=0.3581, Train Loss=0.1520, Val Acc=0.3528, Val Loss=0.1526
Epoch 3: Train Acc=0.4116, Train Loss=0.1281, Val Acc=0.4210, Val Loss=0.1284
Epoch 4: Train Acc=0.5064, Train Loss=0.1166, Val Acc=0.5052, Val Loss=0.1167
Epoch 5: Train Acc=0.5641, Train Loss=0.1043, Val Acc=0.5678, Val Loss=0.1044
Epoch 6: Train Acc=0.6031, Train Loss=0.0939, Val Acc=0.6066, Val Loss=0.0937
Epoch 7: Train Acc=0.6300, Train Loss=0.0881, Val Acc=0.6354, Val Loss=0.0876
Epoch 8: Train Acc=0.6622, Train Loss=0.0837, Val Acc=0.6684, Val Loss=0.0832
Epoch 9: Train Acc=0.6962, Train Loss=0.0795, Val Acc=0.7000, Val Loss=0.0790
Epoch 10: Train Acc=0.7136, Train Loss=0.0759, Val Acc=0.7158, Val Loss=0.0753


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_acc,▁▃▄▅▆▇▇▇██
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.7136
train_loss,0.07591
val_acc,0.7158
val_loss,0.07526


[34m[1mwandb[0m: Agent Starting Run: iuzrsq5x with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7928, Train Loss=0.0607, Val Acc=0.7972, Val Loss=0.0599
Epoch 2: Train Acc=0.8245, Train Loss=0.0500, Val Acc=0.8278, Val Loss=0.0492
Epoch 3: Train Acc=0.8363, Train Loss=0.0459, Val Acc=0.8436, Val Loss=0.0453
Epoch 4: Train Acc=0.8449, Train Loss=0.0434, Val Acc=0.8496, Val Loss=0.0430
Epoch 5: Train Acc=0.8503, Train Loss=0.0416, Val Acc=0.8548, Val Loss=0.0415


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.85035
train_loss,0.04163
val_acc,0.8548
val_loss,0.0415


[34m[1mwandb[0m: Agent Starting Run: wcogp9ju with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1817, Train Loss=0.2299, Val Acc=0.1728, Val Loss=0.2299
Epoch 2: Train Acc=0.5791, Train Loss=0.1068, Val Acc=0.5888, Val Loss=0.1064
Epoch 3: Train Acc=0.7332, Train Loss=0.0761, Val Acc=0.7376, Val Loss=0.0762
Epoch 4: Train Acc=0.7728, Train Loss=0.0619, Val Acc=0.7814, Val Loss=0.0624
Epoch 5: Train Acc=0.8035, Train Loss=0.0531, Val Acc=0.8094, Val Loss=0.0530
Epoch 6: Train Acc=0.8341, Train Loss=0.0475, Val Acc=0.8374, Val Loss=0.0472
Epoch 7: Train Acc=0.8471, Train Loss=0.0438, Val Acc=0.8480, Val Loss=0.0437
Epoch 8: Train Acc=0.8557, Train Loss=0.0409, Val Acc=0.8540, Val Loss=0.0411
Epoch 9: Train Acc=0.8626, Train Loss=0.0387, Val Acc=0.8610, Val Loss=0.0394
Epoch 10: Train Acc=0.8686, Train Loss=0.0369, Val Acc=0.8630, Val Loss=0.0381


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▇▇▇█████
train_loss,█▄▂▂▂▁▁▁▁▁
val_acc,▁▅▇▇▇█████
val_loss,█▃▂▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.86864
train_loss,0.03691
val_acc,0.863
val_loss,0.03806


[34m[1mwandb[0m: Agent Starting Run: 3ks7tyu8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6753, Train Loss=0.0826, Val Acc=0.6818, Val Loss=0.0823
Epoch 2: Train Acc=0.7190, Train Loss=0.0759, Val Acc=0.7230, Val Loss=0.0762
Epoch 3: Train Acc=0.7392, Train Loss=0.0714, Val Acc=0.7420, Val Loss=0.0719
Epoch 4: Train Acc=0.7494, Train Loss=0.0686, Val Acc=0.7516, Val Loss=0.0691
Epoch 5: Train Acc=0.7569, Train Loss=0.0663, Val Acc=0.7584, Val Loss=0.0667
Epoch 6: Train Acc=0.7651, Train Loss=0.0636, Val Acc=0.7658, Val Loss=0.0642
Epoch 7: Train Acc=0.7785, Train Loss=0.0604, Val Acc=0.7802, Val Loss=0.0613
Epoch 8: Train Acc=0.7874, Train Loss=0.0574, Val Acc=0.7880, Val Loss=0.0586
Epoch 9: Train Acc=0.7925, Train Loss=0.0552, Val Acc=0.7936, Val Loss=0.0565
Epoch 10: Train Acc=0.7983, Train Loss=0.0532, Val Acc=0.7982, Val Loss=0.0546


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▅▆▆▇▇██
train_loss,█▆▅▅▄▃▃▂▁▁
val_acc,▁▃▅▅▆▆▇▇██
val_loss,█▆▅▅▄▃▃▂▁▁

0,1
epoch,10.0
train_acc,0.79835
train_loss,0.0532
val_acc,0.7982
val_loss,0.05459


[34m[1mwandb[0m: Agent Starting Run: axvbegah with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1937, Train Loss=0.2127, Val Acc=0.1898, Val Loss=0.2126
Epoch 2: Train Acc=0.1992, Train Loss=0.1741, Val Acc=0.1982, Val Loss=0.1739
Epoch 3: Train Acc=0.2000, Train Loss=0.1702, Val Acc=0.1992, Val Loss=0.1699
Epoch 4: Train Acc=0.2004, Train Loss=0.1690, Val Acc=0.1992, Val Loss=0.1687
Epoch 5: Train Acc=0.2011, Train Loss=0.1686, Val Acc=0.1988, Val Loss=0.1681


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇▇█
train_loss,█▂▁▁▁
val_acc,▁▇███
val_loss,█▂▁▁▁

0,1
epoch,5.0
train_acc,0.20109
train_loss,0.16856
val_acc,0.1988
val_loss,0.16807


[34m[1mwandb[0m: Agent Starting Run: onzuzlh2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▅▅▄▃▂▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▄▅▆▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: 8dkhbov0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.4232, Train Loss=0.1265, Val Acc=0.4296, Val Loss=0.1260
Epoch 2: Train Acc=0.5057, Train Loss=0.1111, Val Acc=0.5138, Val Loss=0.1108
Epoch 3: Train Acc=0.5280, Train Loss=0.1065, Val Acc=0.5378, Val Loss=0.1063
Epoch 4: Train Acc=0.5382, Train Loss=0.1044, Val Acc=0.5466, Val Loss=0.1042
Epoch 5: Train Acc=0.5445, Train Loss=0.1030, Val Acc=0.5596, Val Loss=0.1029


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇██
train_loss,█▃▂▁▁
val_acc,▁▆▇▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,0.54449
train_loss,0.103
val_acc,0.5596
val_loss,0.10288


[34m[1mwandb[0m: Agent Starting Run: n92hskv1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0999, Train Loss=0.2280, Val Acc=0.1012, Val Loss=0.2280
Epoch 2: Train Acc=0.1989, Train Loss=0.2029, Val Acc=0.1910, Val Loss=0.2029
Epoch 3: Train Acc=0.2002, Train Loss=0.1819, Val Acc=0.1926, Val Loss=0.1819
Epoch 4: Train Acc=0.2013, Train Loss=0.1745, Val Acc=0.1930, Val Loss=0.1745
Epoch 5: Train Acc=0.2020, Train Loss=0.1712, Val Acc=0.1930, Val Loss=0.1712


0,1
epoch,▁▃▅▆█
train_acc,▁████
train_loss,█▅▂▁▁
val_acc,▁████
val_loss,█▅▂▁▁

0,1
epoch,5.0
train_acc,0.20204
train_loss,0.17123
val_acc,0.193
val_loss,0.17118


[34m[1mwandb[0m: Agent Starting Run: r80pgce7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7971, Train Loss=0.0580, Val Acc=0.8040, Val Loss=0.0573
Epoch 2: Train Acc=0.8372, Train Loss=0.0480, Val Acc=0.8398, Val Loss=0.0479
Epoch 3: Train Acc=0.8498, Train Loss=0.0434, Val Acc=0.8496, Val Loss=0.0440
Epoch 4: Train Acc=0.8583, Train Loss=0.0407, Val Acc=0.8542, Val Loss=0.0416
Epoch 5: Train Acc=0.8648, Train Loss=0.0389, Val Acc=0.8602, Val Loss=0.0401
Epoch 6: Train Acc=0.8694, Train Loss=0.0374, Val Acc=0.8600, Val Loss=0.0390
Epoch 7: Train Acc=0.8738, Train Loss=0.0363, Val Acc=0.8642, Val Loss=0.0382
Epoch 8: Train Acc=0.8769, Train Loss=0.0353, Val Acc=0.8688, Val Loss=0.0375
Epoch 9: Train Acc=0.8789, Train Loss=0.0345, Val Acc=0.8704, Val Loss=0.0370
Epoch 10: Train Acc=0.8804, Train Loss=0.0338, Val Acc=0.8704, Val Loss=0.0366


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▅▆▆▇▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.8804
train_loss,0.03377
val_acc,0.8704
val_loss,0.0366


[34m[1mwandb[0m: Agent Starting Run: 3tyhcnj7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7788, Train Loss=0.0614, Val Acc=0.7868, Val Loss=0.0611
Epoch 2: Train Acc=0.8254, Train Loss=0.0499, Val Acc=0.8308, Val Loss=0.0500
Epoch 3: Train Acc=0.8395, Train Loss=0.0456, Val Acc=0.8428, Val Loss=0.0462
Epoch 4: Train Acc=0.8491, Train Loss=0.0429, Val Acc=0.8488, Val Loss=0.0437
Epoch 5: Train Acc=0.8558, Train Loss=0.0408, Val Acc=0.8554, Val Loss=0.0419


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.85576
train_loss,0.04084
val_acc,0.8554
val_loss,0.04188


[34m[1mwandb[0m: Agent Starting Run: 79to6kt2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▂▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xryvx0cx with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7748, Train Loss=0.0661, Val Acc=0.7832, Val Loss=0.0656
Epoch 2: Train Acc=0.7973, Train Loss=0.0590, Val Acc=0.8044, Val Loss=0.0585
Epoch 3: Train Acc=0.8091, Train Loss=0.0557, Val Acc=0.8154, Val Loss=0.0551
Epoch 4: Train Acc=0.8161, Train Loss=0.0536, Val Acc=0.8214, Val Loss=0.0528
Epoch 5: Train Acc=0.8212, Train Loss=0.0519, Val Acc=0.8264, Val Loss=0.0511


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.82116
train_loss,0.05191
val_acc,0.8264
val_loss,0.05114


[34m[1mwandb[0m: Agent Starting Run: qm0f9stt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▄▂▂▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▃▅▆▆▇▇███

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: 013gn0xv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Agent Starting Run: qn3a33s6 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2302


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.2302
val_acc,0.0976
val_loss,0.23025


[34m[1mwandb[0m: Agent Starting Run: fx8ig67o with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5547, Train Loss=0.1362, Val Acc=0.5552, Val Loss=0.1357
Epoch 2: Train Acc=0.6363, Train Loss=0.1117, Val Acc=0.6424, Val Loss=0.1116
Epoch 3: Train Acc=0.6781, Train Loss=0.0973, Val Acc=0.6792, Val Loss=0.0973
Epoch 4: Train Acc=0.7044, Train Loss=0.0871, Val Acc=0.7074, Val Loss=0.0871
Epoch 5: Train Acc=0.7185, Train Loss=0.0799, Val Acc=0.7256, Val Loss=0.0799
Epoch 6: Train Acc=0.7285, Train Loss=0.0746, Val Acc=0.7352, Val Loss=0.0746
Epoch 7: Train Acc=0.7373, Train Loss=0.0706, Val Acc=0.7464, Val Loss=0.0706
Epoch 8: Train Acc=0.7464, Train Loss=0.0675, Val Acc=0.7550, Val Loss=0.0674
Epoch 9: Train Acc=0.7541, Train Loss=0.0650, Val Acc=0.7634, Val Loss=0.0648
Epoch 10: Train Acc=0.7615, Train Loss=0.0628, Val Acc=0.7698, Val Loss=0.0627


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇▇██
train_loss,█▆▄▃▃▂▂▁▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▆▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.76151
train_loss,0.06283
val_acc,0.7698
val_loss,0.06267


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zf46du73 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1987, Train Loss=0.2187, Val Acc=0.1920, Val Loss=0.2187
Epoch 2: Train Acc=0.1998, Train Loss=0.1817, Val Acc=0.1916, Val Loss=0.1815
Epoch 3: Train Acc=0.2005, Train Loss=0.1732, Val Acc=0.1926, Val Loss=0.1730
Epoch 4: Train Acc=0.2013, Train Loss=0.1704, Val Acc=0.1934, Val Loss=0.1702
Epoch 5: Train Acc=0.2027, Train Loss=0.1691, Val Acc=0.2022, Val Loss=0.1689
Epoch 6: Train Acc=0.2033, Train Loss=0.1684, Val Acc=0.2014, Val Loss=0.1682
Epoch 7: Train Acc=0.2037, Train Loss=0.1680, Val Acc=0.2024, Val Loss=0.1678
Epoch 8: Train Acc=0.2044, Train Loss=0.1678, Val Acc=0.2026, Val Loss=0.1675
Epoch 9: Train Acc=0.2049, Train Loss=0.1675, Val Acc=0.2028, Val Loss=0.1673
Epoch 10: Train Acc=0.2057, Train Loss=0.1674, Val Acc=0.2042, Val Loss=0.1672


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▃▄▅▆▆▇▇█
train_loss,█▃▂▁▁▁▁▁▁▁
val_acc,▁▁▂▂▇▆▇▇▇█
val_loss,█▃▂▁▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.20575
train_loss,0.16737
val_acc,0.2042
val_loss,0.16719


[34m[1mwandb[0m: Agent Starting Run: zilowggb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: 9jw1pvju with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1987, Train Loss=0.2225, Val Acc=0.1914, Val Loss=0.2225
Epoch 2: Train Acc=0.2003, Train Loss=0.1956, Val Acc=0.1932, Val Loss=0.1956
Epoch 3: Train Acc=0.2010, Train Loss=0.1826, Val Acc=0.1930, Val Loss=0.1825
Epoch 4: Train Acc=0.2018, Train Loss=0.1766, Val Acc=0.1934, Val Loss=0.1765
Epoch 5: Train Acc=0.2026, Train Loss=0.1734, Val Acc=0.1944, Val Loss=0.1733


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▇█
train_loss,█▄▂▁▁
val_acc,▁▅▅▆█
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.2026
train_loss,0.17337
val_acc,0.1944
val_loss,0.17327


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 71qigkmj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1115, Train Loss=0.2264, Val Acc=0.1088, Val Loss=0.2265
Epoch 2: Train Acc=0.1543, Train Loss=0.2232, Val Acc=0.1504, Val Loss=0.2232
Epoch 3: Train Acc=0.1860, Train Loss=0.2198, Val Acc=0.1820, Val Loss=0.2198
Epoch 4: Train Acc=0.2076, Train Loss=0.2161, Val Acc=0.2032, Val Loss=0.2161
Epoch 5: Train Acc=0.2265, Train Loss=0.2120, Val Acc=0.2228, Val Loss=0.2119
Epoch 6: Train Acc=0.2422, Train Loss=0.2072, Val Acc=0.2402, Val Loss=0.2070
Epoch 7: Train Acc=0.2574, Train Loss=0.2017, Val Acc=0.2536, Val Loss=0.2014
Epoch 8: Train Acc=0.2801, Train Loss=0.1957, Val Acc=0.2790, Val Loss=0.1953
Epoch 9: Train Acc=0.3170, Train Loss=0.1892, Val Acc=0.3150, Val Loss=0.1888
Epoch 10: Train Acc=0.3932, Train Loss=0.1823, Val Acc=0.3992, Val Loss=0.1817


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▃▃▄▄▅▅▆█
train_loss,█▇▇▆▆▅▄▃▂▁
val_acc,▁▂▃▃▄▄▄▅▆█
val_loss,█▇▇▆▆▅▄▃▂▁

0,1
epoch,10.0
train_acc,0.39324
train_loss,0.18226
val_acc,0.3992
val_loss,0.18172


[34m[1mwandb[0m: Agent Starting Run: yj9xwkwb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0999, Train Loss=0.2304, Val Acc=0.1012, Val Loss=0.2304
Epoch 2: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1012, Val Loss=0.2304
Epoch 3: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1012, Val Loss=0.2303
Epoch 4: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1012, Val Loss=0.2303
Epoch 5: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1012, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▅▃▂▁
val_acc,▁▁▁▁▁
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.09989
train_loss,0.23027
val_acc,0.1012
val_loss,0.23031


[34m[1mwandb[0m: Agent Starting Run: 8uk8p8lf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▅▃▂▁
val_acc,▁▁▁▁▁
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: zldr35rj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5035, Train Loss=0.1689, Val Acc=0.4962, Val Loss=0.1687
Epoch 2: Train Acc=0.6585, Train Loss=0.0988, Val Acc=0.6668, Val Loss=0.0981
Epoch 3: Train Acc=0.7058, Train Loss=0.0795, Val Acc=0.7144, Val Loss=0.0791
Epoch 4: Train Acc=0.7408, Train Loss=0.0713, Val Acc=0.7508, Val Loss=0.0709
Epoch 5: Train Acc=0.7657, Train Loss=0.0658, Val Acc=0.7744, Val Loss=0.0654


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▃▂▁▁
val_acc,▁▅▆▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,0.76569
train_loss,0.06585
val_acc,0.7744
val_loss,0.06543


[34m[1mwandb[0m: Agent Starting Run: sze84ygf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁███▇
val_acc,▁▁▁▁▁
val_loss,▁███▇

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23027
val_acc,0.0976
val_loss,0.2303


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xto4n322 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7303, Train Loss=0.0781, Val Acc=0.7260, Val Loss=0.0783
Epoch 2: Train Acc=0.7775, Train Loss=0.0626, Val Acc=0.7798, Val Loss=0.0628
Epoch 3: Train Acc=0.7952, Train Loss=0.0549, Val Acc=0.8014, Val Loss=0.0548
Epoch 4: Train Acc=0.8050, Train Loss=0.0511, Val Acc=0.8098, Val Loss=0.0509
Epoch 5: Train Acc=0.8162, Train Loss=0.0487, Val Acc=0.8204, Val Loss=0.0485
Epoch 6: Train Acc=0.8295, Train Loss=0.0468, Val Acc=0.8310, Val Loss=0.0466
Epoch 7: Train Acc=0.8395, Train Loss=0.0452, Val Acc=0.8408, Val Loss=0.0450
Epoch 8: Train Acc=0.8467, Train Loss=0.0437, Val Acc=0.8464, Val Loss=0.0437
Epoch 9: Train Acc=0.8515, Train Loss=0.0425, Val Acc=0.8506, Val Loss=0.0425
Epoch 10: Train Acc=0.8553, Train Loss=0.0414, Val Acc=0.8554, Val Loss=0.0415


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▅▆▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.85527
train_loss,0.04136
val_acc,0.8554
val_loss,0.04154


[34m[1mwandb[0m: Agent Starting Run: nh341qyf with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.8186, Train Loss=0.0489, Val Acc=0.8244, Val Loss=0.0483
Epoch 2: Train Acc=0.8572, Train Loss=0.0402, Val Acc=0.8584, Val Loss=0.0403
Epoch 3: Train Acc=0.8681, Train Loss=0.0370, Val Acc=0.8686, Val Loss=0.0376
Epoch 4: Train Acc=0.8756, Train Loss=0.0349, Val Acc=0.8740, Val Loss=0.0360
Epoch 5: Train Acc=0.8809, Train Loss=0.0335, Val Acc=0.8768, Val Loss=0.0350


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▄▃▂▁
val_acc,▁▆▇██
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.88089
train_loss,0.03346
val_acc,0.8768
val_loss,0.035


[34m[1mwandb[0m: Agent Starting Run: fccxpo7f with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8243, Train Loss=0.0496, Val Acc=0.8258, Val Loss=0.0490
Epoch 2: Train Acc=0.8382, Train Loss=0.0453, Val Acc=0.8416, Val Loss=0.0446
Epoch 3: Train Acc=0.8455, Train Loss=0.0433, Val Acc=0.8486, Val Loss=0.0426
Epoch 4: Train Acc=0.8498, Train Loss=0.0419, Val Acc=0.8570, Val Loss=0.0413
Epoch 5: Train Acc=0.8534, Train Loss=0.0409, Val Acc=0.8628, Val Loss=0.0404
Epoch 6: Train Acc=0.8566, Train Loss=0.0401, Val Acc=0.8650, Val Loss=0.0397
Epoch 7: Train Acc=0.8590, Train Loss=0.0394, Val Acc=0.8670, Val Loss=0.0391
Epoch 8: Train Acc=0.8614, Train Loss=0.0387, Val Acc=0.8660, Val Loss=0.0386
Epoch 9: Train Acc=0.8635, Train Loss=0.0382, Val Acc=0.8658, Val Loss=0.0381
Epoch 10: Train Acc=0.8653, Train Loss=0.0376, Val Acc=0.8676, Val Loss=0.0378


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▅▆▇▇▇██
train_loss,█▅▄▄▃▂▂▂▁▁
val_acc,▁▄▅▆▇█████
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.86527
train_loss,0.03764
val_acc,0.8676
val_loss,0.03776


[34m[1mwandb[0m: Agent Starting Run: 7v9zegor with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5578, Train Loss=0.1015, Val Acc=0.5634, Val Loss=0.1013
Epoch 2: Train Acc=0.5937, Train Loss=0.0974, Val Acc=0.6022, Val Loss=0.0972
Epoch 3: Train Acc=0.6105, Train Loss=0.0950, Val Acc=0.6166, Val Loss=0.0947
Epoch 4: Train Acc=0.6226, Train Loss=0.0932, Val Acc=0.6252, Val Loss=0.0929
Epoch 5: Train Acc=0.6326, Train Loss=0.0917, Val Acc=0.6368, Val Loss=0.0915
Epoch 6: Train Acc=0.6398, Train Loss=0.0905, Val Acc=0.6446, Val Loss=0.0902
Epoch 7: Train Acc=0.6473, Train Loss=0.0893, Val Acc=0.6498, Val Loss=0.0891
Epoch 8: Train Acc=0.6533, Train Loss=0.0883, Val Acc=0.6578, Val Loss=0.0880
Epoch 9: Train Acc=0.6591, Train Loss=0.0874, Val Acc=0.6640, Val Loss=0.0871
Epoch 10: Train Acc=0.6638, Train Loss=0.0865, Val Acc=0.6708, Val Loss=0.0862


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▄▄▅▆▆▇▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.66384
train_loss,0.0865
val_acc,0.6708
val_loss,0.08622


[34m[1mwandb[0m: Agent Starting Run: 9h20t69j with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7908, Train Loss=0.0558, Val Acc=0.7950, Val Loss=0.0553
Epoch 2: Train Acc=0.8277, Train Loss=0.0481, Val Acc=0.8332, Val Loss=0.0475
Epoch 3: Train Acc=0.8419, Train Loss=0.0443, Val Acc=0.8474, Val Loss=0.0438
Epoch 4: Train Acc=0.8487, Train Loss=0.0420, Val Acc=0.8538, Val Loss=0.0416
Epoch 5: Train Acc=0.8543, Train Loss=0.0404, Val Acc=0.8600, Val Loss=0.0401
Epoch 6: Train Acc=0.8585, Train Loss=0.0390, Val Acc=0.8626, Val Loss=0.0390
Epoch 7: Train Acc=0.8629, Train Loss=0.0379, Val Acc=0.8658, Val Loss=0.0380
Epoch 8: Train Acc=0.8662, Train Loss=0.0370, Val Acc=0.8680, Val Loss=0.0373
Epoch 9: Train Acc=0.8693, Train Loss=0.0362, Val Acc=0.8704, Val Loss=0.0366
Epoch 10: Train Acc=0.8719, Train Loss=0.0355, Val Acc=0.8720, Val Loss=0.0361


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▄▆▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.87185
train_loss,0.03546
val_acc,0.872
val_loss,0.03607


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a8v48f5f with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.2210, Train Loss=0.2245, Val Acc=0.2142, Val Loss=0.2245
Epoch 2: Train Acc=0.2763, Train Loss=0.2158, Val Acc=0.2694, Val Loss=0.2158
Epoch 3: Train Acc=0.3170, Train Loss=0.2019, Val Acc=0.3136, Val Loss=0.2018
Epoch 4: Train Acc=0.3872, Train Loss=0.1842, Val Acc=0.3868, Val Loss=0.1840
Epoch 5: Train Acc=0.5237, Train Loss=0.1664, Val Acc=0.5294, Val Loss=0.1658
Epoch 6: Train Acc=0.5761, Train Loss=0.1497, Val Acc=0.5906, Val Loss=0.1488
Epoch 7: Train Acc=0.5924, Train Loss=0.1345, Val Acc=0.6008, Val Loss=0.1334
Epoch 8: Train Acc=0.6077, Train Loss=0.1219, Val Acc=0.6144, Val Loss=0.1207
Epoch 9: Train Acc=0.6231, Train Loss=0.1120, Val Acc=0.6304, Val Loss=0.1109
Epoch 10: Train Acc=0.6365, Train Loss=0.1044, Val Acc=0.6444, Val Loss=0.1034


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▃▄▆▇▇███
train_loss,█▇▇▆▅▄▃▂▁▁
val_acc,▁▂▃▄▆▇▇███
val_loss,█▇▇▆▅▄▃▂▁▁

0,1
epoch,10.0
train_acc,0.63645
train_loss,0.10437
val_acc,0.6444
val_loss,0.10341


[34m[1mwandb[0m: Agent Starting Run: l5anb90m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8482, Train Loss=0.0430, Val Acc=0.8508, Val Loss=0.0424
Epoch 2: Train Acc=0.8636, Train Loss=0.0384, Val Acc=0.8636, Val Loss=0.0384
Epoch 3: Train Acc=0.8708, Train Loss=0.0365, Val Acc=0.8680, Val Loss=0.0369
Epoch 4: Train Acc=0.8753, Train Loss=0.0352, Val Acc=0.8728, Val Loss=0.0360
Epoch 5: Train Acc=0.8788, Train Loss=0.0342, Val Acc=0.8746, Val Loss=0.0353


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.87878
train_loss,0.03422
val_acc,0.8746
val_loss,0.03528


[34m[1mwandb[0m: Agent Starting Run: rc520p0e with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8229, Train Loss=0.0519, Val Acc=0.8280, Val Loss=0.0510
Epoch 2: Train Acc=0.8467, Train Loss=0.0441, Val Acc=0.8490, Val Loss=0.0438
Epoch 3: Train Acc=0.8560, Train Loss=0.0410, Val Acc=0.8538, Val Loss=0.0411
Epoch 4: Train Acc=0.8629, Train Loss=0.0383, Val Acc=0.8594, Val Loss=0.0391
Epoch 5: Train Acc=0.8696, Train Loss=0.0361, Val Acc=0.8668, Val Loss=0.0374


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.86964
train_loss,0.03614
val_acc,0.8668
val_loss,0.03742


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t7fdw08e with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8280, Train Loss=0.0496, Val Acc=0.8290, Val Loss=0.0491
Epoch 2: Train Acc=0.8416, Train Loss=0.0448, Val Acc=0.8458, Val Loss=0.0443
Epoch 3: Train Acc=0.8489, Train Loss=0.0425, Val Acc=0.8536, Val Loss=0.0421
Epoch 4: Train Acc=0.8535, Train Loss=0.0409, Val Acc=0.8610, Val Loss=0.0407
Epoch 5: Train Acc=0.8575, Train Loss=0.0398, Val Acc=0.8622, Val Loss=0.0397


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆██
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.85751
train_loss,0.03977
val_acc,0.8622
val_loss,0.03971


[34m[1mwandb[0m: Agent Starting Run: vmsdusf3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6505, Train Loss=0.1365, Val Acc=0.6544, Val Loss=0.1358
Epoch 2: Train Acc=0.6860, Train Loss=0.1050, Val Acc=0.6872, Val Loss=0.1044
Epoch 3: Train Acc=0.7195, Train Loss=0.0891, Val Acc=0.7174, Val Loss=0.0885
Epoch 4: Train Acc=0.7471, Train Loss=0.0797, Val Acc=0.7448, Val Loss=0.0791
Epoch 5: Train Acc=0.7675, Train Loss=0.0731, Val Acc=0.7704, Val Loss=0.0724


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▇█
train_loss,█▅▃▂▁
val_acc,▁▃▅▆█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.76749
train_loss,0.07307
val_acc,0.7704
val_loss,0.0724


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: blfqmefs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7215, Train Loss=0.0829, Val Acc=0.7260, Val Loss=0.0819
Epoch 2: Train Acc=0.7794, Train Loss=0.0660, Val Acc=0.7804, Val Loss=0.0650
Epoch 3: Train Acc=0.8029, Train Loss=0.0579, Val Acc=0.8096, Val Loss=0.0569
Epoch 4: Train Acc=0.8160, Train Loss=0.0532, Val Acc=0.8222, Val Loss=0.0522
Epoch 5: Train Acc=0.8237, Train Loss=0.0502, Val Acc=0.8306, Val Loss=0.0492
Epoch 6: Train Acc=0.8299, Train Loss=0.0481, Val Acc=0.8376, Val Loss=0.0472
Epoch 7: Train Acc=0.8349, Train Loss=0.0465, Val Acc=0.8428, Val Loss=0.0457
Epoch 8: Train Acc=0.8387, Train Loss=0.0453, Val Acc=0.8474, Val Loss=0.0445
Epoch 9: Train Acc=0.8419, Train Loss=0.0442, Val Acc=0.8510, Val Loss=0.0435
Epoch 10: Train Acc=0.8451, Train Loss=0.0433, Val Acc=0.8532, Val Loss=0.0427


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▆▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.84513
train_loss,0.04329
val_acc,0.8532
val_loss,0.04271


[34m[1mwandb[0m: Agent Starting Run: scbf7oh0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8393, Train Loss=0.0465, Val Acc=0.8432, Val Loss=0.0451
Epoch 2: Train Acc=0.8584, Train Loss=0.0405, Val Acc=0.8608, Val Loss=0.0404
Epoch 3: Train Acc=0.8686, Train Loss=0.0368, Val Acc=0.8672, Val Loss=0.0379
Epoch 4: Train Acc=0.8752, Train Loss=0.0347, Val Acc=0.8720, Val Loss=0.0366
Epoch 5: Train Acc=0.8805, Train Loss=0.0329, Val Acc=0.8742, Val Loss=0.0354
Epoch 6: Train Acc=0.8840, Train Loss=0.0316, Val Acc=0.8756, Val Loss=0.0346
Epoch 7: Train Acc=0.8870, Train Loss=0.0306, Val Acc=0.8770, Val Loss=0.0342
Epoch 8: Train Acc=0.8904, Train Loss=0.0296, Val Acc=0.8756, Val Loss=0.0337
Epoch 9: Train Acc=0.8945, Train Loss=0.0283, Val Acc=0.8796, Val Loss=0.0329
Epoch 10: Train Acc=0.8961, Train Loss=0.0277, Val Acc=0.8798, Val Loss=0.0329


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▅▆▇▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▄▆▇▇▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.89613
train_loss,0.02767
val_acc,0.8798
val_loss,0.03288


[34m[1mwandb[0m: Agent Starting Run: 1t75c1kt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8148, Train Loss=0.0528, Val Acc=0.8190, Val Loss=0.0518
Epoch 2: Train Acc=0.8467, Train Loss=0.0432, Val Acc=0.8498, Val Loss=0.0427
Epoch 3: Train Acc=0.8602, Train Loss=0.0396, Val Acc=0.8566, Val Loss=0.0398
Epoch 4: Train Acc=0.8659, Train Loss=0.0376, Val Acc=0.8606, Val Loss=0.0383
Epoch 5: Train Acc=0.8703, Train Loss=0.0363, Val Acc=0.8612, Val Loss=0.0374


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▄▂▂▁
val_acc,▁▆▇██
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.87025
train_loss,0.03628
val_acc,0.8612
val_loss,0.03741


[34m[1mwandb[0m: Agent Starting Run: kbzee4pt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.4261, Train Loss=0.1276, Val Acc=0.4334, Val Loss=0.1277
Epoch 2: Train Acc=0.5822, Train Loss=0.0946, Val Acc=0.5818, Val Loss=0.0946
Epoch 3: Train Acc=0.7135, Train Loss=0.0763, Val Acc=0.7132, Val Loss=0.0762
Epoch 4: Train Acc=0.7731, Train Loss=0.0634, Val Acc=0.7778, Val Loss=0.0637
Epoch 5: Train Acc=0.8011, Train Loss=0.0545, Val Acc=0.8052, Val Loss=0.0550


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.80109
train_loss,0.05451
val_acc,0.8052
val_loss,0.05496


[34m[1mwandb[0m: Agent Starting Run: kve7r13x with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8092, Train Loss=0.0572, Val Acc=0.8114, Val Loss=0.0569
Epoch 2: Train Acc=0.8377, Train Loss=0.0461, Val Acc=0.8426, Val Loss=0.0455
Epoch 3: Train Acc=0.8498, Train Loss=0.0421, Val Acc=0.8530, Val Loss=0.0416
Epoch 4: Train Acc=0.8580, Train Loss=0.0398, Val Acc=0.8592, Val Loss=0.0396
Epoch 5: Train Acc=0.8632, Train Loss=0.0382, Val Acc=0.8624, Val Loss=0.0383
Epoch 6: Train Acc=0.8674, Train Loss=0.0370, Val Acc=0.8642, Val Loss=0.0374
Epoch 7: Train Acc=0.8705, Train Loss=0.0360, Val Acc=0.8670, Val Loss=0.0367
Epoch 8: Train Acc=0.8738, Train Loss=0.0352, Val Acc=0.8682, Val Loss=0.0361
Epoch 9: Train Acc=0.8764, Train Loss=0.0344, Val Acc=0.8690, Val Loss=0.0356
Epoch 10: Train Acc=0.8786, Train Loss=0.0337, Val Acc=0.8704, Val Loss=0.0352


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_acc,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.87858
train_loss,0.03373
val_acc,0.8704
val_loss,0.03517


[34m[1mwandb[0m: Agent Starting Run: zhysix9o with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8612, Train Loss=0.0385, Val Acc=0.8600, Val Loss=0.0390
Epoch 2: Train Acc=0.8776, Train Loss=0.0340, Val Acc=0.8732, Val Loss=0.0354
Epoch 3: Train Acc=0.8856, Train Loss=0.0314, Val Acc=0.8760, Val Loss=0.0336
Epoch 4: Train Acc=0.8908, Train Loss=0.0298, Val Acc=0.8796, Val Loss=0.0325
Epoch 5: Train Acc=0.8949, Train Loss=0.0285, Val Acc=0.8816, Val Loss=0.0318


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.89493
train_loss,0.02851
val_acc,0.8816
val_loss,0.03179


[34m[1mwandb[0m: Agent Starting Run: llcj6cjs with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8231, Train Loss=0.0513, Val Acc=0.8306, Val Loss=0.0506
Epoch 2: Train Acc=0.8380, Train Loss=0.0463, Val Acc=0.8448, Val Loss=0.0456
Epoch 3: Train Acc=0.8479, Train Loss=0.0437, Val Acc=0.8540, Val Loss=0.0432
Epoch 4: Train Acc=0.8533, Train Loss=0.0420, Val Acc=0.8574, Val Loss=0.0417
Epoch 5: Train Acc=0.8570, Train Loss=0.0407, Val Acc=0.8584, Val Loss=0.0406
Epoch 6: Train Acc=0.8605, Train Loss=0.0396, Val Acc=0.8628, Val Loss=0.0397
Epoch 7: Train Acc=0.8634, Train Loss=0.0387, Val Acc=0.8654, Val Loss=0.0390
Epoch 8: Train Acc=0.8661, Train Loss=0.0380, Val Acc=0.8666, Val Loss=0.0383
Epoch 9: Train Acc=0.8689, Train Loss=0.0373, Val Acc=0.8672, Val Loss=0.0378
Epoch 10: Train Acc=0.8706, Train Loss=0.0366, Val Acc=0.8672, Val Loss=0.0373


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▅▆▇▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▄▅▆▆▇████
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.87062
train_loss,0.03665
val_acc,0.8672
val_loss,0.03734


[34m[1mwandb[0m: Agent Starting Run: 607bf9lk with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8364, Train Loss=0.0441, Val Acc=0.8392, Val Loss=0.0449
Epoch 2: Train Acc=0.8333, Train Loss=0.0478, Val Acc=0.8396, Val Loss=0.0503
Epoch 3: Train Acc=0.8461, Train Loss=0.0446, Val Acc=0.8440, Val Loss=0.0483
Epoch 4: Train Acc=0.8586, Train Loss=0.0412, Val Acc=0.8644, Val Loss=0.0450
Epoch 5: Train Acc=0.8373, Train Loss=0.0492, Val Acc=0.8416, Val Loss=0.0538


0,1
epoch,▁▃▅▆█
train_acc,▂▁▅█▂
train_loss,▄▇▄▁█
val_acc,▁▁▂█▂
val_loss,▁▅▄▁█

0,1
epoch,5.0
train_acc,0.83729
train_loss,0.04922
val_acc,0.8416
val_loss,0.05379


[34m[1mwandb[0m: Agent Starting Run: 5y2z1i3f with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5627, Train Loss=0.0993, Val Acc=0.5676, Val Loss=0.0993
Epoch 2: Train Acc=0.7720, Train Loss=0.0629, Val Acc=0.7726, Val Loss=0.0629
Epoch 3: Train Acc=0.8239, Train Loss=0.0536, Val Acc=0.8242, Val Loss=0.0551
Epoch 4: Train Acc=0.8553, Train Loss=0.0447, Val Acc=0.8488, Val Loss=0.0472
Epoch 5: Train Acc=0.8589, Train Loss=0.0428, Val Acc=0.8528, Val Loss=0.0457
Epoch 6: Train Acc=0.8661, Train Loss=0.0405, Val Acc=0.8602, Val Loss=0.0433
Epoch 7: Train Acc=0.8708, Train Loss=0.0386, Val Acc=0.8654, Val Loss=0.0420
Epoch 8: Train Acc=0.8771, Train Loss=0.0365, Val Acc=0.8688, Val Loss=0.0409
Epoch 9: Train Acc=0.8818, Train Loss=0.0347, Val Acc=0.8714, Val Loss=0.0394
Epoch 10: Train Acc=0.8841, Train Loss=0.0341, Val Acc=0.8740, Val Loss=0.0391


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▄▃▂▂▂▁▁▁▁
val_acc,▁▆▇▇██████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.88413
train_loss,0.0341
val_acc,0.874
val_loss,0.03908


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7ohdbq5i with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6239, Train Loss=0.0903, Val Acc=0.6318, Val Loss=0.0900
Epoch 2: Train Acc=0.7190, Train Loss=0.0717, Val Acc=0.7242, Val Loss=0.0715
Epoch 3: Train Acc=0.7404, Train Loss=0.0634, Val Acc=0.7416, Val Loss=0.0632
Epoch 4: Train Acc=0.7681, Train Loss=0.0566, Val Acc=0.7716, Val Loss=0.0570
Epoch 5: Train Acc=0.8279, Train Loss=0.0489, Val Acc=0.8254, Val Loss=0.0506


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▆█
train_loss,█▅▃▂▁
val_acc,▁▄▅▆█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.82785
train_loss,0.04894
val_acc,0.8254
val_loss,0.05056


[34m[1mwandb[0m: Agent Starting Run: y22ydmg9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 6: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 7: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 8: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 9: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 10: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▇▆▅▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.23028
val_acc,0.0976
val_loss,0.23032


[34m[1mwandb[0m: Agent Starting Run: kp8di6bq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5535, Train Loss=0.1599, Val Acc=0.5504, Val Loss=0.1601
Epoch 2: Train Acc=0.6456, Train Loss=0.1204, Val Acc=0.6470, Val Loss=0.1202
Epoch 3: Train Acc=0.6871, Train Loss=0.0986, Val Acc=0.6910, Val Loss=0.0982
Epoch 4: Train Acc=0.7155, Train Loss=0.0863, Val Acc=0.7164, Val Loss=0.0859
Epoch 5: Train Acc=0.7398, Train Loss=0.0782, Val Acc=0.7392, Val Loss=0.0778
Epoch 6: Train Acc=0.7575, Train Loss=0.0721, Val Acc=0.7588, Val Loss=0.0716
Epoch 7: Train Acc=0.7699, Train Loss=0.0673, Val Acc=0.7748, Val Loss=0.0666
Epoch 8: Train Acc=0.7797, Train Loss=0.0636, Val Acc=0.7860, Val Loss=0.0627
Epoch 9: Train Acc=0.7893, Train Loss=0.0606, Val Acc=0.7982, Val Loss=0.0596
Epoch 10: Train Acc=0.7979, Train Loss=0.0582, Val Acc=0.8086, Val Loss=0.0571


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.79795
train_loss,0.05819
val_acc,0.8086
val_loss,0.05713


[34m[1mwandb[0m: Agent Starting Run: 3fmwgpg5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5581, Train Loss=0.1077, Val Acc=0.5550, Val Loss=0.1068
Epoch 2: Train Acc=0.5991, Train Loss=0.1009, Val Acc=0.6060, Val Loss=0.0995
Epoch 3: Train Acc=0.6273, Train Loss=0.0966, Val Acc=0.6358, Val Loss=0.0953
Epoch 4: Train Acc=0.6479, Train Loss=0.0934, Val Acc=0.6540, Val Loss=0.0921
Epoch 5: Train Acc=0.6646, Train Loss=0.0907, Val Acc=0.6714, Val Loss=0.0896
Epoch 6: Train Acc=0.6788, Train Loss=0.0885, Val Acc=0.6850, Val Loss=0.0875
Epoch 7: Train Acc=0.6897, Train Loss=0.0866, Val Acc=0.6964, Val Loss=0.0858
Epoch 8: Train Acc=0.6988, Train Loss=0.0850, Val Acc=0.7038, Val Loss=0.0843
Epoch 9: Train Acc=0.7063, Train Loss=0.0835, Val Acc=0.7112, Val Loss=0.0830
Epoch 10: Train Acc=0.7125, Train Loss=0.0820, Val Acc=0.7196, Val Loss=0.0817


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▃▄▅▆▇▇▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.71249
train_loss,0.08204
val_acc,0.7196
val_loss,0.08166


[34m[1mwandb[0m: Agent Starting Run: 3gq5sidf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8041, Train Loss=0.0558, Val Acc=0.8120, Val Loss=0.0550
Epoch 2: Train Acc=0.8248, Train Loss=0.0500, Val Acc=0.8310, Val Loss=0.0491
Epoch 3: Train Acc=0.8343, Train Loss=0.0471, Val Acc=0.8398, Val Loss=0.0462
Epoch 4: Train Acc=0.8421, Train Loss=0.0452, Val Acc=0.8496, Val Loss=0.0444
Epoch 5: Train Acc=0.8468, Train Loss=0.0437, Val Acc=0.8522, Val Loss=0.0430
Epoch 6: Train Acc=0.8508, Train Loss=0.0425, Val Acc=0.8554, Val Loss=0.0419
Epoch 7: Train Acc=0.8545, Train Loss=0.0416, Val Acc=0.8604, Val Loss=0.0411
Epoch 8: Train Acc=0.8572, Train Loss=0.0407, Val Acc=0.8624, Val Loss=0.0404
Epoch 9: Train Acc=0.8599, Train Loss=0.0400, Val Acc=0.8656, Val Loss=0.0398
Epoch 10: Train Acc=0.8621, Train Loss=0.0394, Val Acc=0.8684, Val Loss=0.0392


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_acc,▁▃▄▆▆▆▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.86213
train_loss,0.03939
val_acc,0.8684
val_loss,0.03923


[34m[1mwandb[0m: Agent Starting Run: 36yj0g94 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8526, Train Loss=0.0424, Val Acc=0.8514, Val Loss=0.0413
Epoch 2: Train Acc=0.8658, Train Loss=0.0383, Val Acc=0.8632, Val Loss=0.0379
Epoch 3: Train Acc=0.8732, Train Loss=0.0363, Val Acc=0.8692, Val Loss=0.0364
Epoch 4: Train Acc=0.8778, Train Loss=0.0348, Val Acc=0.8736, Val Loss=0.0355
Epoch 5: Train Acc=0.8813, Train Loss=0.0337, Val Acc=0.8750, Val Loss=0.0348
Epoch 6: Train Acc=0.8840, Train Loss=0.0328, Val Acc=0.8770, Val Loss=0.0343
Epoch 7: Train Acc=0.8866, Train Loss=0.0320, Val Acc=0.8780, Val Loss=0.0340
Epoch 8: Train Acc=0.8890, Train Loss=0.0314, Val Acc=0.8786, Val Loss=0.0337
Epoch 9: Train Acc=0.8911, Train Loss=0.0308, Val Acc=0.8796, Val Loss=0.0335
Epoch 10: Train Acc=0.8931, Train Loss=0.0303, Val Acc=0.8806, Val Loss=0.0333


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▅▆▆▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.89309
train_loss,0.0303
val_acc,0.8806
val_loss,0.03327


[34m[1mwandb[0m: Agent Starting Run: 3ejtwsxa with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8603, Train Loss=0.0391, Val Acc=0.8594, Val Loss=0.0390
Epoch 2: Train Acc=0.8705, Train Loss=0.0356, Val Acc=0.8690, Val Loss=0.0365
Epoch 3: Train Acc=0.8789, Train Loss=0.0332, Val Acc=0.8716, Val Loss=0.0350
Epoch 4: Train Acc=0.8849, Train Loss=0.0314, Val Acc=0.8756, Val Loss=0.0339
Epoch 5: Train Acc=0.8895, Train Loss=0.0301, Val Acc=0.8784, Val Loss=0.0331
Epoch 6: Train Acc=0.8928, Train Loss=0.0291, Val Acc=0.8808, Val Loss=0.0326
Epoch 7: Train Acc=0.8957, Train Loss=0.0283, Val Acc=0.8816, Val Loss=0.0322
Epoch 8: Train Acc=0.8979, Train Loss=0.0276, Val Acc=0.8812, Val Loss=0.0319
Epoch 9: Train Acc=0.9000, Train Loss=0.0270, Val Acc=0.8828, Val Loss=0.0318
Epoch 10: Train Acc=0.9016, Train Loss=0.0265, Val Acc=0.8822, Val Loss=0.0317


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▇▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_acc,▁▄▅▆▇▇████
val_loss,█▆▄▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.90164
train_loss,0.02646
val_acc,0.8822
val_loss,0.03169


[34m[1mwandb[0m: Agent Starting Run: yhj0ay2e with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6512, Train Loss=0.1207, Val Acc=0.6504, Val Loss=0.1199
Epoch 2: Train Acc=0.7210, Train Loss=0.0909, Val Acc=0.7238, Val Loss=0.0901
Epoch 3: Train Acc=0.7635, Train Loss=0.0756, Val Acc=0.7670, Val Loss=0.0748
Epoch 4: Train Acc=0.7926, Train Loss=0.0661, Val Acc=0.7940, Val Loss=0.0654
Epoch 5: Train Acc=0.8091, Train Loss=0.0596, Val Acc=0.8110, Val Loss=0.0590
Epoch 6: Train Acc=0.8200, Train Loss=0.0550, Val Acc=0.8244, Val Loss=0.0546
Epoch 7: Train Acc=0.8274, Train Loss=0.0516, Val Acc=0.8340, Val Loss=0.0514
Epoch 8: Train Acc=0.8341, Train Loss=0.0491, Val Acc=0.8392, Val Loss=0.0490
Epoch 9: Train Acc=0.8394, Train Loss=0.0471, Val Acc=0.8456, Val Loss=0.0471
Epoch 10: Train Acc=0.8442, Train Loss=0.0456, Val Acc=0.8482, Val Loss=0.0457


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.84422
train_loss,0.04557
val_acc,0.8482
val_loss,0.04567


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hmfl7t9k with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8459, Train Loss=0.0436, Val Acc=0.8482, Val Loss=0.0431
Epoch 2: Train Acc=0.8643, Train Loss=0.0383, Val Acc=0.8674, Val Loss=0.0385
Epoch 3: Train Acc=0.8718, Train Loss=0.0360, Val Acc=0.8728, Val Loss=0.0366
Epoch 4: Train Acc=0.8755, Train Loss=0.0345, Val Acc=0.8746, Val Loss=0.0354
Epoch 5: Train Acc=0.8792, Train Loss=0.0333, Val Acc=0.8754, Val Loss=0.0346
Epoch 6: Train Acc=0.8817, Train Loss=0.0325, Val Acc=0.8774, Val Loss=0.0340
Epoch 7: Train Acc=0.8854, Train Loss=0.0316, Val Acc=0.8818, Val Loss=0.0335
Epoch 8: Train Acc=0.8880, Train Loss=0.0309, Val Acc=0.8834, Val Loss=0.0331
Epoch 9: Train Acc=0.8906, Train Loss=0.0302, Val Acc=0.8832, Val Loss=0.0328
Epoch 10: Train Acc=0.8929, Train Loss=0.0296, Val Acc=0.8828, Val Loss=0.0325


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▅▆▆▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▅▆▆▆▇████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.89285
train_loss,0.02961
val_acc,0.8828
val_loss,0.03253


[34m[1mwandb[0m: Agent Starting Run: 8rhzzkiy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▄▄▃▂▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▄▄▅▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7uo9r251 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6638, Train Loss=0.1122, Val Acc=0.6730, Val Loss=0.1108
Epoch 2: Train Acc=0.7229, Train Loss=0.0855, Val Acc=0.7290, Val Loss=0.0843
Epoch 3: Train Acc=0.7567, Train Loss=0.0735, Val Acc=0.7614, Val Loss=0.0723
Epoch 4: Train Acc=0.7773, Train Loss=0.0663, Val Acc=0.7816, Val Loss=0.0652
Epoch 5: Train Acc=0.7928, Train Loss=0.0614, Val Acc=0.7958, Val Loss=0.0603


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.79276
train_loss,0.06139
val_acc,0.7958
val_loss,0.06029


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hjicc1zd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8241, Train Loss=0.0508, Val Acc=0.8268, Val Loss=0.0499
Epoch 2: Train Acc=0.8484, Train Loss=0.0433, Val Acc=0.8522, Val Loss=0.0424
Epoch 3: Train Acc=0.8594, Train Loss=0.0397, Val Acc=0.8602, Val Loss=0.0396
Epoch 4: Train Acc=0.8678, Train Loss=0.0374, Val Acc=0.8672, Val Loss=0.0380
Epoch 5: Train Acc=0.8731, Train Loss=0.0358, Val Acc=0.8718, Val Loss=0.0369


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.87315
train_loss,0.03578
val_acc,0.8718
val_loss,0.03687


[34m[1mwandb[0m: Agent Starting Run: g9brdo4w with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7223, Train Loss=0.0879, Val Acc=0.7278, Val Loss=0.0872
Epoch 2: Train Acc=0.7770, Train Loss=0.0672, Val Acc=0.7814, Val Loss=0.0665
Epoch 3: Train Acc=0.8039, Train Loss=0.0578, Val Acc=0.8084, Val Loss=0.0571
Epoch 4: Train Acc=0.8182, Train Loss=0.0525, Val Acc=0.8232, Val Loss=0.0518
Epoch 5: Train Acc=0.8271, Train Loss=0.0492, Val Acc=0.8314, Val Loss=0.0486


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.82715
train_loss,0.04924
val_acc,0.8314
val_loss,0.04858


[34m[1mwandb[0m: Agent Starting Run: i3cjmz3s with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7985, Train Loss=0.0586, Val Acc=0.8074, Val Loss=0.0579
Epoch 2: Train Acc=0.8204, Train Loss=0.0515, Val Acc=0.8290, Val Loss=0.0507
Epoch 3: Train Acc=0.8308, Train Loss=0.0484, Val Acc=0.8374, Val Loss=0.0476
Epoch 4: Train Acc=0.8375, Train Loss=0.0464, Val Acc=0.8462, Val Loss=0.0456
Epoch 5: Train Acc=0.8420, Train Loss=0.0450, Val Acc=0.8508, Val Loss=0.0443
Epoch 6: Train Acc=0.8460, Train Loss=0.0439, Val Acc=0.8528, Val Loss=0.0432
Epoch 7: Train Acc=0.8488, Train Loss=0.0430, Val Acc=0.8572, Val Loss=0.0424
Epoch 8: Train Acc=0.8517, Train Loss=0.0421, Val Acc=0.8606, Val Loss=0.0417
Epoch 9: Train Acc=0.8539, Train Loss=0.0414, Val Acc=0.8618, Val Loss=0.0411
Epoch 10: Train Acc=0.8562, Train Loss=0.0408, Val Acc=0.8626, Val Loss=0.0406


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.85622
train_loss,0.0408
val_acc,0.8626
val_loss,0.04059


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 98zw4h99 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1000, Train Loss=0.2366, Val Acc=0.1002, Val Loss=0.2356
Epoch 2: Train Acc=0.1000, Train Loss=0.2342, Val Acc=0.1002, Val Loss=0.2334
Epoch 3: Train Acc=0.1000, Train Loss=0.2327, Val Acc=0.1002, Val Loss=0.2320
Epoch 4: Train Acc=0.1000, Train Loss=0.2317, Val Acc=0.1002, Val Loss=0.2312
Epoch 5: Train Acc=0.1000, Train Loss=0.2311, Val Acc=0.1002, Val Loss=0.2307


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▅▃▂▁
val_acc,▁▁▁▁▁
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.09998
train_loss,0.23111
val_acc,0.1002
val_loss,0.23074


[34m[1mwandb[0m: Agent Starting Run: hofb6in9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▅▅▄▃▂▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▄▅▆▆▇▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 61czv4mr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7059, Train Loss=0.0760, Val Acc=0.7166, Val Loss=0.0759
Epoch 2: Train Acc=0.8046, Train Loss=0.0562, Val Acc=0.8086, Val Loss=0.0558
Epoch 3: Train Acc=0.8382, Train Loss=0.0481, Val Acc=0.8378, Val Loss=0.0485
Epoch 4: Train Acc=0.8536, Train Loss=0.0442, Val Acc=0.8476, Val Loss=0.0457
Epoch 5: Train Acc=0.8602, Train Loss=0.0420, Val Acc=0.8540, Val Loss=0.0444


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇██
train_loss,█▄▂▁▁
val_acc,▁▆▇██
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.86022
train_loss,0.04203
val_acc,0.854
val_loss,0.04442


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ktwole8t with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁▇███
val_acc,▁▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: bo88104z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1517, Train Loss=0.2288, Val Acc=0.1464, Val Loss=0.2287
Epoch 2: Train Acc=0.1647, Train Loss=0.2277, Val Acc=0.1610, Val Loss=0.2276
Epoch 3: Train Acc=0.1753, Train Loss=0.2266, Val Acc=0.1732, Val Loss=0.2265
Epoch 4: Train Acc=0.1853, Train Loss=0.2254, Val Acc=0.1832, Val Loss=0.2253
Epoch 5: Train Acc=0.1941, Train Loss=0.2240, Val Acc=0.1938, Val Loss=0.2239


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▇█
train_loss,█▆▅▃▁
val_acc,▁▃▅▆█
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.19413
train_loss,0.224
val_acc,0.1938
val_loss,0.22388


[34m[1mwandb[0m: Agent Starting Run: i5q6v08p with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Agent Starting Run: tx1ic2r6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁██▇▆
val_acc,▁▁▁▁▁
val_loss,▁▇███

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: 0t0io6kv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.4222, Train Loss=0.1351, Val Acc=0.4276, Val Loss=0.1357
Epoch 2: Train Acc=0.6171, Train Loss=0.0996, Val Acc=0.6206, Val Loss=0.1005
Epoch 3: Train Acc=0.6679, Train Loss=0.0891, Val Acc=0.6714, Val Loss=0.0901
Epoch 4: Train Acc=0.7147, Train Loss=0.0800, Val Acc=0.7204, Val Loss=0.0812
Epoch 5: Train Acc=0.7542, Train Loss=0.0714, Val Acc=0.7548, Val Loss=0.0729
Epoch 6: Train Acc=0.7672, Train Loss=0.0670, Val Acc=0.7680, Val Loss=0.0683
Epoch 7: Train Acc=0.7734, Train Loss=0.0640, Val Acc=0.7772, Val Loss=0.0653
Epoch 8: Train Acc=0.7793, Train Loss=0.0618, Val Acc=0.7796, Val Loss=0.0631
Epoch 9: Train Acc=0.7847, Train Loss=0.0602, Val Acc=0.7880, Val Loss=0.0615
Epoch 10: Train Acc=0.7887, Train Loss=0.0589, Val Acc=0.7924, Val Loss=0.0602


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▇▇█████
train_loss,█▅▄▃▂▂▁▁▁▁
val_acc,▁▅▆▇▇█████
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.78871
train_loss,0.0589
val_acc,0.7924
val_loss,0.06019


[34m[1mwandb[0m: Agent Starting Run: u47emdiv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,████▇▇▆▅▄▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▇███▇▇▆▆▄▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23023
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l35pwlb3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.3874, Train Loss=0.2209, Val Acc=0.3938, Val Loss=0.2208
Epoch 2: Train Acc=0.4435, Train Loss=0.2147, Val Acc=0.4468, Val Loss=0.2144
Epoch 3: Train Acc=0.4421, Train Loss=0.2091, Val Acc=0.4456, Val Loss=0.2087
Epoch 4: Train Acc=0.4449, Train Loss=0.2038, Val Acc=0.4462, Val Loss=0.2034
Epoch 5: Train Acc=0.4495, Train Loss=0.1988, Val Acc=0.4490, Val Loss=0.1983
Epoch 6: Train Acc=0.4537, Train Loss=0.1939, Val Acc=0.4544, Val Loss=0.1934
Epoch 7: Train Acc=0.4585, Train Loss=0.1891, Val Acc=0.4594, Val Loss=0.1886
Epoch 8: Train Acc=0.4616, Train Loss=0.1845, Val Acc=0.4648, Val Loss=0.1840
Epoch 9: Train Acc=0.4648, Train Loss=0.1800, Val Acc=0.4690, Val Loss=0.1794
Epoch 10: Train Acc=0.4659, Train Loss=0.1755, Val Acc=0.4698, Val Loss=0.1749


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▆▆▇▇▇███
train_loss,█▇▆▅▅▄▃▂▂▁
val_acc,▁▆▆▆▆▇▇███
val_loss,█▇▆▅▅▄▃▂▂▁

0,1
epoch,10.0
train_acc,0.46585
train_loss,0.17547
val_acc,0.4698
val_loss,0.17492


[34m[1mwandb[0m: Agent Starting Run: m4er5l2k with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1795, Train Loss=0.2273, Val Acc=0.1788, Val Loss=0.2273
Epoch 2: Train Acc=0.2040, Train Loss=0.2252, Val Acc=0.2086, Val Loss=0.2251
Epoch 3: Train Acc=0.2236, Train Loss=0.2232, Val Acc=0.2270, Val Loss=0.2231
Epoch 4: Train Acc=0.2389, Train Loss=0.2209, Val Acc=0.2454, Val Loss=0.2207
Epoch 5: Train Acc=0.2524, Train Loss=0.2180, Val Acc=0.2592, Val Loss=0.2179


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▇█
train_loss,█▆▅▃▁
val_acc,▁▄▅▇█
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.25244
train_loss,0.21802
val_acc,0.2592
val_loss,0.21786


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rsvywb7f with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4389, Train Loss=0.1732, Val Acc=0.4402, Val Loss=0.1729
Epoch 2: Train Acc=0.6477, Train Loss=0.1017, Val Acc=0.6576, Val Loss=0.1005
Epoch 3: Train Acc=0.6909, Train Loss=0.0819, Val Acc=0.7002, Val Loss=0.0810
Epoch 4: Train Acc=0.7254, Train Loss=0.0732, Val Acc=0.7320, Val Loss=0.0725
Epoch 5: Train Acc=0.7568, Train Loss=0.0675, Val Acc=0.7622, Val Loss=0.0668


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇▇█
train_loss,█▃▂▁▁
val_acc,▁▆▇▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,0.75678
train_loss,0.06751
val_acc,0.7622
val_loss,0.06678


[34m[1mwandb[0m: Agent Starting Run: 8kanzc75 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1939, Train Loss=0.2298, Val Acc=0.1912, Val Loss=0.2298
Epoch 2: Train Acc=0.2342, Train Loss=0.2292, Val Acc=0.2318, Val Loss=0.2292
Epoch 3: Train Acc=0.3035, Train Loss=0.2284, Val Acc=0.3032, Val Loss=0.2284
Epoch 4: Train Acc=0.3578, Train Loss=0.2271, Val Acc=0.3630, Val Loss=0.2271
Epoch 5: Train Acc=0.3643, Train Loss=0.2247, Val Acc=0.3694, Val Loss=0.2246


0,1
epoch,▁▃▅▆█
train_acc,▁▃▆██
train_loss,█▇▆▄▁
val_acc,▁▃▅██
val_loss,█▇▆▄▁

0,1
epoch,5.0
train_acc,0.36435
train_loss,0.2247
val_acc,0.3694
val_loss,0.22456


[34m[1mwandb[0m: Agent Starting Run: fcmwoj92 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▄▂▂▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▃▅▆▆▇▇███

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: myar0o19 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▃▂▁▁
val_acc,▁▁▁▁▁
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: pvte6chk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0980, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.0914, Train Loss=0.2303, Val Acc=0.0912, Val Loss=0.2303
Epoch 4: Train Acc=0.0953, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.0961, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,██▁▄▄
train_loss,█▁▁▁▁
val_acc,█▁▁▁▁
val_loss,▁▇███

0,1
epoch,5.0
train_acc,0.09605
train_loss,0.23026
val_acc,0.0914
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: 09j5ewtg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5675, Train Loss=0.0990, Val Acc=0.5658, Val Loss=0.0990
Epoch 2: Train Acc=0.7414, Train Loss=0.0676, Val Acc=0.7492, Val Loss=0.0680
Epoch 3: Train Acc=0.7690, Train Loss=0.0583, Val Acc=0.7778, Val Loss=0.0587
Epoch 4: Train Acc=0.8116, Train Loss=0.0512, Val Acc=0.8118, Val Loss=0.0522
Epoch 5: Train Acc=0.8489, Train Loss=0.0451, Val Acc=0.8394, Val Loss=0.0475
Epoch 6: Train Acc=0.8593, Train Loss=0.0421, Val Acc=0.8502, Val Loss=0.0455
Epoch 7: Train Acc=0.8659, Train Loss=0.0400, Val Acc=0.8524, Val Loss=0.0444
Epoch 8: Train Acc=0.8719, Train Loss=0.0384, Val Acc=0.8544, Val Loss=0.0435
Epoch 9: Train Acc=0.8754, Train Loss=0.0372, Val Acc=0.8576, Val Loss=0.0429
Epoch 10: Train Acc=0.8774, Train Loss=0.0364, Val Acc=0.8570, Val Loss=0.0427


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▇▇█████
train_loss,█▄▃▃▂▂▁▁▁▁
val_acc,▁▅▆▇██████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.87744
train_loss,0.03641
val_acc,0.857
val_loss,0.04269


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k2geqdsh with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8153, Train Loss=0.0535, Val Acc=0.8234, Val Loss=0.0528
Epoch 2: Train Acc=0.8323, Train Loss=0.0485, Val Acc=0.8400, Val Loss=0.0478
Epoch 3: Train Acc=0.8397, Train Loss=0.0459, Val Acc=0.8480, Val Loss=0.0453
Epoch 4: Train Acc=0.8452, Train Loss=0.0442, Val Acc=0.8554, Val Loss=0.0437
Epoch 5: Train Acc=0.8498, Train Loss=0.0429, Val Acc=0.8596, Val Loss=0.0426
Epoch 6: Train Acc=0.8536, Train Loss=0.0419, Val Acc=0.8622, Val Loss=0.0417
Epoch 7: Train Acc=0.8561, Train Loss=0.0410, Val Acc=0.8656, Val Loss=0.0409
Epoch 8: Train Acc=0.8586, Train Loss=0.0403, Val Acc=0.8688, Val Loss=0.0403
Epoch 9: Train Acc=0.8605, Train Loss=0.0397, Val Acc=0.8692, Val Loss=0.0397
Epoch 10: Train Acc=0.8625, Train Loss=0.0391, Val Acc=0.8696, Val Loss=0.0393


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▅▆▇▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.86255
train_loss,0.03906
val_acc,0.8696
val_loss,0.03926


[34m[1mwandb[0m: Agent Starting Run: h7kjz57d with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2850, Train Loss=0.1744, Val Acc=0.2734, Val Loss=0.1746
Epoch 2: Train Acc=0.3161, Train Loss=0.1523, Val Acc=0.3090, Val Loss=0.1532
Epoch 3: Train Acc=0.5565, Train Loss=0.1125, Val Acc=0.5606, Val Loss=0.1126
Epoch 4: Train Acc=0.7374, Train Loss=0.0772, Val Acc=0.7428, Val Loss=0.0773
Epoch 5: Train Acc=0.7675, Train Loss=0.0688, Val Acc=0.7714, Val Loss=0.0693


0,1
epoch,▁▃▅▆█
train_acc,▁▁▅██
train_loss,█▇▄▂▁
val_acc,▁▂▅██
val_loss,█▇▄▂▁

0,1
epoch,5.0
train_acc,0.76751
train_loss,0.06881
val_acc,0.7714
val_loss,0.06931


[34m[1mwandb[0m: Agent Starting Run: ntbxxysp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▄▂▁▁
val_acc,▁▁▁▁▁
val_loss,▁▄▆▇█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wnumft7s with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2304
Epoch 2: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2301, Val Acc=0.0976, Val Loss=0.2302
Epoch 5: Train Acc=0.1005, Train Loss=0.2300, Val Acc=0.0978, Val Loss=0.2301


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁█
train_loss,█▇▅▃▁
val_acc,▁▁▁▁█
val_loss,█▇▅▃▁

0,1
epoch,5.0
train_acc,0.10047
train_loss,0.23004
val_acc,0.0978
val_loss,0.23011


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4d0ov98g with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▂▅▇██████
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▅▇▇██████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: q9c6kjpv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2000, Train Loss=0.1724, Val Acc=0.1928, Val Loss=0.1723
Epoch 2: Train Acc=0.2005, Train Loss=0.1699, Val Acc=0.1928, Val Loss=0.1697
Epoch 3: Train Acc=0.2014, Train Loss=0.1689, Val Acc=0.2162, Val Loss=0.1687
Epoch 4: Train Acc=0.2013, Train Loss=0.1683, Val Acc=0.2154, Val Loss=0.1681
Epoch 5: Train Acc=0.2021, Train Loss=0.1678, Val Acc=0.2160, Val Loss=0.1676


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▅█
train_loss,█▄▃▂▁
val_acc,▁▁███
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.20215
train_loss,0.1678
val_acc,0.216
val_loss,0.16759


[34m[1mwandb[0m: Agent Starting Run: 9ops62po with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1045, Train Loss=0.2300, Val Acc=0.1180, Val Loss=0.2299
Epoch 2: Train Acc=0.1100, Train Loss=0.2295, Val Acc=0.1230, Val Loss=0.2294
Epoch 3: Train Acc=0.1182, Train Loss=0.2290, Val Acc=0.1308, Val Loss=0.2289
Epoch 4: Train Acc=0.1323, Train Loss=0.2285, Val Acc=0.1456, Val Loss=0.2284
Epoch 5: Train Acc=0.1530, Train Loss=0.2280, Val Acc=0.1666, Val Loss=0.2279


0,1
epoch,▁▃▅▆█
train_acc,▁▂▃▅█
train_loss,█▆▅▃▁
val_acc,▁▂▃▅█
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.153
train_loss,0.22802
val_acc,0.1666
val_loss,0.2279


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rnnbrp9i with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7562, Train Loss=0.0793, Val Acc=0.7596, Val Loss=0.0788
Epoch 2: Train Acc=0.8026, Train Loss=0.0640, Val Acc=0.8028, Val Loss=0.0637
Epoch 3: Train Acc=0.8182, Train Loss=0.0569, Val Acc=0.8194, Val Loss=0.0565
Epoch 4: Train Acc=0.8269, Train Loss=0.0528, Val Acc=0.8292, Val Loss=0.0524
Epoch 5: Train Acc=0.8323, Train Loss=0.0502, Val Acc=0.8358, Val Loss=0.0497
Epoch 6: Train Acc=0.8363, Train Loss=0.0482, Val Acc=0.8404, Val Loss=0.0478
Epoch 7: Train Acc=0.8401, Train Loss=0.0467, Val Acc=0.8434, Val Loss=0.0463
Epoch 8: Train Acc=0.8438, Train Loss=0.0455, Val Acc=0.8492, Val Loss=0.0450
Epoch 9: Train Acc=0.8465, Train Loss=0.0444, Val Acc=0.8502, Val Loss=0.0441
Epoch 10: Train Acc=0.8493, Train Loss=0.0435, Val Acc=0.8528, Val Loss=0.0432


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.84935
train_loss,0.04353
val_acc,0.8528
val_loss,0.04321


[34m[1mwandb[0m: Agent Starting Run: qew9kh42 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2302, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1300, Train Loss=0.2301, Val Acc=0.1224, Val Loss=0.2301


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁█
train_loss,███▇▁
val_acc,▁▁▁▁█
val_loss,███▇▁

0,1
epoch,5.0
train_acc,0.13002
train_loss,0.23009
val_acc,0.1224
val_loss,0.23011


[34m[1mwandb[0m: Agent Starting Run: 98sv54ys with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 2: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 3: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 4: Train Acc=0.1002, Train Loss=0.2307, Val Acc=0.0980, Val Loss=0.2307
Epoch 5: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2307
Epoch 6: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2306
Epoch 7: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2306
Epoch 8: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2306
Epoch 9: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2306
Epoch 10: Train Acc=0.1002, Train Loss=0.2306, Val Acc=0.0980, Val Loss=0.2306


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▅▄▃▂▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▅▅▄▃▂▂▁

0,1
epoch,10.0
train_acc,0.10018
train_loss,0.23058
val_acc,0.098
val_loss,0.23062


[34m[1mwandb[0m: Agent Starting Run: juu6aetc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▇▆▄▁
val_acc,▁▁▁▁▁
val_loss,▇█▇▅▁

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23022
val_acc,0.0914
val_loss,0.23025


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: knovjpri with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6663, Train Loss=0.0909, Val Acc=0.6678, Val Loss=0.0909
Epoch 2: Train Acc=0.7414, Train Loss=0.0734, Val Acc=0.7420, Val Loss=0.0738
Epoch 3: Train Acc=0.7500, Train Loss=0.0676, Val Acc=0.7444, Val Loss=0.0683
Epoch 4: Train Acc=0.7926, Train Loss=0.0582, Val Acc=0.7902, Val Loss=0.0584
Epoch 5: Train Acc=0.8234, Train Loss=0.0512, Val Acc=0.8164, Val Loss=0.0520


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▇█
train_loss,█▅▄▂▁
val_acc,▁▄▅▇█
val_loss,█▅▄▂▁

0,1
epoch,5.0
train_acc,0.82344
train_loss,0.05117
val_acc,0.8164
val_loss,0.05202


[34m[1mwandb[0m: Agent Starting Run: 1fnses7r with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▁▇███████
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: di4tq4wk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 2: Train Acc=0.1919, Train Loss=0.2296, Val Acc=0.1870, Val Loss=0.2296
Epoch 3: Train Acc=0.3335, Train Loss=0.1524, Val Acc=0.3428, Val Loss=0.1519
Epoch 4: Train Acc=0.6090, Train Loss=0.0952, Val Acc=0.6198, Val Loss=0.0947
Epoch 5: Train Acc=0.7036, Train Loss=0.0821, Val Acc=0.7056, Val Loss=0.0820


0,1
epoch,▁▃▅▆█
train_acc,▁▂▄▇█
train_loss,██▄▂▁
val_acc,▁▂▄▇█
val_loss,██▄▂▁

0,1
epoch,5.0
train_acc,0.7036
train_loss,0.08215
val_acc,0.7056
val_loss,0.08202


[34m[1mwandb[0m: Agent Starting Run: kxrqnaqw with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7249, Train Loss=0.0850, Val Acc=0.7280, Val Loss=0.0854
Epoch 2: Train Acc=0.8002, Train Loss=0.0559, Val Acc=0.8006, Val Loss=0.0562
Epoch 3: Train Acc=0.8433, Train Loss=0.0461, Val Acc=0.8450, Val Loss=0.0465
Epoch 4: Train Acc=0.8566, Train Loss=0.0419, Val Acc=0.8568, Val Loss=0.0428
Epoch 5: Train Acc=0.8653, Train Loss=0.0392, Val Acc=0.8628, Val Loss=0.0407
Epoch 6: Train Acc=0.8717, Train Loss=0.0373, Val Acc=0.8658, Val Loss=0.0393
Epoch 7: Train Acc=0.8761, Train Loss=0.0358, Val Acc=0.8706, Val Loss=0.0382
Epoch 8: Train Acc=0.8800, Train Loss=0.0346, Val Acc=0.8714, Val Loss=0.0373
Epoch 9: Train Acc=0.8836, Train Loss=0.0336, Val Acc=0.8732, Val Loss=0.0365
Epoch 10: Train Acc=0.8865, Train Loss=0.0327, Val Acc=0.8742, Val Loss=0.0359


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_acc,▁▄▇▇▇█████
val_loss,█▄▂▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.88649
train_loss,0.03272
val_acc,0.8742
val_loss,0.03593


[34m[1mwandb[0m: Agent Starting Run: kkg77uci with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7653, Train Loss=0.0662, Val Acc=0.7724, Val Loss=0.0657
Epoch 2: Train Acc=0.8151, Train Loss=0.0526, Val Acc=0.8202, Val Loss=0.0520
Epoch 3: Train Acc=0.8285, Train Loss=0.0483, Val Acc=0.8332, Val Loss=0.0476
Epoch 4: Train Acc=0.8368, Train Loss=0.0458, Val Acc=0.8382, Val Loss=0.0453
Epoch 5: Train Acc=0.8420, Train Loss=0.0441, Val Acc=0.8462, Val Loss=0.0438
Epoch 6: Train Acc=0.8485, Train Loss=0.0427, Val Acc=0.8490, Val Loss=0.0427
Epoch 7: Train Acc=0.8528, Train Loss=0.0413, Val Acc=0.8548, Val Loss=0.0416
Epoch 8: Train Acc=0.8568, Train Loss=0.0400, Val Acc=0.8582, Val Loss=0.0406
Epoch 9: Train Acc=0.8608, Train Loss=0.0389, Val Acc=0.8624, Val Loss=0.0397
Epoch 10: Train Acc=0.8644, Train Loss=0.0378, Val Acc=0.8630, Val Loss=0.0390


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▅▆▆▇▇▇███
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.8644
train_loss,0.03783
val_acc,0.863
val_loss,0.03895


[34m[1mwandb[0m: Agent Starting Run: ecxfyz3o with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▃▄▅▆▆▇█

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23026


[34m[1mwandb[0m: Agent Starting Run: 59pgcocr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6596, Train Loss=0.0936, Val Acc=0.6640, Val Loss=0.0933
Epoch 2: Train Acc=0.7091, Train Loss=0.0825, Val Acc=0.7092, Val Loss=0.0823
Epoch 3: Train Acc=0.7504, Train Loss=0.0713, Val Acc=0.7478, Val Loss=0.0715
Epoch 4: Train Acc=0.7661, Train Loss=0.0646, Val Acc=0.7660, Val Loss=0.0652
Epoch 5: Train Acc=0.7724, Train Loss=0.0620, Val Acc=0.7732, Val Loss=0.0627
Epoch 6: Train Acc=0.7783, Train Loss=0.0603, Val Acc=0.7832, Val Loss=0.0611
Epoch 7: Train Acc=0.7927, Train Loss=0.0583, Val Acc=0.7920, Val Loss=0.0596
Epoch 8: Train Acc=0.7895, Train Loss=0.0598, Val Acc=0.7930, Val Loss=0.0618
Epoch 9: Train Acc=0.7871, Train Loss=0.0611, Val Acc=0.7882, Val Loss=0.0636
Epoch 10: Train Acc=0.7985, Train Loss=0.0579, Val Acc=0.7936, Val Loss=0.0606


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▆▆▇▇██▇█
train_loss,█▆▄▂▂▁▁▁▂▁
val_acc,▁▃▆▇▇▇████
val_loss,█▆▃▂▂▁▁▁▂▁

0,1
epoch,10.0
train_acc,0.79855
train_loss,0.05787
val_acc,0.7936
val_loss,0.06063


[34m[1mwandb[0m: Agent Starting Run: lu7obojr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁████
val_acc,▁▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: 9v7cssav with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6706, Train Loss=0.0959, Val Acc=0.6754, Val Loss=0.0949
Epoch 2: Train Acc=0.7377, Train Loss=0.0728, Val Acc=0.7414, Val Loss=0.0721
Epoch 3: Train Acc=0.7798, Train Loss=0.0636, Val Acc=0.7868, Val Loss=0.0628
Epoch 4: Train Acc=0.8019, Train Loss=0.0578, Val Acc=0.8076, Val Loss=0.0571
Epoch 5: Train Acc=0.8141, Train Loss=0.0540, Val Acc=0.8210, Val Loss=0.0533
Epoch 6: Train Acc=0.8232, Train Loss=0.0513, Val Acc=0.8282, Val Loss=0.0506
Epoch 7: Train Acc=0.8287, Train Loss=0.0492, Val Acc=0.8330, Val Loss=0.0486
Epoch 8: Train Acc=0.8335, Train Loss=0.0477, Val Acc=0.8360, Val Loss=0.0472
Epoch 9: Train Acc=0.8373, Train Loss=0.0464, Val Acc=0.8392, Val Loss=0.0460
Epoch 10: Train Acc=0.8406, Train Loss=0.0454, Val Acc=0.8442, Val Loss=0.0451


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇████
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▆▆▇▇████
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.8406
train_loss,0.04538
val_acc,0.8442
val_loss,0.04506


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8v5s30go with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2142, Train Loss=0.1671, Val Acc=0.2108, Val Loss=0.1670
Epoch 2: Train Acc=0.4101, Train Loss=0.1315, Val Acc=0.4156, Val Loss=0.1317
Epoch 3: Train Acc=0.5262, Train Loss=0.1146, Val Acc=0.5268, Val Loss=0.1147
Epoch 4: Train Acc=0.5849, Train Loss=0.1014, Val Acc=0.5886, Val Loss=0.1014
Epoch 5: Train Acc=0.6147, Train Loss=0.0928, Val Acc=0.6220, Val Loss=0.0925


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.61471
train_loss,0.09277
val_acc,0.622
val_loss,0.09249


[34m[1mwandb[0m: Agent Starting Run: 21vat5hk with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 2: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 3: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 4: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 5: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 6: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 7: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 8: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 9: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 10: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▄▃▂▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▅▃▁▁▁▂▃▄▆█

0,1
epoch,10.0
train_acc,0.09993
train_loss,0.23026
val_acc,0.1008
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: 3vtonuiy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▂▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


#Question - 7

In [15]:
def plot_confusion_matrix(y_true, y_pred, config_name):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=range(10), yticklabels=range(10))
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix - {config_name}')

    wandb.log({f"Confusion Matrix - {config_name}": wandb.Image(plt)})
    plt.close()



def evaluate_best_configs(best_configs):
    x_train, y_train, x_val, y_val, x_test, y_test = Data_Preprocess()
    y_test_labels = np.argmax(y_test, axis=1)

    for config in best_configs:
        wandb.init(project="Vinod_Assignment1_Question7", name=f"Confusion_Matrix_{config['name']}", reinit=True)

        if config['optimizer'] == 'rmsprop':
            trained_weights = RMS_Opt(config['learning_rate'], x_train, y_train, x_val, y_val, config['epochs'], config['activation'], config['num_layers'], config['hidden_size'], config['weight_init'], config['batch_size'], 28*28, 10)
        elif config['optimizer'] == 'adam':
            trained_weights = Adam_Opt(config['learning_rate'], x_train, y_train, x_val, y_val, config['epochs'], config['activation'], config['num_layers'], config['hidden_size'], config['weight_init'], config['batch_size'], 28*28, 10)
        else:
            raise ValueError(f"Unsupported optimizer: {config['optimizer']}")

        y_pred_probs, _ = Forward_Propogation(x_test, trained_weights, config['num_layers'], config['activation'])
        y_pred_labels = np.argmax(y_pred_probs, axis=1)
        plot_confusion_matrix(y_test_labels, y_pred_labels, config['name'])
        wandb.finish()


if __name__ == "__main__":
    best_configs = [
    {
        'name': 'Best_Config_1',
        'epochs': 10,
        'num_layers': 5,
        'hidden_size': 128,
        'learning_rate': 0.001,
        'batch_size': 64,
        'optimizer': 'adam',
        'weight_decay': 0.5,
        'weight_init': 'xavier',
        'activation': 'tanh'
    },
    {
        'name': 'Best_Config_2',
        'epochs': 10,
        'num_layers': 4,
        'hidden_size': 64,
        'learning_rate': 0.001,
        'batch_size': 16,
        'optimizer': 'adam',
        'weight_decay': 0.0005,
        'weight_init': 'xavier',
        'activation': 'relu'
    }
]
evaluate_best_configs(best_configs)



Epoch 1: Train Acc=0.8551, Train Loss=0.0390, Val Acc=0.8534, Val Loss=0.0397
Epoch 2: Train Acc=0.8758, Train Loss=0.0339, Val Acc=0.8696, Val Loss=0.0353
Epoch 3: Train Acc=0.8834, Train Loss=0.0317, Val Acc=0.8754, Val Loss=0.0336
Epoch 4: Train Acc=0.8884, Train Loss=0.0303, Val Acc=0.8826, Val Loss=0.0327
Epoch 5: Train Acc=0.8925, Train Loss=0.0292, Val Acc=0.8858, Val Loss=0.0320
Epoch 6: Train Acc=0.8960, Train Loss=0.0283, Val Acc=0.8886, Val Loss=0.0316
Epoch 7: Train Acc=0.8983, Train Loss=0.0275, Val Acc=0.8884, Val Loss=0.0312
Epoch 8: Train Acc=0.9008, Train Loss=0.0268, Val Acc=0.8884, Val Loss=0.0310
Epoch 9: Train Acc=0.9027, Train Loss=0.0261, Val Acc=0.8886, Val Loss=0.0308
Epoch 10: Train Acc=0.9045, Train Loss=0.0256, Val Acc=0.8886, Val Loss=0.0307


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▄▅▇▇█████
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.90445
train_loss,0.02555
val_acc,0.8886
val_loss,0.03071


Epoch 1: Train Acc=0.8523, Train Loss=0.0412, Val Acc=0.8528, Val Loss=0.0408
Epoch 2: Train Acc=0.8686, Train Loss=0.0362, Val Acc=0.8706, Val Loss=0.0368
Epoch 3: Train Acc=0.8780, Train Loss=0.0334, Val Acc=0.8772, Val Loss=0.0348
Epoch 4: Train Acc=0.8823, Train Loss=0.0320, Val Acc=0.8798, Val Loss=0.0337
Epoch 5: Train Acc=0.8873, Train Loss=0.0307, Val Acc=0.8834, Val Loss=0.0328
Epoch 6: Train Acc=0.8910, Train Loss=0.0295, Val Acc=0.8852, Val Loss=0.0321
Epoch 7: Train Acc=0.8942, Train Loss=0.0288, Val Acc=0.8880, Val Loss=0.0316
Epoch 8: Train Acc=0.8969, Train Loss=0.0280, Val Acc=0.8898, Val Loss=0.0313
Epoch 9: Train Acc=0.8993, Train Loss=0.0272, Val Acc=0.8894, Val Loss=0.0310
Epoch 10: Train Acc=0.9016, Train Loss=0.0267, Val Acc=0.8904, Val Loss=0.0309


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▅▆▇▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▄▆▆▇▇████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.90158
train_loss,0.02673
val_acc,0.8904
val_loss,0.03085


In [16]:
import wandb
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import pandas as pd

def plot_confusion_matrix(y_true, y_pred, config_name, class_names):
    """Logs an interactive confusion matrix to WandB."""
    cm = confusion_matrix(y_true, y_pred)

    # Log interactive Confusion Matrix
    wandb.log({
        f"Confusion Matrix - {config_name}": wandb.plot.confusion_matrix(
            probs=None,
            y_true=y_true,
            preds=y_pred,
            class_names=class_names
        )
    })


def evaluate_best_configs(best_configs):
    x_train, y_train, x_val, y_val, x_test, y_test = Data_Preprocess()
    y_test_labels = np.argmax(y_test, axis=1)

    class_names = ['Ankle boot', 'Bag', 'Coat', 'Dress', 'Pullover', 'Sandal', 'Shirt', 'Sneaker', 'T-shirt/top', 'Trouser']

    for config in best_configs:
        wandb.init(project="Vinod_Assignment1_Question7_A", name=f"Confusion_Matrix_{config['name']}", reinit=True)

        if config['optimizer'] == 'rmsprop':
            trained_weights = RMS_Opt(config['learning_rate'], x_train, y_train, x_val, y_val, config['epochs'], config['activation'], config['num_layers'], config['hidden_size'], config['weight_init'], config['batch_size'], 28*28, 10)
        elif config['optimizer'] == 'adam':
            trained_weights = Adam_Opt(config['learning_rate'], x_train, y_train, x_val, y_val, config['epochs'], config['activation'], config['num_layers'], config['hidden_size'], config['weight_init'], config['batch_size'], 28*28, 10)
        else:
            raise ValueError(f"Unsupported optimizer: {config['optimizer']}")

        y_pred_probs, _ = Forward_Propogation(x_test, trained_weights, config['num_layers'], config['activation'])
        y_pred_labels = np.argmax(y_pred_probs, axis=1)

        # Log interactive confusion matrix
        plot_confusion_matrix(y_test_labels, y_pred_labels, config['name'], class_names)

        wandb.finish()


if __name__ == "__main__":
    best_configs = [
    {
        'name': 'Best_Config_1',
        'epochs': 10,
        'num_layers': 5,
        'hidden_size': 128,
        'learning_rate': 0.001,
        'batch_size': 64,
        'optimizer': 'adam',
        'weight_decay': 0.5,
        'weight_init': 'xavier',
        'activation': 'tanh'
    },
    {
        'name': 'Best_Config_2',
        'epochs': 10,
        'num_layers': 4,
        'hidden_size': 64,
        'learning_rate': 0.001,
        'batch_size': 16,
        'optimizer': 'adam',
        'weight_decay': 0.0005,
        'weight_init': 'xavier',
        'activation': 'relu'
    }
]
evaluate_best_configs(best_configs)


Epoch 1: Train Acc=0.8550, Train Loss=0.0393, Val Acc=0.8504, Val Loss=0.0402
Epoch 2: Train Acc=0.8776, Train Loss=0.0336, Val Acc=0.8714, Val Loss=0.0352
Epoch 3: Train Acc=0.8849, Train Loss=0.0314, Val Acc=0.8784, Val Loss=0.0335
Epoch 4: Train Acc=0.8890, Train Loss=0.0301, Val Acc=0.8814, Val Loss=0.0325
Epoch 5: Train Acc=0.8924, Train Loss=0.0290, Val Acc=0.8824, Val Loss=0.0319
Epoch 6: Train Acc=0.8958, Train Loss=0.0281, Val Acc=0.8830, Val Loss=0.0314
Epoch 7: Train Acc=0.8990, Train Loss=0.0273, Val Acc=0.8844, Val Loss=0.0310
Epoch 8: Train Acc=0.9009, Train Loss=0.0267, Val Acc=0.8850, Val Loss=0.0307
Epoch 9: Train Acc=0.9027, Train Loss=0.0261, Val Acc=0.8854, Val Loss=0.0305
Epoch 10: Train Acc=0.9041, Train Loss=0.0255, Val Acc=0.8860, Val Loss=0.0304


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▅▇▇▇▇████
val_loss,█▄▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.90407
train_loss,0.02553
val_acc,0.886
val_loss,0.0304


Epoch 1: Train Acc=0.8523, Train Loss=0.0424, Val Acc=0.8556, Val Loss=0.0416
Epoch 2: Train Acc=0.8651, Train Loss=0.0378, Val Acc=0.8658, Val Loss=0.0380
Epoch 3: Train Acc=0.8740, Train Loss=0.0348, Val Acc=0.8742, Val Loss=0.0360
Epoch 4: Train Acc=0.8804, Train Loss=0.0330, Val Acc=0.8774, Val Loss=0.0350
Epoch 5: Train Acc=0.8847, Train Loss=0.0317, Val Acc=0.8778, Val Loss=0.0344
Epoch 6: Train Acc=0.8886, Train Loss=0.0305, Val Acc=0.8800, Val Loss=0.0338
Epoch 7: Train Acc=0.8915, Train Loss=0.0294, Val Acc=0.8840, Val Loss=0.0333
Epoch 8: Train Acc=0.8946, Train Loss=0.0285, Val Acc=0.8842, Val Loss=0.0330
Epoch 9: Train Acc=0.8964, Train Loss=0.0280, Val Acc=0.8850, Val Loss=0.0328
Epoch 10: Train Acc=0.8984, Train Loss=0.0273, Val Acc=0.8862, Val Loss=0.0326


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▇▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_acc,▁▃▅▆▆▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.89842
train_loss,0.02732
val_acc,0.8862
val_loss,0.03264


#Question - 8

In [28]:
def main():
    sweep_config = {
        'method': 'bayes',
        'metric': {'name': 'accuracy', 'goal': 'maximize'},
        'parameters': {
            'epochs': {'values': [5, 10]},
            'num_layers': {'values': [3, 4, 5]},
            'hidden_size': {'values': [32, 64, 128]},
            'weight_decay': {'values': [0, 0.0005, 0.5]},
            'learning_rate': {'values': [1e-3, 1e-4]},
            'optimizer': {'values': ['stochastic', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']},
            'batch_size': {'values': [16, 32, 64]},
            'weight_init': {'values': ['random', 'xavier']},
            'activation': {'values': ['sigmoid', 'tanh', 'relu']},
        }
    }
    sweep_id = wandb.sweep(sweep_config, project="Vinod_Assignment1_Question8_mse")
    wandb.agent(sweep_id, function=train, count=50)

def train():
    wandb.init(project="Vinod_Assignment1_Question8_mse")
    # wandb.init()
    config = wandb.config
    run_name = f"Opt-{config.optimizer}_Layers-{config.num_layers}_HS-{config.hidden_size}_LR-{config.learning_rate}_Batch-{config.batch_size}_Act-{config.activation}"
    wandb.run.name = run_name

    # x_train, y_train, x_val, y_val, _, _ = Data_Preprocess()

    optimizer = config.optimizer

    if optimizer == 'stochastic':
        trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
        # trained_weights = Stochastic_GD(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size)
    elif optimizer == 'momentum':
        trained_weights = Momentum_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
    elif optimizer == 'nag':
        trained_weights = Nesterov_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
    elif optimizer == 'rmsprop':
        trained_weights = RMS_Opt(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
    elif optimizer == 'adam':
        trained_weights = Adam_Opt(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')
    elif optimizer == 'nadam':
        trained_weights = NAdam_Opt(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mse')

    #wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss})

    wandb.finish()

if __name__ == "__main__":
    main()


Create sweep with ID: 106csc1h
Sweep URL: https://wandb.ai/viinod9-iitm/Vinod_Assignment1_Question8_mse/sweeps/106csc1h


[34m[1mwandb[0m: Agent Starting Run: 3z0xbc30 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6337, Train Loss=0.0626, Val Acc=0.6424, Val Loss=0.0621
Epoch 2: Train Acc=0.6799, Train Loss=0.0494, Val Acc=0.6870, Val Loss=0.0487
Epoch 3: Train Acc=0.7142, Train Loss=0.0429, Val Acc=0.7236, Val Loss=0.0422
Epoch 4: Train Acc=0.7395, Train Loss=0.0390, Val Acc=0.7466, Val Loss=0.0383
Epoch 5: Train Acc=0.7579, Train Loss=0.0361, Val Acc=0.7660, Val Loss=0.0353


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.75795
train_loss,0.03607
val_acc,0.766
val_loss,0.03533


[34m[1mwandb[0m: Agent Starting Run: sqa0hrdz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 2: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 6: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 7: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 8: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 9: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 10: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▇████████
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▇████████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09


[34m[1mwandb[0m: Agent Starting Run: mvqsl9h6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2583, Train Loss=0.0802, Val Acc=0.2508, Val Loss=0.0802
Epoch 2: Train Acc=0.3652, Train Loss=0.0689, Val Acc=0.3586, Val Loss=0.0687
Epoch 3: Train Acc=0.4561, Train Loss=0.0632, Val Acc=0.4564, Val Loss=0.0627
Epoch 4: Train Acc=0.4845, Train Loss=0.0604, Val Acc=0.4820, Val Loss=0.0599
Epoch 5: Train Acc=0.5069, Train Loss=0.0587, Val Acc=0.5046, Val Loss=0.0581


0,1
epoch,▁▃▅▆█
train_acc,▁▄▇▇█
train_loss,█▄▂▂▁
val_acc,▁▄▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.50687
train_loss,0.05867
val_acc,0.5046
val_loss,0.05811


[34m[1mwandb[0m: Agent Starting Run: uw6tf3n8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.2022, Train Loss=0.0802, Val Acc=0.2012, Val Loss=0.0802
Epoch 2: Train Acc=0.2046, Train Loss=0.0801, Val Acc=0.2044, Val Loss=0.0801
Epoch 3: Train Acc=0.3085, Train Loss=0.0725, Val Acc=0.2966, Val Loss=0.0727
Epoch 4: Train Acc=0.3866, Train Loss=0.0711, Val Acc=0.3782, Val Loss=0.0713
Epoch 5: Train Acc=0.4553, Train Loss=0.0658, Val Acc=0.4618, Val Loss=0.0659
Epoch 6: Train Acc=0.4846, Train Loss=0.0610, Val Acc=0.4928, Val Loss=0.0610
Epoch 7: Train Acc=0.5341, Train Loss=0.0570, Val Acc=0.5402, Val Loss=0.0569
Epoch 8: Train Acc=0.6014, Train Loss=0.0534, Val Acc=0.6078, Val Loss=0.0533
Epoch 9: Train Acc=0.6303, Train Loss=0.0502, Val Acc=0.6354, Val Loss=0.0500
Epoch 10: Train Acc=0.6498, Train Loss=0.0473, Val Acc=0.6514, Val Loss=0.0471


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▃▄▅▅▆▇██
train_loss,██▆▆▅▄▃▂▂▁
val_acc,▁▁▂▄▅▆▆▇██
val_loss,██▆▆▅▄▃▂▂▁

0,1
epoch,10.0
train_acc,0.6498
train_loss,0.04732
val_acc,0.6514
val_loss,0.04715


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8pkzte18 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1927, Train Loss=0.0856, Val Acc=0.1894, Val Loss=0.0856
Epoch 2: Train Acc=0.1996, Train Loss=0.0806, Val Acc=0.1980, Val Loss=0.0806
Epoch 3: Train Acc=0.2000, Train Loss=0.0805, Val Acc=0.1990, Val Loss=0.0805
Epoch 4: Train Acc=0.2007, Train Loss=0.0805, Val Acc=0.1996, Val Loss=0.0804
Epoch 5: Train Acc=0.2011, Train Loss=0.0804, Val Acc=0.2002, Val Loss=0.0804


0,1
epoch,▁▃▅▆█
train_acc,▁▇▇██
train_loss,█▁▁▁▁
val_acc,▁▇▇██
val_loss,█▁▁▁▁

0,1
epoch,5.0
train_acc,0.20105
train_loss,0.08044
val_acc,0.2002
val_loss,0.08042


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7ot6pv6c with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.2948, Train Loss=0.0830, Val Acc=0.2972, Val Loss=0.0830
Epoch 2: Train Acc=0.4328, Train Loss=0.0773, Val Acc=0.4360, Val Loss=0.0773
Epoch 3: Train Acc=0.5428, Train Loss=0.0630, Val Acc=0.5482, Val Loss=0.0628
Epoch 4: Train Acc=0.6565, Train Loss=0.0438, Val Acc=0.6670, Val Loss=0.0431
Epoch 5: Train Acc=0.6991, Train Loss=0.0386, Val Acc=0.7040, Val Loss=0.0378
Epoch 6: Train Acc=0.7270, Train Loss=0.0357, Val Acc=0.7358, Val Loss=0.0348
Epoch 7: Train Acc=0.7482, Train Loss=0.0335, Val Acc=0.7580, Val Loss=0.0326
Epoch 8: Train Acc=0.7659, Train Loss=0.0318, Val Acc=0.7748, Val Loss=0.0309
Epoch 9: Train Acc=0.7795, Train Loss=0.0304, Val Acc=0.7854, Val Loss=0.0294
Epoch 10: Train Acc=0.7913, Train Loss=0.0292, Val Acc=0.7990, Val Loss=0.0282


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▆▇▇▇███
train_loss,█▇▅▃▂▂▂▁▁▁
val_acc,▁▃▅▆▇▇▇███
val_loss,█▇▅▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.79135
train_loss,0.02919
val_acc,0.799
val_loss,0.02822


[34m[1mwandb[0m: Agent Starting Run: 7e8b5oya with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.0922, Val Acc=0.0976, Val Loss=0.0921
Epoch 2: Train Acc=0.1002, Train Loss=0.0918, Val Acc=0.0976, Val Loss=0.0917
Epoch 3: Train Acc=0.1002, Train Loss=0.0916, Val Acc=0.0976, Val Loss=0.0915
Epoch 4: Train Acc=0.1002, Train Loss=0.0913, Val Acc=0.0976, Val Loss=0.0912
Epoch 5: Train Acc=0.1002, Train Loss=0.0912, Val Acc=0.0976, Val Loss=0.0910
Epoch 6: Train Acc=0.1002, Train Loss=0.0910, Val Acc=0.0976, Val Loss=0.0909
Epoch 7: Train Acc=0.1002, Train Loss=0.0908, Val Acc=0.0976, Val Loss=0.0907
Epoch 8: Train Acc=0.1002, Train Loss=0.0907, Val Acc=0.0976, Val Loss=0.0906
Epoch 9: Train Acc=0.1002, Train Loss=0.0906, Val Acc=0.0976, Val Loss=0.0905
Epoch 10: Train Acc=0.1002, Train Loss=0.0905, Val Acc=0.0976, Val Loss=0.0904


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▅▄▄▃▂▂▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▅▄▄▃▂▂▁▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.09054
val_acc,0.0976
val_loss,0.09045


[34m[1mwandb[0m: Agent Starting Run: nmlpk5ql with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 2: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 6: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 7: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 8: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 9: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 10: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▂▅▇▇▇████
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▅▇▇██████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 04b7mm6b with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8319, Train Loss=0.0233, Val Acc=0.8378, Val Loss=0.0224
Epoch 2: Train Acc=0.8581, Train Loss=0.0196, Val Acc=0.8626, Val Loss=0.0197
Epoch 3: Train Acc=0.8752, Train Loss=0.0174, Val Acc=0.8744, Val Loss=0.0181
Epoch 4: Train Acc=0.8795, Train Loss=0.0170, Val Acc=0.8762, Val Loss=0.0179
Epoch 5: Train Acc=0.8830, Train Loss=0.0164, Val Acc=0.8772, Val Loss=0.0179
Epoch 6: Train Acc=0.8937, Train Loss=0.0150, Val Acc=0.8852, Val Loss=0.0169
Epoch 7: Train Acc=0.8907, Train Loss=0.0153, Val Acc=0.8810, Val Loss=0.0174
Epoch 8: Train Acc=0.8877, Train Loss=0.0159, Val Acc=0.8748, Val Loss=0.0182
Epoch 9: Train Acc=0.8954, Train Loss=0.0148, Val Acc=0.8818, Val Loss=0.0174
Epoch 10: Train Acc=0.8930, Train Loss=0.0152, Val Acc=0.8752, Val Loss=0.0182


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▆▇█▇▇██
train_loss,█▅▃▃▂▁▁▂▁▁
val_acc,▁▅▆▇▇█▇▆▇▇
val_loss,█▅▃▂▂▁▂▃▂▃

0,1
epoch,10.0
train_acc,0.89304
train_loss,0.01518
val_acc,0.8752
val_loss,0.01817


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8qwz8otz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7289, Train Loss=0.0366, Val Acc=0.7312, Val Loss=0.0364
Epoch 2: Train Acc=0.7817, Train Loss=0.0295, Val Acc=0.7836, Val Loss=0.0293
Epoch 3: Train Acc=0.8111, Train Loss=0.0269, Val Acc=0.8136, Val Loss=0.0268
Epoch 4: Train Acc=0.8269, Train Loss=0.0249, Val Acc=0.8262, Val Loss=0.0249
Epoch 5: Train Acc=0.8341, Train Loss=0.0237, Val Acc=0.8328, Val Loss=0.0238


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆██
train_loss,█▄▃▂▁
val_acc,▁▅▇██
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.83409
train_loss,0.02372
val_acc,0.8328
val_loss,0.02381


[34m[1mwandb[0m: Agent Starting Run: 1gpodgqf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 2: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 3: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 4: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 5: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 6: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 7: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 8: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 9: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 10: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▅▄▃▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▆▅▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.10022
train_loss,0.09001
val_acc,0.0976
val_loss,0.09003


[34m[1mwandb[0m: Agent Starting Run: kj9p2m92 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8088, Train Loss=0.0284, Val Acc=0.8102, Val Loss=0.0279
Epoch 2: Train Acc=0.8368, Train Loss=0.0238, Val Acc=0.8380, Val Loss=0.0233
Epoch 3: Train Acc=0.8480, Train Loss=0.0219, Val Acc=0.8478, Val Loss=0.0216
Epoch 4: Train Acc=0.8554, Train Loss=0.0207, Val Acc=0.8554, Val Loss=0.0208
Epoch 5: Train Acc=0.8594, Train Loss=0.0201, Val Acc=0.8574, Val Loss=0.0204


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇██
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.85938
train_loss,0.02006
val_acc,0.8574
val_loss,0.02037


[34m[1mwandb[0m: Agent Starting Run: h3plvnp2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1164, Train Loss=0.0897, Val Acc=0.1202, Val Loss=0.0897
Epoch 2: Train Acc=0.2534, Train Loss=0.0890, Val Acc=0.2562, Val Loss=0.0889
Epoch 3: Train Acc=0.3495, Train Loss=0.0882, Val Acc=0.3536, Val Loss=0.0882
Epoch 4: Train Acc=0.4127, Train Loss=0.0875, Val Acc=0.4192, Val Loss=0.0874
Epoch 5: Train Acc=0.4467, Train Loss=0.0867, Val Acc=0.4526, Val Loss=0.0866


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▆▅▃▁
val_acc,▁▄▆▇█
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.44673
train_loss,0.08669
val_acc,0.4526
val_loss,0.08663


[34m[1mwandb[0m: Agent Starting Run: a85erwbu with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901
Epoch 2: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901
Epoch 3: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901
Epoch 4: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901
Epoch 5: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▄▃▁

0,1
epoch,5.0
train_acc,0.10018
train_loss,0.09008
val_acc,0.098
val_loss,0.09008


[34m[1mwandb[0m: Agent Starting Run: o0cnpnya with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.8452, Train Loss=0.0224, Val Acc=0.8530, Val Loss=0.0219
Epoch 2: Train Acc=0.8625, Train Loss=0.0198, Val Acc=0.8628, Val Loss=0.0198
Epoch 3: Train Acc=0.8704, Train Loss=0.0186, Val Acc=0.8706, Val Loss=0.0190
Epoch 4: Train Acc=0.8763, Train Loss=0.0177, Val Acc=0.8724, Val Loss=0.0185
Epoch 5: Train Acc=0.8807, Train Loss=0.0171, Val Acc=0.8726, Val Loss=0.0182


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▇██
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.88065
train_loss,0.01707
val_acc,0.8726
val_loss,0.01819


[34m[1mwandb[0m: Agent Starting Run: u2pqeed8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8071, Train Loss=0.0277, Val Acc=0.8084, Val Loss=0.0270
Epoch 2: Train Acc=0.8291, Train Loss=0.0248, Val Acc=0.8368, Val Loss=0.0241
Epoch 3: Train Acc=0.8389, Train Loss=0.0234, Val Acc=0.8444, Val Loss=0.0226
Epoch 4: Train Acc=0.8455, Train Loss=0.0225, Val Acc=0.8512, Val Loss=0.0217
Epoch 5: Train Acc=0.8501, Train Loss=0.0218, Val Acc=0.8546, Val Loss=0.0210


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.85009
train_loss,0.02177
val_acc,0.8546
val_loss,0.02103


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5d041r19 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 2: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 3: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 4: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 5: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁████
val_acc,▁▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.09
val_acc,0.0976
val_loss,0.09001


[34m[1mwandb[0m: Agent Starting Run: 0mwgdhym with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 2: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 6: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 7: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 8: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 9: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 10: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁████████
train_loss,█▁▁▁▁▁▁▁▁▁
val_acc,██▁▁▁▁▁▁▁▁
val_loss,▁▆████████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09


[34m[1mwandb[0m: Agent Starting Run: 23w2tt7u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 2: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▅▄▂▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09


[34m[1mwandb[0m: Agent Starting Run: raasxdsr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8648, Train Loss=0.0194, Val Acc=0.8654, Val Loss=0.0192
Epoch 2: Train Acc=0.8718, Train Loss=0.0181, Val Acc=0.8718, Val Loss=0.0183
Epoch 3: Train Acc=0.8820, Train Loss=0.0168, Val Acc=0.8788, Val Loss=0.0175
Epoch 4: Train Acc=0.8893, Train Loss=0.0158, Val Acc=0.8826, Val Loss=0.0168
Epoch 5: Train Acc=0.8963, Train Loss=0.0149, Val Acc=0.8866, Val Loss=0.0163
Epoch 6: Train Acc=0.8996, Train Loss=0.0144, Val Acc=0.8850, Val Loss=0.0161
Epoch 7: Train Acc=0.9023, Train Loss=0.0139, Val Acc=0.8860, Val Loss=0.0161
Epoch 8: Train Acc=0.9052, Train Loss=0.0135, Val Acc=0.8872, Val Loss=0.0159
Epoch 9: Train Acc=0.9080, Train Loss=0.0131, Val Acc=0.8880, Val Loss=0.0159
Epoch 10: Train Acc=0.9110, Train Loss=0.0127, Val Acc=0.8892, Val Loss=0.0159


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▄▅▆▆▇▇██
train_loss,█▇▅▄▃▃▂▂▁▁
val_acc,▁▃▅▆▇▇▇▇██
val_loss,█▆▄▃▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.911
train_loss,0.01267
val_acc,0.8892
val_loss,0.01593


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8plbn958 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1011, Train Loss=0.0900, Val Acc=0.1004, Val Loss=0.0900
Epoch 2: Train Acc=0.1612, Train Loss=0.0899, Val Acc=0.1504, Val Loss=0.0899
Epoch 3: Train Acc=0.1688, Train Loss=0.0899, Val Acc=0.1590, Val Loss=0.0899
Epoch 4: Train Acc=0.1931, Train Loss=0.0898, Val Acc=0.1864, Val Loss=0.0898
Epoch 5: Train Acc=0.2312, Train Loss=0.0897, Val Acc=0.2258, Val Loss=0.0897


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▆█
train_loss,█▆▅▃▁
val_acc,▁▄▄▆█
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.23124
train_loss,0.08973
val_acc,0.2258
val_loss,0.08973


[34m[1mwandb[0m: Agent Starting Run: jr7uk16w with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8520, Train Loss=0.0211, Val Acc=0.8556, Val Loss=0.0208
Epoch 2: Train Acc=0.8655, Train Loss=0.0191, Val Acc=0.8652, Val Loss=0.0193
Epoch 3: Train Acc=0.8724, Train Loss=0.0181, Val Acc=0.8666, Val Loss=0.0186
Epoch 4: Train Acc=0.8781, Train Loss=0.0173, Val Acc=0.8714, Val Loss=0.0180
Epoch 5: Train Acc=0.8826, Train Loss=0.0166, Val Acc=0.8760, Val Loss=0.0175
Epoch 6: Train Acc=0.8881, Train Loss=0.0158, Val Acc=0.8792, Val Loss=0.0169
Epoch 7: Train Acc=0.8910, Train Loss=0.0154, Val Acc=0.8806, Val Loss=0.0166
Epoch 8: Train Acc=0.8942, Train Loss=0.0150, Val Acc=0.8802, Val Loss=0.0165
Epoch 9: Train Acc=0.8973, Train Loss=0.0146, Val Acc=0.8828, Val Loss=0.0163
Epoch 10: Train Acc=0.8989, Train Loss=0.0144, Val Acc=0.8828, Val Loss=0.0162


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▃▄▅▆▇▇▇██
val_loss,█▆▅▄▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.89895
train_loss,0.01437
val_acc,0.8828
val_loss,0.01625


[34m[1mwandb[0m: Agent Starting Run: pcq6zl3j with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0853, Train Loss=0.0900, Val Acc=0.0820, Val Loss=0.0900
Epoch 2: Train Acc=0.0957, Train Loss=0.0900, Val Acc=0.0864, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▃▅▆█
train_acc,▁▆███
train_loss,██▅▅▁
val_acc,▁▄███
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09


[34m[1mwandb[0m: Agent Starting Run: 354eb5i3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.2010, Train Loss=0.0832, Val Acc=0.2072, Val Loss=0.0832
Epoch 2: Train Acc=0.3814, Train Loss=0.0751, Val Acc=0.3932, Val Loss=0.0750
Epoch 3: Train Acc=0.3876, Train Loss=0.0685, Val Acc=0.4016, Val Loss=0.0680
Epoch 4: Train Acc=0.3937, Train Loss=0.0652, Val Acc=0.4062, Val Loss=0.0644
Epoch 5: Train Acc=0.4500, Train Loss=0.0629, Val Acc=0.4584, Val Loss=0.0621
Epoch 6: Train Acc=0.4745, Train Loss=0.0609, Val Acc=0.4812, Val Loss=0.0602
Epoch 7: Train Acc=0.5088, Train Loss=0.0592, Val Acc=0.5176, Val Loss=0.0586
Epoch 8: Train Acc=0.5509, Train Loss=0.0557, Val Acc=0.5594, Val Loss=0.0551
Epoch 9: Train Acc=0.6261, Train Loss=0.0510, Val Acc=0.6362, Val Loss=0.0505
Epoch 10: Train Acc=0.6373, Train Loss=0.0475, Val Acc=0.6462, Val Loss=0.0472


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▄▄▅▅▆▇██
train_loss,█▆▅▄▄▄▃▃▂▁
val_acc,▁▄▄▄▅▅▆▇██
val_loss,█▆▅▄▄▄▃▃▂▁

0,1
epoch,10.0
train_acc,0.63733
train_loss,0.04753
val_acc,0.6462
val_loss,0.04718


[34m[1mwandb[0m: Agent Starting Run: i5lde0qa with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1644, Train Loss=0.0889, Val Acc=0.1656, Val Loss=0.0888
Epoch 2: Train Acc=0.2942, Train Loss=0.0870, Val Acc=0.2976, Val Loss=0.0870
Epoch 3: Train Acc=0.4056, Train Loss=0.0839, Val Acc=0.4154, Val Loss=0.0838
Epoch 4: Train Acc=0.5181, Train Loss=0.0784, Val Acc=0.5282, Val Loss=0.0782
Epoch 5: Train Acc=0.5532, Train Loss=0.0712, Val Acc=0.5644, Val Loss=0.0709
Epoch 6: Train Acc=0.5804, Train Loss=0.0643, Val Acc=0.5906, Val Loss=0.0638
Epoch 7: Train Acc=0.6334, Train Loss=0.0585, Val Acc=0.6484, Val Loss=0.0579
Epoch 8: Train Acc=0.6478, Train Loss=0.0538, Val Acc=0.6572, Val Loss=0.0531
Epoch 9: Train Acc=0.6593, Train Loss=0.0499, Val Acc=0.6656, Val Loss=0.0492
Epoch 10: Train Acc=0.6735, Train Loss=0.0469, Val Acc=0.6812, Val Loss=0.0461


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▆▆▇▇███
train_loss,██▇▆▅▄▃▂▂▁
val_acc,▁▃▄▆▆▇████
val_loss,██▇▆▅▄▃▂▂▁

0,1
epoch,10.0
train_acc,0.67351
train_loss,0.04689
val_acc,0.6812
val_loss,0.0461


[34m[1mwandb[0m: Agent Starting Run: uodtdgpy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8335, Train Loss=0.0244, Val Acc=0.8376, Val Loss=0.0235
Epoch 2: Train Acc=0.8517, Train Loss=0.0215, Val Acc=0.8516, Val Loss=0.0211
Epoch 3: Train Acc=0.8599, Train Loss=0.0202, Val Acc=0.8582, Val Loss=0.0202
Epoch 4: Train Acc=0.8657, Train Loss=0.0194, Val Acc=0.8622, Val Loss=0.0197
Epoch 5: Train Acc=0.8710, Train Loss=0.0185, Val Acc=0.8684, Val Loss=0.0191


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.87096
train_loss,0.01852
val_acc,0.8684
val_loss,0.01908


[34m[1mwandb[0m: Agent Starting Run: ohx6ufi9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 2: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 3: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 4: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 5: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▄▃▁

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.09001
val_acc,0.0976
val_loss,0.09003


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ylqkafgq with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6781, Train Loss=0.0421, Val Acc=0.6818, Val Loss=0.0418
Epoch 2: Train Acc=0.7824, Train Loss=0.0312, Val Acc=0.7870, Val Loss=0.0309
Epoch 3: Train Acc=0.7967, Train Loss=0.0280, Val Acc=0.7972, Val Loss=0.0280
Epoch 4: Train Acc=0.8065, Train Loss=0.0262, Val Acc=0.8040, Val Loss=0.0265
Epoch 5: Train Acc=0.8121, Train Loss=0.0254, Val Acc=0.8088, Val Loss=0.0261
Epoch 6: Train Acc=0.8227, Train Loss=0.0243, Val Acc=0.8172, Val Loss=0.0252
Epoch 7: Train Acc=0.8380, Train Loss=0.0227, Val Acc=0.8308, Val Loss=0.0238
Epoch 8: Train Acc=0.8549, Train Loss=0.0213, Val Acc=0.8468, Val Loss=0.0226
Epoch 9: Train Acc=0.8641, Train Loss=0.0202, Val Acc=0.8532, Val Loss=0.0217
Epoch 10: Train Acc=0.8668, Train Loss=0.0197, Val Acc=0.8544, Val Loss=0.0215


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▅▆▆▆▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▅▆▆▆▆▇███
val_loss,█▄▃▃▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.86676
train_loss,0.01965
val_acc,0.8544
val_loss,0.0215


[34m[1mwandb[0m: Agent Starting Run: dsn4c3kl with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.6500, Train Loss=0.0454, Val Acc=0.6606, Val Loss=0.0448
Epoch 2: Train Acc=0.7006, Train Loss=0.0415, Val Acc=0.7084, Val Loss=0.0411
Epoch 3: Train Acc=0.7183, Train Loss=0.0396, Val Acc=0.7210, Val Loss=0.0393
Epoch 4: Train Acc=0.7256, Train Loss=0.0386, Val Acc=0.7246, Val Loss=0.0384
Epoch 5: Train Acc=0.7280, Train Loss=0.0380, Val Acc=0.7242, Val Loss=0.0379


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇██
train_loss,█▄▃▂▁
val_acc,▁▆███
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.72802
train_loss,0.03802
val_acc,0.7242
val_loss,0.03791


[34m[1mwandb[0m: Agent Starting Run: 9s3ztit3 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901
Epoch 2: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901
Epoch 3: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901
Epoch 4: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901
Epoch 5: Train Acc=0.1002, Train Loss=0.0901, Val Acc=0.0980, Val Loss=0.0901


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▅▃▁
val_acc,▁▁▁▁▁
val_loss,█▆▅▃▁

0,1
epoch,5.0
train_acc,0.10018
train_loss,0.09007
val_acc,0.098
val_loss,0.09007


[34m[1mwandb[0m: Agent Starting Run: 04d77rs7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.0900, Val Acc=0.0976, Val Loss=0.0900
Epoch 2: Train Acc=0.0998, Train Loss=0.0900, Val Acc=0.0986, Val Loss=0.0900
Epoch 3: Train Acc=0.0987, Train Loss=0.0900, Val Acc=0.0988, Val Loss=0.0900
Epoch 4: Train Acc=0.0990, Train Loss=0.0900, Val Acc=0.0992, Val Loss=0.0900
Epoch 5: Train Acc=0.0994, Train Loss=0.0900, Val Acc=0.0992, Val Loss=0.0900
Epoch 6: Train Acc=0.0997, Train Loss=0.0900, Val Acc=0.0996, Val Loss=0.0900
Epoch 7: Train Acc=0.1001, Train Loss=0.0900, Val Acc=0.0998, Val Loss=0.0900
Epoch 8: Train Acc=0.1004, Train Loss=0.0900, Val Acc=0.1000, Val Loss=0.0900
Epoch 9: Train Acc=0.1005, Train Loss=0.0900, Val Acc=0.1000, Val Loss=0.0900
Epoch 10: Train Acc=0.1007, Train Loss=0.0900, Val Acc=0.1000, Val Loss=0.0900


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▆▅▁▂▄▅▆▇▇█
train_loss,█▇▆▅▅▄▃▂▂▁
val_acc,▁▄▅▆▆▇▇███
val_loss,▆█▇▆▅▄▄▃▂▁

0,1
epoch,10.0
train_acc,0.10073
train_loss,0.09
val_acc,0.1
val_loss,0.09


[34m[1mwandb[0m: Agent Starting Run: svv71aid with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1885, Train Loss=0.0898, Val Acc=0.1858, Val Loss=0.0898
Epoch 2: Train Acc=0.2732, Train Loss=0.0892, Val Acc=0.2710, Val Loss=0.0892
Epoch 3: Train Acc=0.2432, Train Loss=0.0863, Val Acc=0.2442, Val Loss=0.0862
Epoch 4: Train Acc=0.2971, Train Loss=0.0800, Val Acc=0.2978, Val Loss=0.0800
Epoch 5: Train Acc=0.3363, Train Loss=0.0768, Val Acc=0.3402, Val Loss=0.0767
Epoch 6: Train Acc=0.3889, Train Loss=0.0734, Val Acc=0.3952, Val Loss=0.0732
Epoch 7: Train Acc=0.4321, Train Loss=0.0704, Val Acc=0.4350, Val Loss=0.0702
Epoch 8: Train Acc=0.4624, Train Loss=0.0679, Val Acc=0.4626, Val Loss=0.0677
Epoch 9: Train Acc=0.4873, Train Loss=0.0655, Val Acc=0.4878, Val Loss=0.0653
Epoch 10: Train Acc=0.5075, Train Loss=0.0632, Val Acc=0.5096, Val Loss=0.0629


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▂▃▄▅▆▇██
train_loss,██▇▅▅▄▃▂▂▁
val_acc,▁▃▂▃▄▆▆▇██
val_loss,██▇▅▅▄▃▂▂▁

0,1
epoch,10.0
train_acc,0.50747
train_loss,0.06319
val_acc,0.5096
val_loss,0.06294


[34m[1mwandb[0m: Agent Starting Run: l4kl3dgz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8131, Train Loss=0.0273, Val Acc=0.8160, Val Loss=0.0268
Epoch 2: Train Acc=0.8409, Train Loss=0.0232, Val Acc=0.8440, Val Loss=0.0228
Epoch 3: Train Acc=0.8527, Train Loss=0.0213, Val Acc=0.8552, Val Loss=0.0210
Epoch 4: Train Acc=0.8607, Train Loss=0.0201, Val Acc=0.8610, Val Loss=0.0200
Epoch 5: Train Acc=0.8658, Train Loss=0.0194, Val Acc=0.8646, Val Loss=0.0194


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.8658
train_loss,0.01936
val_acc,0.8646
val_loss,0.01941


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tqgvm7sc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 2: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 6: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 7: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 8: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 9: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 10: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▄▄▃▂▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▃▅▆▇▇████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: te6tygvv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.0999, Train Loss=0.0900, Val Acc=0.1008, Val Loss=0.0900
Epoch 2: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▃▅▆█
train_acc,▁████
train_loss,█▂▁▁▁
val_acc,█▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09001


[34m[1mwandb[0m: Agent Starting Run: ahijozhb with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8424, Train Loss=0.0222, Val Acc=0.8408, Val Loss=0.0227
Epoch 2: Train Acc=0.8562, Train Loss=0.0201, Val Acc=0.8520, Val Loss=0.0211
Epoch 3: Train Acc=0.8710, Train Loss=0.0182, Val Acc=0.8618, Val Loss=0.0198
Epoch 4: Train Acc=0.8801, Train Loss=0.0170, Val Acc=0.8662, Val Loss=0.0189
Epoch 5: Train Acc=0.8893, Train Loss=0.0157, Val Acc=0.8752, Val Loss=0.0180


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▇█
train_loss,█▆▄▂▁
val_acc,▁▃▅▆█
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_acc,0.88925
train_loss,0.01572
val_acc,0.8752
val_loss,0.01801


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k4h8jcgn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7735, Train Loss=0.0358, Val Acc=0.7754, Val Loss=0.0356
Epoch 2: Train Acc=0.8108, Train Loss=0.0282, Val Acc=0.8160, Val Loss=0.0277
Epoch 3: Train Acc=0.8241, Train Loss=0.0256, Val Acc=0.8288, Val Loss=0.0250
Epoch 4: Train Acc=0.8317, Train Loss=0.0242, Val Acc=0.8374, Val Loss=0.0236
Epoch 5: Train Acc=0.8377, Train Loss=0.0233, Val Acc=0.8432, Val Loss=0.0227
Epoch 6: Train Acc=0.8418, Train Loss=0.0226, Val Acc=0.8478, Val Loss=0.0220
Epoch 7: Train Acc=0.8459, Train Loss=0.0220, Val Acc=0.8502, Val Loss=0.0215
Epoch 8: Train Acc=0.8488, Train Loss=0.0216, Val Acc=0.8532, Val Loss=0.0212
Epoch 9: Train Acc=0.8512, Train Loss=0.0211, Val Acc=0.8574, Val Loss=0.0208
Epoch 10: Train Acc=0.8537, Train Loss=0.0208, Val Acc=0.8582, Val Loss=0.0206


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▄▃▃▂▂▂▁▁▁
val_acc,▁▄▆▆▇▇▇███
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.85369
train_loss,0.02079
val_acc,0.8582
val_loss,0.02059


[34m[1mwandb[0m: Agent Starting Run: anqmxka3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 2: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 6: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 7: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 8: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 9: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 10: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,█▄▂▂▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▃▅▆▆▇▇███

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09


[34m[1mwandb[0m: Agent Starting Run: 6xmht8ae with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.4420, Train Loss=0.0646, Val Acc=0.4452, Val Loss=0.0643
Epoch 2: Train Acc=0.6427, Train Loss=0.0484, Val Acc=0.6494, Val Loss=0.0481
Epoch 3: Train Acc=0.7326, Train Loss=0.0391, Val Acc=0.7436, Val Loss=0.0383
Epoch 4: Train Acc=0.7817, Train Loss=0.0320, Val Acc=0.7876, Val Loss=0.0313
Epoch 5: Train Acc=0.7985, Train Loss=0.0286, Val Acc=0.8020, Val Loss=0.0281


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇██
train_loss,█▅▃▂▁
val_acc,▁▅▇██
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.79853
train_loss,0.02858
val_acc,0.802
val_loss,0.0281


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e2k9ytia with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1014, Train Loss=0.0900, Val Acc=0.0980, Val Loss=0.0900
Epoch 2: Train Acc=0.1061, Train Loss=0.0900, Val Acc=0.1014, Val Loss=0.0900
Epoch 3: Train Acc=0.1180, Train Loss=0.0900, Val Acc=0.1108, Val Loss=0.0900
Epoch 4: Train Acc=0.1245, Train Loss=0.0900, Val Acc=0.1162, Val Loss=0.0900
Epoch 5: Train Acc=0.1119, Train Loss=0.0900, Val Acc=0.1048, Val Loss=0.0900


0,1
epoch,▁▃▅▆█
train_acc,▁▂▆█▄
train_loss,█▇▅▂▁
val_acc,▁▂▆█▄
val_loss,▁▃▄▆█

0,1
epoch,5.0
train_acc,0.11193
train_loss,0.09
val_acc,0.1048
val_loss,0.09


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: r6ad4ssm with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 2: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 3: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 4: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 5: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 6: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 7: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 8: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 9: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 10: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▆▃▆█▆▆▃▃▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▅▇███████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.09
val_acc,0.0914
val_loss,0.09


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mtirbfhi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7875, Train Loss=0.0331, Val Acc=0.7924, Val Loss=0.0325
Epoch 2: Train Acc=0.8116, Train Loss=0.0282, Val Acc=0.8200, Val Loss=0.0275
Epoch 3: Train Acc=0.8217, Train Loss=0.0261, Val Acc=0.8296, Val Loss=0.0253
Epoch 4: Train Acc=0.8276, Train Loss=0.0249, Val Acc=0.8350, Val Loss=0.0241
Epoch 5: Train Acc=0.8332, Train Loss=0.0241, Val Acc=0.8386, Val Loss=0.0233


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.83322
train_loss,0.02407
val_acc,0.8386
val_loss,0.02329


[34m[1mwandb[0m: Agent Starting Run: nrthseoi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7005, Train Loss=0.0452, Val Acc=0.7080, Val Loss=0.0446
Epoch 2: Train Acc=0.7626, Train Loss=0.0346, Val Acc=0.7710, Val Loss=0.0338
Epoch 3: Train Acc=0.7972, Train Loss=0.0294, Val Acc=0.8076, Val Loss=0.0286
Epoch 4: Train Acc=0.8138, Train Loss=0.0268, Val Acc=0.8244, Val Loss=0.0259
Epoch 5: Train Acc=0.8241, Train Loss=0.0252, Val Acc=0.8304, Val Loss=0.0243


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▂▂▁
val_acc,▁▅▇██
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.82407
train_loss,0.0252
val_acc,0.8304
val_loss,0.02433


[34m[1mwandb[0m: Agent Starting Run: 6e5rbbt6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.3075, Train Loss=0.0865, Val Acc=0.3084, Val Loss=0.0865
Epoch 2: Train Acc=0.4187, Train Loss=0.0826, Val Acc=0.4288, Val Loss=0.0825
Epoch 3: Train Acc=0.5417, Train Loss=0.0767, Val Acc=0.5516, Val Loss=0.0766
Epoch 4: Train Acc=0.6155, Train Loss=0.0689, Val Acc=0.6232, Val Loss=0.0685
Epoch 5: Train Acc=0.6426, Train Loss=0.0609, Val Acc=0.6504, Val Loss=0.0603


0,1
epoch,▁▃▅▆█
train_acc,▁▃▆▇█
train_loss,█▇▅▃▁
val_acc,▁▃▆▇█
val_loss,█▇▅▃▁

0,1
epoch,5.0
train_acc,0.64258
train_loss,0.06093
val_acc,0.6504
val_loss,0.06031


[34m[1mwandb[0m: Agent Starting Run: e8u14y8k with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8592, Train Loss=0.0200, Val Acc=0.8582, Val Loss=0.0199
Epoch 2: Train Acc=0.8751, Train Loss=0.0178, Val Acc=0.8738, Val Loss=0.0181
Epoch 3: Train Acc=0.8855, Train Loss=0.0164, Val Acc=0.8802, Val Loss=0.0172
Epoch 4: Train Acc=0.8921, Train Loss=0.0155, Val Acc=0.8842, Val Loss=0.0166
Epoch 5: Train Acc=0.8976, Train Loss=0.0147, Val Acc=0.8870, Val Loss=0.0162


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.89756
train_loss,0.01474
val_acc,0.887
val_loss,0.01616


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p4c3rakc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1050, Train Loss=0.0900, Val Acc=0.1038, Val Loss=0.0900
Epoch 2: Train Acc=0.1226, Train Loss=0.0900, Val Acc=0.1222, Val Loss=0.0900
Epoch 3: Train Acc=0.1332, Train Loss=0.0899, Val Acc=0.1328, Val Loss=0.0899
Epoch 4: Train Acc=0.2123, Train Loss=0.0898, Val Acc=0.2134, Val Loss=0.0898
Epoch 5: Train Acc=0.2162, Train Loss=0.0887, Val Acc=0.2124, Val Loss=0.0887


0,1
epoch,▁▃▅▆█
train_acc,▁▂▃██
train_loss,███▇▁
val_acc,▁▂▃██
val_loss,██▇▇▁

0,1
epoch,5.0
train_acc,0.21618
train_loss,0.08866
val_acc,0.2124
val_loss,0.08868


[34m[1mwandb[0m: Agent Starting Run: 1icalqm8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8577, Train Loss=0.0201, Val Acc=0.8546, Val Loss=0.0203
Epoch 2: Train Acc=0.8761, Train Loss=0.0176, Val Acc=0.8698, Val Loss=0.0181
Epoch 3: Train Acc=0.8821, Train Loss=0.0167, Val Acc=0.8768, Val Loss=0.0175
Epoch 4: Train Acc=0.8863, Train Loss=0.0161, Val Acc=0.8796, Val Loss=0.0171
Epoch 5: Train Acc=0.8899, Train Loss=0.0156, Val Acc=0.8798, Val Loss=0.0169
Epoch 6: Train Acc=0.8931, Train Loss=0.0152, Val Acc=0.8816, Val Loss=0.0167
Epoch 7: Train Acc=0.8956, Train Loss=0.0148, Val Acc=0.8822, Val Loss=0.0165
Epoch 8: Train Acc=0.8980, Train Loss=0.0145, Val Acc=0.8832, Val Loss=0.0164
Epoch 9: Train Acc=0.9002, Train Loss=0.0142, Val Acc=0.8840, Val Loss=0.0163
Epoch 10: Train Acc=0.9028, Train Loss=0.0139, Val Acc=0.8852, Val Loss=0.0162


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▅▆▆▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▄▆▇▇▇▇███
val_loss,█▄▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.90282
train_loss,0.01389
val_acc,0.8852
val_loss,0.01623


[34m[1mwandb[0m: Agent Starting Run: ixlkytiz with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7533, Train Loss=0.0335, Val Acc=0.7486, Val Loss=0.0335
Epoch 2: Train Acc=0.8414, Train Loss=0.0230, Val Acc=0.8404, Val Loss=0.0229
Epoch 3: Train Acc=0.8594, Train Loss=0.0205, Val Acc=0.8580, Val Loss=0.0205
Epoch 4: Train Acc=0.8695, Train Loss=0.0188, Val Acc=0.8648, Val Loss=0.0190
Epoch 5: Train Acc=0.8757, Train Loss=0.0179, Val Acc=0.8702, Val Loss=0.0186
Epoch 6: Train Acc=0.8811, Train Loss=0.0173, Val Acc=0.8710, Val Loss=0.0181
Epoch 7: Train Acc=0.8853, Train Loss=0.0167, Val Acc=0.8712, Val Loss=0.0180
Epoch 8: Train Acc=0.8885, Train Loss=0.0163, Val Acc=0.8716, Val Loss=0.0180
Epoch 9: Train Acc=0.8919, Train Loss=0.0158, Val Acc=0.8768, Val Loss=0.0176
Epoch 10: Train Acc=0.8918, Train Loss=0.0157, Val Acc=0.8738, Val Loss=0.0179


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_acc,▁▆▇▇██████
val_loss,█▃▂▂▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.89182
train_loss,0.01568
val_acc,0.8738
val_loss,0.01795


[34m[1mwandb[0m: Agent Starting Run: wgtiriet with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1198, Train Loss=0.0899, Val Acc=0.1194, Val Loss=0.0899
Epoch 2: Train Acc=0.5901, Train Loss=0.0551, Val Acc=0.5988, Val Loss=0.0545
Epoch 3: Train Acc=0.7320, Train Loss=0.0374, Val Acc=0.7370, Val Loss=0.0369
Epoch 4: Train Acc=0.7698, Train Loss=0.0312, Val Acc=0.7764, Val Loss=0.0309
Epoch 5: Train Acc=0.8003, Train Loss=0.0271, Val Acc=0.8082, Val Loss=0.0266
Epoch 6: Train Acc=0.8349, Train Loss=0.0239, Val Acc=0.8376, Val Loss=0.0236
Epoch 7: Train Acc=0.8468, Train Loss=0.0221, Val Acc=0.8466, Val Loss=0.0220
Epoch 8: Train Acc=0.8551, Train Loss=0.0209, Val Acc=0.8524, Val Loss=0.0209
Epoch 9: Train Acc=0.8623, Train Loss=0.0198, Val Acc=0.8582, Val Loss=0.0200
Epoch 10: Train Acc=0.8684, Train Loss=0.0189, Val Acc=0.8634, Val Loss=0.0193


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▇▇▇█████
train_loss,█▅▃▂▂▁▁▁▁▁
val_acc,▁▆▇▇▇█████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.86836
train_loss,0.01894
val_acc,0.8634
val_loss,0.01934


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: r0f1s10k with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.0900, Val Acc=0.0914, Val Loss=0.0900
Epoch 2: Train Acc=0.1085, Train Loss=0.0899, Val Acc=0.1006, Val Loss=0.0899
Epoch 3: Train Acc=0.2599, Train Loss=0.0780, Val Acc=0.2692, Val Loss=0.0779
Epoch 4: Train Acc=0.5786, Train Loss=0.0523, Val Acc=0.5886, Val Loss=0.0517
Epoch 5: Train Acc=0.6817, Train Loss=0.0428, Val Acc=0.6846, Val Loss=0.0422


0,1
epoch,▁▃▅▆█
train_acc,▁▁▃▇█
train_loss,██▆▂▁
val_acc,▁▁▃▇█
val_loss,██▆▂▁

0,1
epoch,5.0
train_acc,0.68173
train_loss,0.04279
val_acc,0.6846
val_loss,0.04221


In [29]:
def main():
    sweep_config = {
        'method': 'bayes',
        'metric': {'name': 'accuracy', 'goal': 'maximize'},
        'parameters': {
            'epochs': {'values': [5, 10]},
            'num_layers': {'values': [3, 4, 5]},
            'hidden_size': {'values': [32, 64, 128]},
            'weight_decay': {'values': [0, 0.0005, 0.5]},
            'learning_rate': {'values': [1e-3, 1e-4]},
            'optimizer': {'values': ['stochastic', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']},
            'batch_size': {'values': [16, 32, 64]},
            'weight_init': {'values': ['random', 'xavier']},
            'activation': {'values': ['sigmoid', 'tanh', 'relu']},
        }
    }
    sweep_id = wandb.sweep(sweep_config, project="Vinod_Assignment1_Question8_CrossEntropy")
    wandb.agent(sweep_id, function=train, count=50)

def train():
    wandb.init(project="Vinod_Assignment1_Question8_CrossEntropy")
    # wandb.init()
    config = wandb.config
    run_name = f"Opt-{config.optimizer}_Layers-{config.num_layers}_HS-{config.hidden_size}_LR-{config.learning_rate}_Batch-{config.batch_size}_Act-{config.activation}"
    wandb.run.name = run_name

    # x_train, y_train, x_val, y_val, _, _ = Data_Preprocess()

    optimizer = config.optimizer

    if optimizer == 'stochastic':
        trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
        # trained_weights = Stochastic_GD(lr, x_train, y_train, x_val, y_val, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size)
    elif optimizer == 'momentum':
        trained_weights = Momentum_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'nag':
        trained_weights = Nesterov_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'rmsprop':
        trained_weights = RMS_Opt(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'adam':
        trained_weights = Adam_Opt(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')
    elif optimizer == 'nadam':
        trained_weights = NAdam_Opt(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='cross_entropy')

    #wandb.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss})

    wandb.finish()

if __name__ == "__main__":
    main()


Create sweep with ID: uvbda342
Sweep URL: https://wandb.ai/viinod9-iitm/Vinod_Assignment1_Question8_CrossEntropy/sweeps/uvbda342


[34m[1mwandb[0m: Agent Starting Run: 9ggby5fx with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1988, Train Loss=0.2227, Val Acc=0.1914, Val Loss=0.2227
Epoch 2: Train Acc=0.1996, Train Loss=0.1854, Val Acc=0.2020, Val Loss=0.1853
Epoch 3: Train Acc=0.1992, Train Loss=0.1748, Val Acc=0.2012, Val Loss=0.1746
Epoch 4: Train Acc=0.2016, Train Loss=0.1712, Val Acc=0.2004, Val Loss=0.1710
Epoch 5: Train Acc=0.2023, Train Loss=0.1695, Val Acc=0.2010, Val Loss=0.1693
Epoch 6: Train Acc=0.2031, Train Loss=0.1687, Val Acc=0.2014, Val Loss=0.1684
Epoch 7: Train Acc=0.2037, Train Loss=0.1682, Val Acc=0.2022, Val Loss=0.1680
Epoch 8: Train Acc=0.2042, Train Loss=0.1679, Val Acc=0.2028, Val Loss=0.1677
Epoch 9: Train Acc=0.2048, Train Loss=0.1676, Val Acc=0.2032, Val Loss=0.1675
Epoch 10: Train Acc=0.2054, Train Loss=0.1674, Val Acc=0.2046, Val Loss=0.1674


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▂▁▄▅▆▆▇▇█
train_loss,█▃▂▁▁▁▁▁▁▁
val_acc,▁▇▆▆▆▆▇▇▇█
val_loss,█▃▂▁▁▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.20536
train_loss,0.16743
val_acc,0.2046
val_loss,0.16735


[34m[1mwandb[0m: Agent Starting Run: 8oygmsim with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8013, Train Loss=0.0522, Val Acc=0.8108, Val Loss=0.0519
Epoch 2: Train Acc=0.8518, Train Loss=0.0427, Val Acc=0.8488, Val Loss=0.0430
Epoch 3: Train Acc=0.8644, Train Loss=0.0387, Val Acc=0.8592, Val Loss=0.0397
Epoch 4: Train Acc=0.8724, Train Loss=0.0358, Val Acc=0.8672, Val Loss=0.0374
Epoch 5: Train Acc=0.8798, Train Loss=0.0336, Val Acc=0.8712, Val Loss=0.0358


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇██
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.8798
train_loss,0.03362
val_acc,0.8712
val_loss,0.0358


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jfcxkd5x with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0999, Train Loss=0.2219, Val Acc=0.1012, Val Loss=0.2219
Epoch 2: Train Acc=0.1994, Train Loss=0.1858, Val Acc=0.1914, Val Loss=0.1858
Epoch 3: Train Acc=0.2002, Train Loss=0.1759, Val Acc=0.1918, Val Loss=0.1758
Epoch 4: Train Acc=0.2013, Train Loss=0.1722, Val Acc=0.1930, Val Loss=0.1721
Epoch 5: Train Acc=0.2035, Train Loss=0.1701, Val Acc=0.1952, Val Loss=0.1700
Epoch 6: Train Acc=0.2058, Train Loss=0.1679, Val Acc=0.1978, Val Loss=0.1678
Epoch 7: Train Acc=0.2850, Train Loss=0.1649, Val Acc=0.2780, Val Loss=0.1648
Epoch 8: Train Acc=0.2930, Train Loss=0.1616, Val Acc=0.2856, Val Loss=0.1617
Epoch 9: Train Acc=0.2913, Train Loss=0.1585, Val Acc=0.2886, Val Loss=0.1588
Epoch 10: Train Acc=0.2944, Train Loss=0.1559, Val Acc=0.2918, Val Loss=0.1563


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▅▅▅▅████
train_loss,█▄▃▃▃▂▂▂▁▁
val_acc,▁▄▄▄▄▅▇███
val_loss,█▄▃▃▂▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.29444
train_loss,0.15588
val_acc,0.2918
val_loss,0.15631


[34m[1mwandb[0m: Agent Starting Run: x4dgf30l with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1605, Train Loss=0.2280, Val Acc=0.1564, Val Loss=0.2279
Epoch 2: Train Acc=0.3239, Train Loss=0.1934, Val Acc=0.3288, Val Loss=0.1925
Epoch 3: Train Acc=0.5346, Train Loss=0.1298, Val Acc=0.5380, Val Loss=0.1288
Epoch 4: Train Acc=0.5898, Train Loss=0.1069, Val Acc=0.5952, Val Loss=0.1061
Epoch 5: Train Acc=0.6252, Train Loss=0.0947, Val Acc=0.6334, Val Loss=0.0942


0,1
epoch,▁▃▅▆█
train_acc,▁▃▇▇█
train_loss,█▆▃▂▁
val_acc,▁▄▇▇█
val_loss,█▆▃▂▁

0,1
epoch,5.0
train_acc,0.62516
train_loss,0.09473
val_acc,0.6334
val_loss,0.09416


[34m[1mwandb[0m: Agent Starting Run: xy4srxwh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


[34m[1mwandb[0m: [32m[41mERROR[0m Run xy4srxwh errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

[34m[1mwandb[0m: [32m[41mERROR[0m Run xkh1xm0l errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

[34m[1mwandb[0m: [32m[41mERROR[0m Run k82a09pq errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Epoch 1: Train Acc=0.1997, Train Loss=0.1687, Val Acc=0.1984, Val Loss=0.1684
Epoch 2: Train Acc=0.2016, Train Loss=0.1676, Val Acc=0.2162, Val Loss=0.1673
Epoch 3: Train Acc=0.2023, Train Loss=0.1670, Val Acc=0.2162, Val Loss=0.1666
Epoch 4: Train Acc=0.2036, Train Loss=0.1664, Val Acc=0.2190, Val Loss=0.1661
Epoch 5: Train Acc=0.2074, Train Loss=0.1655, Val Acc=0.2210, Val Loss=0.1653


0,1
epoch,▁▃▅▆█
train_acc,▁▃▃▅█
train_loss,█▆▄▃▁
val_acc,▁▇▇▇█
val_loss,█▅▄▃▁

0,1
epoch,5.0
train_acc,0.20738
train_loss,0.16553
val_acc,0.221
val_loss,0.16528


[34m[1mwandb[0m: Agent Starting Run: 25m6a0co with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8404, Train Loss=0.0424, Val Acc=0.8424, Val Loss=0.0432
Epoch 2: Train Acc=0.8608, Train Loss=0.0374, Val Acc=0.8596, Val Loss=0.0396
Epoch 3: Train Acc=0.8739, Train Loss=0.0346, Val Acc=0.8668, Val Loss=0.0383
Epoch 4: Train Acc=0.8842, Train Loss=0.0317, Val Acc=0.8722, Val Loss=0.0360
Epoch 5: Train Acc=0.8880, Train Loss=0.0307, Val Acc=0.8730, Val Loss=0.0357


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▇██
val_loss,█▅▃▁▁

0,1
epoch,5.0
train_acc,0.88796
train_loss,0.03066
val_acc,0.873
val_loss,0.03571


[34m[1mwandb[0m: Agent Starting Run: wayutr2p with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


[34m[1mwandb[0m: [32m[41mERROR[0m Run wayutr2p errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

[34m[1mwandb[0m: [32m[41mERROR[0m Run nf34jrp3 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Epoch 1: Train Acc=0.4149, Train Loss=0.1416, Val Acc=0.4154, Val Loss=0.1413
Epoch 2: Train Acc=0.5385, Train Loss=0.1112, Val Acc=0.5364, Val Loss=0.1113
Epoch 3: Train Acc=0.5759, Train Loss=0.0964, Val Acc=0.5796, Val Loss=0.0969
Epoch 4: Train Acc=0.6859, Train Loss=0.0831, Val Acc=0.6900, Val Loss=0.0837
Epoch 5: Train Acc=0.7679, Train Loss=0.0712, Val Acc=0.7656, Val Loss=0.0721
Epoch 6: Train Acc=0.7973, Train Loss=0.0613, Val Acc=0.7928, Val Loss=0.0629
Epoch 7: Train Acc=0.8121, Train Loss=0.0555, Val Acc=0.8080, Val Loss=0.0574
Epoch 8: Train Acc=0.8339, Train Loss=0.0505, Val Acc=0.8292, Val Loss=0.0533
Epoch 9: Train Acc=0.8526, Train Loss=0.0465, Val Acc=0.8490, Val Loss=0.0501
Epoch 10: Train Acc=0.8613, Train Loss=0.0437, Val Acc=0.8536, Val Loss=0.0482


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▇▇▇███
train_loss,█▆▅▄▃▂▂▁▁▁
val_acc,▁▃▄▅▇▇▇███
val_loss,█▆▅▄▃▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.86133
train_loss,0.04369
val_acc,0.8536
val_loss,0.04817


[34m[1mwandb[0m: Agent Starting Run: 726ugelj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2029, Train Loss=0.1671, Val Acc=0.2012, Val Loss=0.1669
Epoch 2: Train Acc=0.2407, Train Loss=0.1660, Val Acc=0.2536, Val Loss=0.1657
Epoch 3: Train Acc=0.2372, Train Loss=0.1632, Val Acc=0.2488, Val Loss=0.1630
Epoch 4: Train Acc=0.3318, Train Loss=0.1557, Val Acc=0.3428, Val Loss=0.1554
Epoch 5: Train Acc=0.4338, Train Loss=0.1288, Val Acc=0.4418, Val Loss=0.1285


0,1
epoch,▁▃▅▆█
train_acc,▁▂▂▅█
train_loss,██▇▆▁
val_acc,▁▃▂▅█
val_loss,██▇▆▁

0,1
epoch,5.0
train_acc,0.43384
train_loss,0.1288
val_acc,0.4418
val_loss,0.12853


[34m[1mwandb[0m: Agent Starting Run: l47lp1be with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8360, Train Loss=0.0465, Val Acc=0.8380, Val Loss=0.0457
Epoch 2: Train Acc=0.8485, Train Loss=0.0430, Val Acc=0.8454, Val Loss=0.0429
Epoch 3: Train Acc=0.8548, Train Loss=0.0407, Val Acc=0.8504, Val Loss=0.0413
Epoch 4: Train Acc=0.8587, Train Loss=0.0393, Val Acc=0.8558, Val Loss=0.0402
Epoch 5: Train Acc=0.8628, Train Loss=0.0380, Val Acc=0.8572, Val Loss=0.0392


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.86284
train_loss,0.03796
val_acc,0.8572
val_loss,0.0392


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k9j7zm1y with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8391, Train Loss=0.0456, Val Acc=0.8422, Val Loss=0.0449
Epoch 2: Train Acc=0.8555, Train Loss=0.0411, Val Acc=0.8574, Val Loss=0.0408
Epoch 3: Train Acc=0.8633, Train Loss=0.0389, Val Acc=0.8666, Val Loss=0.0390
Epoch 4: Train Acc=0.8680, Train Loss=0.0375, Val Acc=0.8688, Val Loss=0.0379
Epoch 5: Train Acc=0.8712, Train Loss=0.0364, Val Acc=0.8716, Val Loss=0.0372


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▅▃▂▁
val_acc,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.87124
train_loss,0.0364
val_acc,0.8716
val_loss,0.03717


[34m[1mwandb[0m: Agent Starting Run: a18jqjzq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.6327, Train Loss=0.1403, Val Acc=0.6486, Val Loss=0.1392
Epoch 2: Train Acc=0.6783, Train Loss=0.1074, Val Acc=0.6884, Val Loss=0.1063
Epoch 3: Train Acc=0.7110, Train Loss=0.0908, Val Acc=0.7222, Val Loss=0.0897
Epoch 4: Train Acc=0.7362, Train Loss=0.0807, Val Acc=0.7432, Val Loss=0.0796
Epoch 5: Train Acc=0.7535, Train Loss=0.0739, Val Acc=0.7600, Val Loss=0.0727


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.75351
train_loss,0.07388
val_acc,0.76
val_loss,0.07272


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tmkc5ucg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


[34m[1mwandb[0m: [32m[41mERROR[0m Run tmkc5ucg errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Epoch 1: Train Acc=0.7218, Train Loss=0.0818, Val Acc=0.7216, Val Loss=0.0811
Epoch 2: Train Acc=0.7805, Train Loss=0.0644, Val Acc=0.7800, Val Loss=0.0637
Epoch 3: Train Acc=0.8079, Train Loss=0.0560, Val Acc=0.8130, Val Loss=0.0553
Epoch 4: Train Acc=0.8216, Train Loss=0.0513, Val Acc=0.8270, Val Loss=0.0507
Epoch 5: Train Acc=0.8302, Train Loss=0.0483, Val Acc=0.8336, Val Loss=0.0479


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▄▃▂▁
val_acc,▁▅▇██
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.83024
train_loss,0.04835
val_acc,0.8336
val_loss,0.04786


[34m[1mwandb[0m: Agent Starting Run: a8q3l54y with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2302, Val Acc=0.0914, Val Loss=0.2302
Epoch 3: Train Acc=0.1008, Train Loss=0.2301, Val Acc=0.0914, Val Loss=0.2302
Epoch 4: Train Acc=0.1361, Train Loss=0.2297, Val Acc=0.1268, Val Loss=0.2297
Epoch 5: Train Acc=0.2027, Train Loss=0.1780, Val Acc=0.1958, Val Loss=0.1775


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▃█
train_loss,████▁
val_acc,▁▁▁▃█
val_loss,████▁

0,1
epoch,5.0
train_acc,0.20265
train_loss,0.178
val_acc,0.1958
val_loss,0.17754


[34m[1mwandb[0m: Agent Starting Run: 5typfex0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.0909, Train Loss=0.2303, Val Acc=0.0818, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,█▁████████
train_loss,█▂▂▂▂▁▁▁▁▁
val_acc,█▁▅▅▅▅▅▅▅▅
val_loss,█▂▂▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: vq68gs6l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7607, Train Loss=0.0676, Val Acc=0.7634, Val Loss=0.0673
Epoch 2: Train Acc=0.8079, Train Loss=0.0529, Val Acc=0.8074, Val Loss=0.0532
Epoch 3: Train Acc=0.8344, Train Loss=0.0468, Val Acc=0.8342, Val Loss=0.0477
Epoch 4: Train Acc=0.8537, Train Loss=0.0420, Val Acc=0.8488, Val Loss=0.0434
Epoch 5: Train Acc=0.8655, Train Loss=0.0388, Val Acc=0.8600, Val Loss=0.0405
Epoch 6: Train Acc=0.8727, Train Loss=0.0367, Val Acc=0.8670, Val Loss=0.0389
Epoch 7: Train Acc=0.8785, Train Loss=0.0351, Val Acc=0.8706, Val Loss=0.0377
Epoch 8: Train Acc=0.8828, Train Loss=0.0337, Val Acc=0.8758, Val Loss=0.0367
Epoch 9: Train Acc=0.8852, Train Loss=0.0328, Val Acc=0.8752, Val Loss=0.0363
Epoch 10: Train Acc=0.8873, Train Loss=0.0318, Val Acc=0.8766, Val Loss=0.0358


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇████
train_loss,█▅▄▃▂▂▂▁▁▁
val_acc,▁▄▅▆▇▇████
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.88725
train_loss,0.03184
val_acc,0.8766
val_loss,0.03578


[34m[1mwandb[0m: Agent Starting Run: v3m743g4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.5516, Train Loss=0.1702, Val Acc=0.5614, Val Loss=0.1693
Epoch 2: Train Acc=0.6602, Train Loss=0.0988, Val Acc=0.6694, Val Loss=0.0976
Epoch 3: Train Acc=0.7013, Train Loss=0.0798, Val Acc=0.7108, Val Loss=0.0790
Epoch 4: Train Acc=0.7408, Train Loss=0.0714, Val Acc=0.7488, Val Loss=0.0708
Epoch 5: Train Acc=0.7671, Train Loss=0.0658, Val Acc=0.7718, Val Loss=0.0651
Epoch 6: Train Acc=0.7832, Train Loss=0.0615, Val Acc=0.7918, Val Loss=0.0607
Epoch 7: Train Acc=0.7959, Train Loss=0.0582, Val Acc=0.8006, Val Loss=0.0574
Epoch 8: Train Acc=0.8057, Train Loss=0.0556, Val Acc=0.8110, Val Loss=0.0548
Epoch 9: Train Acc=0.8128, Train Loss=0.0535, Val Acc=0.8174, Val Loss=0.0527
Epoch 10: Train Acc=0.8185, Train Loss=0.0517, Val Acc=0.8208, Val Loss=0.0509


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▄▃▂▂▂▁▁▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.81845
train_loss,0.05172
val_acc,0.8208
val_loss,0.05091


[34m[1mwandb[0m: Agent Starting Run: 34tkgda0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1989, Train Loss=0.1861, Val Acc=0.2030, Val Loss=0.1849
Epoch 2: Train Acc=0.4338, Train Loss=0.1394, Val Acc=0.4356, Val Loss=0.1392
Epoch 3: Train Acc=0.5456, Train Loss=0.1149, Val Acc=0.5510, Val Loss=0.1147
Epoch 4: Train Acc=0.5544, Train Loss=0.1038, Val Acc=0.5622, Val Loss=0.1038
Epoch 5: Train Acc=0.5615, Train Loss=0.0985, Val Acc=0.5674, Val Loss=0.0986


0,1
epoch,▁▃▅▆█
train_acc,▁▆███
train_loss,█▄▂▁▁
val_acc,▁▅███
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.56145
train_loss,0.09852
val_acc,0.5674
val_loss,0.09859


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1cwz2mse with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7963, Train Loss=0.0551, Val Acc=0.8012, Val Loss=0.0543
Epoch 2: Train Acc=0.8410, Train Loss=0.0451, Val Acc=0.8438, Val Loss=0.0448
Epoch 3: Train Acc=0.8563, Train Loss=0.0406, Val Acc=0.8572, Val Loss=0.0410
Epoch 4: Train Acc=0.8677, Train Loss=0.0369, Val Acc=0.8644, Val Loss=0.0383
Epoch 5: Train Acc=0.8768, Train Loss=0.0342, Val Acc=0.8716, Val Loss=0.0364
Epoch 6: Train Acc=0.8827, Train Loss=0.0325, Val Acc=0.8756, Val Loss=0.0353
Epoch 7: Train Acc=0.8873, Train Loss=0.0312, Val Acc=0.8776, Val Loss=0.0344
Epoch 8: Train Acc=0.8912, Train Loss=0.0301, Val Acc=0.8798, Val Loss=0.0340
Epoch 9: Train Acc=0.8947, Train Loss=0.0292, Val Acc=0.8814, Val Loss=0.0337
Epoch 10: Train Acc=0.8976, Train Loss=0.0284, Val Acc=0.8826, Val Loss=0.0337


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_acc,▁▅▆▆▇▇████
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.89756
train_loss,0.02845
val_acc,0.8826
val_loss,0.03373


[34m[1mwandb[0m: Agent Starting Run: of3k6qty with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


[34m[1mwandb[0m: [32m[41mERROR[0m Run of3k6qty errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Epoch 1: Train Acc=0.6059, Train Loss=0.1044, Val Acc=0.6034, Val Loss=0.1041
Epoch 2: Train Acc=0.7212, Train Loss=0.0750, Val Acc=0.7290, Val Loss=0.0753
Epoch 3: Train Acc=0.7660, Train Loss=0.0620, Val Acc=0.7720, Val Loss=0.0619
Epoch 4: Train Acc=0.7905, Train Loss=0.0553, Val Acc=0.7944, Val Loss=0.0548
Epoch 5: Train Acc=0.8143, Train Loss=0.0508, Val Acc=0.8186, Val Loss=0.0502
Epoch 6: Train Acc=0.8278, Train Loss=0.0478, Val Acc=0.8352, Val Loss=0.0471
Epoch 7: Train Acc=0.8364, Train Loss=0.0456, Val Acc=0.8426, Val Loss=0.0449
Epoch 8: Train Acc=0.8430, Train Loss=0.0438, Val Acc=0.8478, Val Loss=0.0432
Epoch 9: Train Acc=0.8482, Train Loss=0.0424, Val Acc=0.8524, Val Loss=0.0418
Epoch 10: Train Acc=0.8525, Train Loss=0.0412, Val Acc=0.8570, Val Loss=0.0406


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▆▇▇████
train_loss,█▅▃▃▂▂▁▁▁▁
val_acc,▁▄▆▆▇▇████
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.85251
train_loss,0.04116
val_acc,0.857
val_loss,0.0406


[34m[1mwandb[0m: Agent Starting Run: vif9b398 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7649, Train Loss=0.0647, Val Acc=0.7668, Val Loss=0.0652
Epoch 2: Train Acc=0.8403, Train Loss=0.0480, Val Acc=0.8386, Val Loss=0.0490
Epoch 3: Train Acc=0.8606, Train Loss=0.0399, Val Acc=0.8556, Val Loss=0.0415
Epoch 4: Train Acc=0.8656, Train Loss=0.0395, Val Acc=0.8566, Val Loss=0.0424
Epoch 5: Train Acc=0.8773, Train Loss=0.0363, Val Acc=0.8678, Val Loss=0.0401


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇▇█
train_loss,█▄▂▂▁
val_acc,▁▆▇▇█
val_loss,█▃▁▂▁

0,1
epoch,5.0
train_acc,0.87729
train_loss,0.03627
val_acc,0.8678
val_loss,0.04014


[34m[1mwandb[0m: Agent Starting Run: fz6r6rs8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.8284, Train Loss=0.0506, Val Acc=0.8352, Val Loss=0.0499
Epoch 2: Train Acc=0.8597, Train Loss=0.0418, Val Acc=0.8582, Val Loss=0.0430
Epoch 3: Train Acc=0.8643, Train Loss=0.0424, Val Acc=0.8602, Val Loss=0.0462
Epoch 4: Train Acc=0.8667, Train Loss=0.0408, Val Acc=0.8630, Val Loss=0.0456
Epoch 5: Train Acc=0.8590, Train Loss=0.0484, Val Acc=0.8542, Val Loss=0.0568
Epoch 6: Train Acc=0.8643, Train Loss=0.0481, Val Acc=0.8600, Val Loss=0.0561
Epoch 7: Train Acc=0.8614, Train Loss=0.0521, Val Acc=0.8620, Val Loss=0.0583
Epoch 8: Train Acc=0.8707, Train Loss=0.0415, Val Acc=0.8690, Val Loss=0.0476
Epoch 9: Train Acc=0.8739, Train Loss=0.0431, Val Acc=0.8686, Val Loss=0.0511
Epoch 10: Train Acc=0.8639, Train Loss=0.0540, Val Acc=0.8612, Val Loss=0.0652


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▆▇▆██▆
train_loss,▆▂▂▁▅▅▇▁▂█
val_acc,▁▆▆▇▅▆▇██▆
val_loss,▃▁▂▂▅▅▆▂▄█

0,1
epoch,10.0
train_acc,0.86387
train_loss,0.05397
val_acc,0.8612
val_loss,0.06525


[34m[1mwandb[0m: Agent Starting Run: 9fcxroc3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.7744, Train Loss=0.0644, Val Acc=0.7806, Val Loss=0.0643
Epoch 2: Train Acc=0.8298, Train Loss=0.0483, Val Acc=0.8352, Val Loss=0.0481
Epoch 3: Train Acc=0.8537, Train Loss=0.0421, Val Acc=0.8570, Val Loss=0.0424
Epoch 4: Train Acc=0.8626, Train Loss=0.0394, Val Acc=0.8648, Val Loss=0.0403
Epoch 5: Train Acc=0.8734, Train Loss=0.0361, Val Acc=0.8698, Val Loss=0.0379
Epoch 6: Train Acc=0.8775, Train Loss=0.0346, Val Acc=0.8728, Val Loss=0.0370
Epoch 7: Train Acc=0.8834, Train Loss=0.0331, Val Acc=0.8746, Val Loss=0.0360
Epoch 8: Train Acc=0.8863, Train Loss=0.0322, Val Acc=0.8768, Val Loss=0.0354
Epoch 9: Train Acc=0.8881, Train Loss=0.0317, Val Acc=0.8734, Val Loss=0.0357
Epoch 10: Train Acc=0.8894, Train Loss=0.0313, Val Acc=0.8748, Val Loss=0.0358


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▆▆▇▇████
train_loss,█▅▃▃▂▂▁▁▁▁
val_acc,▁▅▇▇▇█████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.88944
train_loss,0.03128
val_acc,0.8748
val_loss,0.03583


[34m[1mwandb[0m: Agent Starting Run: tlyglqny with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5787, Train Loss=0.0998, Val Acc=0.5830, Val Loss=0.0995
Epoch 2: Train Acc=0.6111, Train Loss=0.0952, Val Acc=0.6166, Val Loss=0.0949
Epoch 3: Train Acc=0.6364, Train Loss=0.0919, Val Acc=0.6438, Val Loss=0.0915
Epoch 4: Train Acc=0.6617, Train Loss=0.0881, Val Acc=0.6662, Val Loss=0.0878
Epoch 5: Train Acc=0.6810, Train Loss=0.0841, Val Acc=0.6854, Val Loss=0.0839


0,1
epoch,▁▃▅▆█
train_acc,▁▃▅▇█
train_loss,█▆▄▃▁
val_acc,▁▃▅▇█
val_loss,█▆▄▃▁

0,1
epoch,5.0
train_acc,0.68098
train_loss,0.08412
val_acc,0.6854
val_loss,0.0839


[34m[1mwandb[0m: Agent Starting Run: gvydex3e with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1998, Train Loss=0.1683, Val Acc=0.1980, Val Loss=0.1681
Epoch 2: Train Acc=0.2010, Train Loss=0.1675, Val Acc=0.1996, Val Loss=0.1672
Epoch 3: Train Acc=0.2117, Train Loss=0.1669, Val Acc=0.2238, Val Loss=0.1666
Epoch 4: Train Acc=0.2093, Train Loss=0.1660, Val Acc=0.2244, Val Loss=0.1658
Epoch 5: Train Acc=0.2131, Train Loss=0.1644, Val Acc=0.2302, Val Loss=0.1642


0,1
epoch,▁▃▅▆█
train_acc,▁▂▇▆█
train_loss,█▇▅▄▁
val_acc,▁▁▇▇█
val_loss,█▆▅▄▁

0,1
epoch,5.0
train_acc,0.21313
train_loss,0.16439
val_acc,0.2302
val_loss,0.16424


[34m[1mwandb[0m: Agent Starting Run: gmls0bro with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▇████████
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▇████████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: muptbx4i with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8068, Train Loss=0.0560, Val Acc=0.8122, Val Loss=0.0555
Epoch 2: Train Acc=0.8240, Train Loss=0.0505, Val Acc=0.8302, Val Loss=0.0498
Epoch 3: Train Acc=0.8327, Train Loss=0.0478, Val Acc=0.8398, Val Loss=0.0471
Epoch 4: Train Acc=0.8384, Train Loss=0.0461, Val Acc=0.8466, Val Loss=0.0455
Epoch 5: Train Acc=0.8435, Train Loss=0.0447, Val Acc=0.8502, Val Loss=0.0442
Epoch 6: Train Acc=0.8469, Train Loss=0.0437, Val Acc=0.8532, Val Loss=0.0433
Epoch 7: Train Acc=0.8497, Train Loss=0.0428, Val Acc=0.8578, Val Loss=0.0425
Epoch 8: Train Acc=0.8522, Train Loss=0.0420, Val Acc=0.8592, Val Loss=0.0418
Epoch 9: Train Acc=0.8547, Train Loss=0.0413, Val Acc=0.8608, Val Loss=0.0412
Epoch 10: Train Acc=0.8571, Train Loss=0.0406, Val Acc=0.8622, Val Loss=0.0406


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▅▅▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_acc,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
train_acc,0.85707
train_loss,0.04064
val_acc,0.8622
val_loss,0.04063


[34m[1mwandb[0m: Agent Starting Run: 7xg8s8qs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.2001, Train Loss=0.1679, Val Acc=0.1990, Val Loss=0.1677
Epoch 2: Train Acc=0.2071, Train Loss=0.1670, Val Acc=0.2052, Val Loss=0.1667
Epoch 3: Train Acc=0.2220, Train Loss=0.1655, Val Acc=0.2328, Val Loss=0.1654
Epoch 4: Train Acc=0.2409, Train Loss=0.1612, Val Acc=0.2548, Val Loss=0.1610
Epoch 5: Train Acc=0.3977, Train Loss=0.1419, Val Acc=0.4064, Val Loss=0.1417
Epoch 6: Train Acc=0.4168, Train Loss=0.1254, Val Acc=0.4256, Val Loss=0.1252
Epoch 7: Train Acc=0.4519, Train Loss=0.1203, Val Acc=0.4630, Val Loss=0.1202
Epoch 8: Train Acc=0.4911, Train Loss=0.1169, Val Acc=0.4956, Val Loss=0.1168
Epoch 9: Train Acc=0.5462, Train Loss=0.1128, Val Acc=0.5496, Val Loss=0.1125
Epoch 10: Train Acc=0.5729, Train Loss=0.1080, Val Acc=0.5778, Val Loss=0.1078


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▂▅▅▆▆▇█
train_loss,███▇▅▃▂▂▂▁
val_acc,▁▁▂▂▅▅▆▆▇█
val_loss,███▇▅▃▂▂▂▁

0,1
epoch,10.0
train_acc,0.57293
train_loss,0.10802
val_acc,0.5778
val_loss,0.10776


[34m[1mwandb[0m: Agent Starting Run: 3h6fmkam with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8300, Train Loss=0.0473, Val Acc=0.8376, Val Loss=0.0465
Epoch 2: Train Acc=0.8571, Train Loss=0.0396, Val Acc=0.8596, Val Loss=0.0401
Epoch 3: Train Acc=0.8698, Train Loss=0.0358, Val Acc=0.8654, Val Loss=0.0372
Epoch 4: Train Acc=0.8772, Train Loss=0.0336, Val Acc=0.8722, Val Loss=0.0358
Epoch 5: Train Acc=0.8837, Train Loss=0.0319, Val Acc=0.8750, Val Loss=0.0349
Epoch 6: Train Acc=0.8897, Train Loss=0.0305, Val Acc=0.8758, Val Loss=0.0342
Epoch 7: Train Acc=0.8931, Train Loss=0.0295, Val Acc=0.8776, Val Loss=0.0339
Epoch 8: Train Acc=0.8961, Train Loss=0.0287, Val Acc=0.8804, Val Loss=0.0338
Epoch 9: Train Acc=0.8984, Train Loss=0.0279, Val Acc=0.8820, Val Loss=0.0337
Epoch 10: Train Acc=0.9005, Train Loss=0.0272, Val Acc=0.8818, Val Loss=0.0337


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▂▁▁
val_acc,▁▄▅▆▇▇▇███
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_acc,0.90051
train_loss,0.02724
val_acc,0.8818
val_loss,0.03374


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sqwxo2hw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1996, Train Loss=0.1695, Val Acc=0.1978, Val Loss=0.1693
Epoch 2: Train Acc=0.2007, Train Loss=0.1681, Val Acc=0.1988, Val Loss=0.1679
Epoch 3: Train Acc=0.2073, Train Loss=0.1673, Val Acc=0.2208, Val Loss=0.1671
Epoch 4: Train Acc=0.2095, Train Loss=0.1666, Val Acc=0.2250, Val Loss=0.1664
Epoch 5: Train Acc=0.2939, Train Loss=0.1635, Val Acc=0.3034, Val Loss=0.1634
Epoch 6: Train Acc=0.3814, Train Loss=0.1483, Val Acc=0.3918, Val Loss=0.1483
Epoch 7: Train Acc=0.3873, Train Loss=0.1358, Val Acc=0.3988, Val Loss=0.1359
Epoch 8: Train Acc=0.3960, Train Loss=0.1291, Val Acc=0.4066, Val Loss=0.1292
Epoch 9: Train Acc=0.3969, Train Loss=0.1253, Val Acc=0.4076, Val Loss=0.1254
Epoch 10: Train Acc=0.4001, Train Loss=0.1229, Val Acc=0.4110, Val Loss=0.1230


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▄▇████
train_loss,████▇▅▃▂▁▁
val_acc,▁▁▂▂▄▇████
val_loss,████▇▅▃▂▁▁

0,1
epoch,10.0
train_acc,0.40007
train_loss,0.12286
val_acc,0.411
val_loss,0.12303


[34m[1mwandb[0m: Agent Starting Run: tzxget0z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8175, Train Loss=0.0519, Val Acc=0.8242, Val Loss=0.0508
Epoch 2: Train Acc=0.8380, Train Loss=0.0461, Val Acc=0.8444, Val Loss=0.0452
Epoch 3: Train Acc=0.8470, Train Loss=0.0434, Val Acc=0.8518, Val Loss=0.0426
Epoch 4: Train Acc=0.8521, Train Loss=0.0418, Val Acc=0.8584, Val Loss=0.0412
Epoch 5: Train Acc=0.8561, Train Loss=0.0405, Val Acc=0.8626, Val Loss=0.0401


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆▇█
train_loss,█▄▃▂▁
val_acc,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_acc,0.85607
train_loss,0.04054
val_acc,0.8626
val_loss,0.04014


[34m[1mwandb[0m: Agent Starting Run: ybd0fqox with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


[34m[1mwandb[0m: [32m[41mERROR[0m Run ybd0fqox errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Epoch 1: Train Acc=0.5702, Train Loss=0.1014, Val Acc=0.5714, Val Loss=0.1013
Epoch 2: Train Acc=0.7687, Train Loss=0.0669, Val Acc=0.7704, Val Loss=0.0671
Epoch 3: Train Acc=0.7943, Train Loss=0.0570, Val Acc=0.7964, Val Loss=0.0572
Epoch 4: Train Acc=0.8245, Train Loss=0.0507, Val Acc=0.8274, Val Loss=0.0512
Epoch 5: Train Acc=0.8403, Train Loss=0.0464, Val Acc=0.8418, Val Loss=0.0474


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇██
train_loss,█▄▂▂▁
val_acc,▁▆▇██
val_loss,█▄▂▁▁

0,1
epoch,5.0
train_acc,0.84035
train_loss,0.0464
val_acc,0.8418
val_loss,0.04735


[34m[1mwandb[0m: Agent Starting Run: fwc68rod with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.5068, Train Loss=0.1177, Val Acc=0.5052, Val Loss=0.1177
Epoch 2: Train Acc=0.6220, Train Loss=0.0881, Val Acc=0.6246, Val Loss=0.0879
Epoch 3: Train Acc=0.7406, Train Loss=0.0722, Val Acc=0.7446, Val Loss=0.0721
Epoch 4: Train Acc=0.7858, Train Loss=0.0594, Val Acc=0.7860, Val Loss=0.0598
Epoch 5: Train Acc=0.8129, Train Loss=0.0522, Val Acc=0.8122, Val Loss=0.0524


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.81285
train_loss,0.05216
val_acc,0.8122
val_loss,0.05244


[34m[1mwandb[0m: Agent Starting Run: rpm7bvps with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8390, Train Loss=0.0456, Val Acc=0.8444, Val Loss=0.0456
Epoch 2: Train Acc=0.8503, Train Loss=0.0421, Val Acc=0.8520, Val Loss=0.0424
Epoch 3: Train Acc=0.8571, Train Loss=0.0400, Val Acc=0.8564, Val Loss=0.0408
Epoch 4: Train Acc=0.8617, Train Loss=0.0386, Val Acc=0.8590, Val Loss=0.0396
Epoch 5: Train Acc=0.8656, Train Loss=0.0373, Val Acc=0.8630, Val Loss=0.0387


0,1
epoch,▁▃▅▆█
train_acc,▁▄▆▇█
train_loss,█▅▃▂▁
val_acc,▁▄▆▆█
val_loss,█▅▃▂▁

0,1
epoch,5.0
train_acc,0.8656
train_loss,0.03732
val_acc,0.863
val_loss,0.03867


[34m[1mwandb[0m: Agent Starting Run: m8asi7pj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.8593, Train Loss=0.0388, Val Acc=0.8538, Val Loss=0.0390
Epoch 2: Train Acc=0.8739, Train Loss=0.0347, Val Acc=0.8720, Val Loss=0.0357
Epoch 3: Train Acc=0.8815, Train Loss=0.0324, Val Acc=0.8800, Val Loss=0.0341
Epoch 4: Train Acc=0.8885, Train Loss=0.0308, Val Acc=0.8812, Val Loss=0.0331
Epoch 5: Train Acc=0.8929, Train Loss=0.0295, Val Acc=0.8812, Val Loss=0.0324
Epoch 6: Train Acc=0.8963, Train Loss=0.0285, Val Acc=0.8844, Val Loss=0.0319
Epoch 7: Train Acc=0.8994, Train Loss=0.0276, Val Acc=0.8864, Val Loss=0.0315
Epoch 8: Train Acc=0.9015, Train Loss=0.0269, Val Acc=0.8878, Val Loss=0.0312
Epoch 9: Train Acc=0.9040, Train Loss=0.0263, Val Acc=0.8908, Val Loss=0.0310
Epoch 10: Train Acc=0.9059, Train Loss=0.0257, Val Acc=0.8914, Val Loss=0.0309


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▃▄▅▆▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_acc,▁▄▆▆▆▇▇▇██
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.90585
train_loss,0.02566
val_acc,0.8914
val_loss,0.03093


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u3v52vx5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7519, Train Loss=0.0762, Val Acc=0.7550, Val Loss=0.0755
Epoch 2: Train Acc=0.8031, Train Loss=0.0586, Val Acc=0.8066, Val Loss=0.0581
Epoch 3: Train Acc=0.8228, Train Loss=0.0509, Val Acc=0.8304, Val Loss=0.0506
Epoch 4: Train Acc=0.8348, Train Loss=0.0469, Val Acc=0.8402, Val Loss=0.0468
Epoch 5: Train Acc=0.8442, Train Loss=0.0443, Val Acc=0.8488, Val Loss=0.0444
Epoch 6: Train Acc=0.8504, Train Loss=0.0424, Val Acc=0.8542, Val Loss=0.0427
Epoch 7: Train Acc=0.8553, Train Loss=0.0410, Val Acc=0.8580, Val Loss=0.0415
Epoch 8: Train Acc=0.8589, Train Loss=0.0398, Val Acc=0.8598, Val Loss=0.0405
Epoch 9: Train Acc=0.8621, Train Loss=0.0388, Val Acc=0.8610, Val Loss=0.0397
Epoch 10: Train Acc=0.8653, Train Loss=0.0380, Val Acc=0.8622, Val Loss=0.0391


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_acc,▁▄▆▇▇▇████
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_acc,0.86529
train_loss,0.03795
val_acc,0.8622
val_loss,0.03905


[34m[1mwandb[0m: Agent Starting Run: efeb4a82 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.1999, Train Loss=0.1876, Val Acc=0.1924, Val Loss=0.1876
Epoch 2: Train Acc=0.2052, Train Loss=0.1731, Val Acc=0.1966, Val Loss=0.1730
Epoch 3: Train Acc=0.2222, Train Loss=0.1699, Val Acc=0.2154, Val Loss=0.1698
Epoch 4: Train Acc=0.2935, Train Loss=0.1677, Val Acc=0.2816, Val Loss=0.1675
Epoch 5: Train Acc=0.4009, Train Loss=0.1613, Val Acc=0.4036, Val Loss=0.1613
Epoch 6: Train Acc=0.4085, Train Loss=0.1506, Val Acc=0.4118, Val Loss=0.1506
Epoch 7: Train Acc=0.4194, Train Loss=0.1423, Val Acc=0.4244, Val Loss=0.1424
Epoch 8: Train Acc=0.4300, Train Loss=0.1364, Val Acc=0.4362, Val Loss=0.1365
Epoch 9: Train Acc=0.4420, Train Loss=0.1320, Val Acc=0.4474, Val Loss=0.1321
Epoch 10: Train Acc=0.4550, Train Loss=0.1283, Val Acc=0.4606, Val Loss=0.1284


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▂▄▇▇▇▇██
train_loss,█▆▆▆▅▄▃▂▁▁
val_acc,▁▁▂▃▇▇▇▇██
val_loss,█▆▆▆▅▄▃▂▁▁

0,1
epoch,10.0
train_acc,0.45504
train_loss,0.12831
val_acc,0.4606
val_loss,0.12844


[34m[1mwandb[0m: Agent Starting Run: 7eq6o248 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 2: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 3: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 4: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303
Epoch 5: Train Acc=0.1002, Train Loss=0.2303, Val Acc=0.0976, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁████
val_acc,▁▁▁▁▁
val_loss,▁████

0,1
epoch,5.0
train_acc,0.10022
train_loss,0.23026
val_acc,0.0976
val_loss,0.23029


[34m[1mwandb[0m: Agent Starting Run: owlsonv7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1: Train Acc=0.7213, Train Loss=0.0768, Val Acc=0.7254, Val Loss=0.0767
Epoch 2: Train Acc=0.7882, Train Loss=0.0609, Val Acc=0.7928, Val Loss=0.0616
Epoch 3: Train Acc=0.8081, Train Loss=0.0537, Val Acc=0.8114, Val Loss=0.0548
Epoch 4: Train Acc=0.8180, Train Loss=0.0498, Val Acc=0.8154, Val Loss=0.0515
Epoch 5: Train Acc=0.8301, Train Loss=0.0468, Val Acc=0.8258, Val Loss=0.0489


0,1
epoch,▁▃▅▆█
train_acc,▁▅▇▇█
train_loss,█▄▃▂▁
val_acc,▁▆▇▇█
val_loss,█▄▂▂▁

0,1
epoch,5.0
train_acc,0.83011
train_loss,0.0468
val_acc,0.8258
val_loss,0.04888


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b7ee0wmc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,█▆▄▂▁
val_acc,▁▁▁▁▁
val_loss,▁▃▅▇█

0,1
epoch,5.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23027


[34m[1mwandb[0m: Agent Starting Run: c67eu7z0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1: Train Acc=0.0999, Train Loss=0.2303, Val Acc=0.1008, Val Loss=0.2303
Epoch 2: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 3: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 4: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 5: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 6: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 7: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 8: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 9: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303
Epoch 10: Train Acc=0.1008, Train Loss=0.2303, Val Acc=0.0914, Val Loss=0.2303


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁█████████
train_loss,█▁▁▁▁▁▁▁▁▁
val_acc,█▁▁▁▁▁▁▁▁▁
val_loss,▁▆████████

0,1
epoch,10.0
train_acc,0.10078
train_loss,0.23026
val_acc,0.0914
val_loss,0.23028


[34m[1mwandb[0m: Agent Starting Run: rzyg2q6v with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: stochastic
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


[34m[1mwandb[0m: [32m[41mERROR[0m Run rzyg2q6v errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

[34m[1mwandb[0m: [32m[41mERROR[0m Run zom10e84 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-29-756b65a1eb69>", line 32, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trained_weights = Stochastic_GD(config.learning_rate, x_train, y_train, x_val, y_val, config.epochs, config.activation, config.num_layers, config.hidden_size, config.weight_init, config.batch_size, 28*28, 10, loss_function='mscross_entropy')
[34m[1mwandb[0m: [32m[41mERROR[0m                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

#Question- 10

In [17]:
def Data_Preprocess_mnist():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Split into train and validation
    val_size = 5000
    x_val, y_val = x_train[:val_size], y_train[:val_size]
    x_train, y_train = x_train[val_size:], y_train[val_size:]

    # Normalize dataset
    x_train, x_val, x_test = x_train / 255.0, x_val / 255.0, x_test / 255.0

    # One-hot encoding
    y_train = to_categorical(y_train, 10)
    y_val = to_categorical(y_val, 10)
    y_test = to_categorical(y_test, 10)

    return x_train, y_train, x_val, y_val, x_test, y_test

x_train_mnist, y_train_mnist, x_val_mnist, y_val_mnist, x_test_mnist, y_test_mnist = Data_Preprocess_mnist()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [19]:
# Set parameters and train
num_hidden_layer = 5
num_nodes_hidden_layers = [128]
weight = 'xavier'
input_size = 28 * 28  # Flattened image size
output_size = 10  # Number of classes
lr = 0.001
batch_size = 64
epochs = 10
activation = 'tanh'

trained_weights_mnist1 = Adam_Opt(lr, x_train_mnist, y_train_mnist, x_val_mnist, y_val_mnist, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy')


Epoch 1: Train Acc=0.9457, Train Loss=0.0179, Val Acc=0.9446, Val Loss=0.0175
Epoch 2: Train Acc=0.9638, Train Loss=0.0121, Val Acc=0.9604, Val Loss=0.0128
Epoch 3: Train Acc=0.9716, Train Loss=0.0096, Val Acc=0.9668, Val Loss=0.0110
Epoch 4: Train Acc=0.9763, Train Loss=0.0081, Val Acc=0.9698, Val Loss=0.0101
Epoch 5: Train Acc=0.9795, Train Loss=0.0070, Val Acc=0.9710, Val Loss=0.0095
Epoch 6: Train Acc=0.9816, Train Loss=0.0061, Val Acc=0.9734, Val Loss=0.0092
Epoch 7: Train Acc=0.9839, Train Loss=0.0054, Val Acc=0.9734, Val Loss=0.0090
Epoch 8: Train Acc=0.9859, Train Loss=0.0048, Val Acc=0.9748, Val Loss=0.0088
Epoch 9: Train Acc=0.9875, Train Loss=0.0042, Val Acc=0.9756, Val Loss=0.0087
Epoch 10: Train Acc=0.9886, Train Loss=0.0037, Val Acc=0.9764, Val Loss=0.0087


In [20]:
# Set parameters and train
num_hidden_layer = 4
num_nodes_hidden_layers = [64]
weight = 'xavier'
input_size = 28 * 28  # Flattened image size
output_size = 10  # Number of classes
lr = 0.001
batch_size = 16
epochs = 10
activation = 'relu'

trained_weights_mnist2 = Adam_Opt(lr, x_train_mnist, y_train_mnist, x_val_mnist, y_val_mnist, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy')


Epoch 1: Train Acc=0.9460, Train Loss=0.0177, Val Acc=0.9498, Val Loss=0.0175
Epoch 2: Train Acc=0.9657, Train Loss=0.0113, Val Acc=0.9654, Val Loss=0.0121
Epoch 3: Train Acc=0.9740, Train Loss=0.0087, Val Acc=0.9712, Val Loss=0.0105
Epoch 4: Train Acc=0.9776, Train Loss=0.0073, Val Acc=0.9724, Val Loss=0.0099
Epoch 5: Train Acc=0.9800, Train Loss=0.0064, Val Acc=0.9732, Val Loss=0.0098
Epoch 6: Train Acc=0.9813, Train Loss=0.0060, Val Acc=0.9734, Val Loss=0.0101
Epoch 7: Train Acc=0.9829, Train Loss=0.0053, Val Acc=0.9738, Val Loss=0.0099
Epoch 8: Train Acc=0.9842, Train Loss=0.0048, Val Acc=0.9718, Val Loss=0.0100
Epoch 9: Train Acc=0.9849, Train Loss=0.0045, Val Acc=0.9706, Val Loss=0.0102
Epoch 10: Train Acc=0.9855, Train Loss=0.0043, Val Acc=0.9702, Val Loss=0.0106


In [21]:
# Set parameters and train
num_hidden_layer = 3
num_nodes_hidden_layers = [64]
weight = 'xavier'
input_size = 28 * 28
output_size = 10
lr = 0.001
batch_size = 32
epochs = 10
activation = 'tanh'

trained_weights_mnist3 = Adam_Opt(lr, x_train_mnist, y_train_mnist, x_val_mnist, y_val_mnist, epochs, activation, num_hidden_layer, num_nodes_hidden_layers, weight, batch_size, input_size, output_size, beta1=0.9, beta2=0.999, epsilon=1e-8, loss_function='cross_entropy')


Epoch 1: Train Acc=0.9455, Train Loss=0.0187, Val Acc=0.9470, Val Loss=0.0180
Epoch 2: Train Acc=0.9592, Train Loss=0.0140, Val Acc=0.9600, Val Loss=0.0139
Epoch 3: Train Acc=0.9666, Train Loss=0.0115, Val Acc=0.9642, Val Loss=0.0121
Epoch 4: Train Acc=0.9715, Train Loss=0.0099, Val Acc=0.9672, Val Loss=0.0111
Epoch 5: Train Acc=0.9749, Train Loss=0.0087, Val Acc=0.9684, Val Loss=0.0104
Epoch 6: Train Acc=0.9778, Train Loss=0.0077, Val Acc=0.9698, Val Loss=0.0100
Epoch 7: Train Acc=0.9799, Train Loss=0.0070, Val Acc=0.9710, Val Loss=0.0097
Epoch 8: Train Acc=0.9816, Train Loss=0.0063, Val Acc=0.9708, Val Loss=0.0094
Epoch 9: Train Acc=0.9833, Train Loss=0.0058, Val Acc=0.9724, Val Loss=0.0093
Epoch 10: Train Acc=0.9848, Train Loss=0.0053, Val Acc=0.9714, Val Loss=0.0092
