# MLP with RMSprop Using TensorFlow - Hyperparameter Tuning

In [1]:
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.optimizers.legacy import RMSprop
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
import wandb
from pprint import pprint

from tf_utils import create_model, train_model

## Log in to Weights & Biases (wandb)

In [2]:
# Authenticate this kernel with Weights & Biases before any sweep/run is
# created; when no stored credentials are found it prompts for an API key.
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

## Load Fashion-MNIST

In [3]:
# Fetch the train/test splits (downloaded on first use).
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Sanity check: one shape per line, in the order train images, train labels,
# test images, test labels.
print(
    *(array.shape for array in (x_train, y_train, x_test, y_test)),
    sep='\n',
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


## Define sweep agent function

In [4]:
def sweep_train():
    """Build, compile and train one MLP for a single W&B run.

    Intended to be passed as the ``function`` of ``wandb.agent``: it takes no
    arguments and reads its hyperparameters from the run's config. The values
    in ``default_config`` are the fallbacks handed to ``wandb.init``; when the
    function is launched by a sweep agent the sweep-provided values are
    expected to take precedence (see the W&B sweep documentation).

    Reads the notebook-level globals ``x_train`` and ``y_train`` as training
    data. Model construction and the training loop live in ``tf_utils``
    (``create_model`` / ``train_model``); their internals are not visible
    here - presumably ``train_model`` handles the validation split and the
    metric logging to W&B. TODO confirm against ``tf_utils``.

    Returns:
        None. The result of ``train_model`` is intentionally discarded.
    """
    default_config = {
        'dense_1': 512,
        'dense_2': 256,
        'dense_3': 256,
        'dense_4': 64,
        'learning_rate': 0.001,
        'momentum': 0.0,
        'rho': 0.9,
        'epochs': 50,
        'batch_size': 128,
    }

    # Use the run as a context manager so it is always finished - and marked
    # as failed if anything below raises - instead of relying on the caller
    # to clean up a half-open run before the next trial starts.
    with wandb.init(config=default_config) as run:
        config = run.config

        mlp = create_model(config)
        mlp.summary()
        mlp.compile(
            optimizer=RMSprop(
                learning_rate=config.learning_rate,
                momentum=config.momentum,
                rho=config.rho,
            ),
            loss=SparseCategoricalCrossentropy(),
            metrics=[SparseCategoricalAccuracy()],
        )
        train_model(mlp, config, x_train, y_train, verbose=0)

## Configure Wandb Sweep for hyperparameter tuning

In [6]:
# Sweep definition: Bayesian search minimising `val_loss`, with Hyperband
# early termination. `batch_size` is chosen from a fixed list; every other
# hyperparameter is a (min, max) range - integer bounds for epochs and layer
# widths, float bounds for the optimizer settings.
# NOTE(review): `learning_rate` spans three orders of magnitude but is given
# as a plain min/max range, which W&B presumably samples on a linear scale -
# consider a log-scale distribution if large rates dominate. TODO confirm.
sweep_config = dict(
    name='tf-rmsprop-sweep',
    method='bayes',
    metric=dict(name='val_loss', goal='minimize'),
    early_terminate=dict(type='hyperband', min_iter=2),
    parameters={
        'batch_size': dict(values=[16, 32, 64, 128, 256]),
        **{
            hyperparameter: dict(min=lower, max=upper)
            for hyperparameter, (lower, upper) in {
                'epochs': (15, 50),
                'learning_rate': (0.0001, 0.1),
                'momentum': (0.0, 0.9),
                'rho': (0.8, 0.999),
                'dense_1': (256, 512),
                'dense_2': (128, 256),
                'dense_3': (128, 256),
                'dense_4': (32, 128),
            }.items()
        },
    },
)

# Show the final configuration for the record.
pprint(sweep_config)

# Register the sweep; the returned id is what the agent cell consumes.
sweep_id = wandb.sweep(sweep_config, project='tf_mlp')

{'early_terminate': {'min_iter': 2, 'type': 'hyperband'},
 'method': 'bayes',
 'metric': {'goal': 'minimize', 'name': 'val_loss'},
 'name': 'tf-rmsprop-sweep',
 'parameters': {'batch_size': {'values': [16, 32, 64, 128, 256]},
                'dense_1': {'max': 512, 'min': 256},
                'dense_2': {'max': 256, 'min': 128},
                'dense_3': {'max': 256, 'min': 128},
                'dense_4': {'max': 128, 'min': 32},
                'epochs': {'max': 50, 'min': 15},
                'learning_rate': {'max': 0.1, 'min': 0.0001},
                'momentum': {'max': 0.9, 'min': 0.0},
                'rho': {'max': 0.999, 'min': 0.8}}}
Create sweep with ID: 9v74y3nr
Sweep URL: https://wandb.ai/nsiete23/tf_mlp/sweeps/9v74y3nr


## Run sweep

In [7]:
# Start an agent in this kernel that pulls configurations for `sweep_id` and
# calls `sweep_train` once per configuration; `count=50` caps the number of
# runs this agent executes.
wandb.agent(sweep_id, function=sweep_train, count=50)

[34m[1mwandb[0m: Agent Starting Run: aemtzbdi with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 278
[34m[1mwandb[0m: 	dense_2: 208
[34m[1mwandb[0m: 	dense_3: 192
[34m[1mwandb[0m: 	dense_4: 90
[34m[1mwandb[0m: 	epochs: 21
[34m[1mwandb[0m: 	learning_rate: 0.08327647189609565
[34m[1mwandb[0m: 	momentum: 0.08083622947220044
[34m[1mwandb[0m: 	rho: 0.8703079889871562
[34m[1mwandb[0m: Currently logged in as: [33mv-modroczky[0m ([33mnsiete23[0m). Use [1m`wandb login --relogin`[0m to force relogin


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 278)               218230    
                                                                 
 dense_1 (Dense)             (None, 208)               58032     
                                                                 
 dense_2 (Dense)             (None, 192)               40128     
                                                                 
 dense_3 (Dense)             (None, 90)                17370     
                                                                 
 dense_4 (Dense)             (None, 10)                9

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,█▁▂▁▁▁▁▁▁▁▁▁▁▂▁▂▁▁▁▁▁
val_loss,▂▄█▃█▂▄▃▂▁▃▇▄▆▂▆▃▅▃▃▇
val_sparse_categorical_accuracy,▅▅▅▃▃▆▃▅██▆▅▅▃▁▅▁▆▆▃▃

0,1
GFLOPS,0.00033
best_epoch,9.0
best_val_loss,2.30516
epoch,20.0
loss,2.30715
sparse_categorical_accuracy,0.10023
val_loss,2.31358
val_sparse_categorical_accuracy,0.09833


[34m[1mwandb[0m: Agent Starting Run: stoz8iqq with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 349
[34m[1mwandb[0m: 	dense_2: 241
[34m[1mwandb[0m: 	dense_3: 148
[34m[1mwandb[0m: 	dense_4: 37
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	learning_rate: 0.01217227859839216
[34m[1mwandb[0m: 	momentum: 0.10856137222328777
[34m[1mwandb[0m: 	rho: 0.8583339085500451


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 349)               273965    
                                                                 
 dense_1 (Dense)             (None, 241)               84350     
                                                                 
 dense_2 (Dense)             (None, 148)               35816     
                                                                 
 dense_3 (Dense)             (None, 37)                5513      
                                                                 
 dense_4 (Dense)             (None, 10)                3

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇████████████
val_loss,▅▄▆▄▂█▃▁▂▄▂▅▂▃▃▁▂
val_sparse_categorical_accuracy,▁▁▂▄▇▂▅█▇▆▇▆▇▇▅█▇

0,1
GFLOPS,0.0004
best_epoch,7.0
best_val_loss,0.424
epoch,16.0
loss,0.388
sparse_categorical_accuracy,0.86931
val_loss,0.48543
val_sparse_categorical_accuracy,0.84792


[34m[1mwandb[0m: Agent Starting Run: imh7sboz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_1: 463
[34m[1mwandb[0m: 	dense_2: 163
[34m[1mwandb[0m: 	dense_3: 183
[34m[1mwandb[0m: 	dense_4: 57
[34m[1mwandb[0m: 	epochs: 27
[34m[1mwandb[0m: 	learning_rate: 0.05458373179595338
[34m[1mwandb[0m: 	momentum: 0.3814439264263261
[34m[1mwandb[0m: 	rho: 0.9841393790585384


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 463)               363455    
                                                                 
 dense_1 (Dense)             (None, 163)               75632     
                                                                 
 dense_2 (Dense)             (None, 183)               30012     
                                                                 
 dense_3 (Dense)             (None, 57)                10488     
                                                                 
 dense_4 (Dense)             (None, 10)                5

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▅▃▃▆▇▄▄▃▃▅█▁▃▅▄▆▅▅▂▃▅▄▂▆▄▃▄
val_loss,▃▃▄▂▃▂▅▂▂▃▃▃▁▃▂█▃▃▂▃▅▄▃▄▄▅▂
val_sparse_categorical_accuracy,▅▃▃█▆▆▅▅▁▁█▅█▆█▅█▁█▁▁▃▅▅▃▁▅

0,1
GFLOPS,0.00048
best_epoch,12.0
best_val_loss,2.30383
epoch,26.0
loss,2.30827
sparse_categorical_accuracy,0.09938
val_loss,2.30526
val_sparse_categorical_accuracy,0.10033


[34m[1mwandb[0m: Agent Starting Run: bq18hv8j with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 474
[34m[1mwandb[0m: 	dense_2: 129
[34m[1mwandb[0m: 	dense_3: 167
[34m[1mwandb[0m: 	dense_4: 45
[34m[1mwandb[0m: 	epochs: 18
[34m[1mwandb[0m: 	learning_rate: 0.06467285371845305
[34m[1mwandb[0m: 	momentum: 0.3621442305158925
[34m[1mwandb[0m: 	rho: 0.8924118632848634


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 474)               372090    
                                                                 
 dense_1 (Dense)             (None, 129)               61275     
                                                                 
 dense_2 (Dense)             (None, 167)               21710     
                                                                 
 dense_3 (Dense)             (None, 45)                7560      
                                                                 
 dense_4 (Dense)             (None, 10)                4

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▄▃▃▅▄▄█▆▄▆▃▄▄▁▄▅▄▂
val_loss,▂▄▄▂▄▄▃▆▁▄▄▃▂▁▃▄▄█
val_sparse_categorical_accuracy,▃▃▅▆▆▅▁█▅▅▁▃▆▃▆▆▁▅

0,1
GFLOPS,0.00046
best_epoch,13.0
best_val_loss,2.30522
epoch,17.0
loss,2.30708
sparse_categorical_accuracy,0.09775
val_loss,2.31763
val_sparse_categorical_accuracy,0.10033


[34m[1mwandb[0m: Agent Starting Run: xwdo93k9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_1: 302
[34m[1mwandb[0m: 	dense_2: 256
[34m[1mwandb[0m: 	dense_3: 165
[34m[1mwandb[0m: 	dense_4: 33
[34m[1mwandb[0m: 	epochs: 22
[34m[1mwandb[0m: 	learning_rate: 0.01880595289682616
[34m[1mwandb[0m: 	momentum: 0.28782322374795966
[34m[1mwandb[0m: 	rho: 0.8596431372947871


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 302)               237070    
                                                                 
 dense_1 (Dense)             (None, 256)               77568     
                                                                 
 dense_2 (Dense)             (None, 165)               42405     
                                                                 
 dense_3 (Dense)             (None, 33)                5478      
                                                                 
 dense_4 (Dense)             (None, 10)                3

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
loss,█▂▁▁▁▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▂▂
sparse_categorical_accuracy,▁▇▇▇█▇▅▆▇█████▇▇▇█▇█▇▇
val_loss,▃▄▂▃▂▄▇▃▁█▅▆▃▃▇▁█▄▅▆▅▂
val_sparse_categorical_accuracy,▇▂▇▄▇▆▁██▇▄▆▇█▂█▄▃▂▅▇█

0,1
GFLOPS,0.00036
best_epoch,15.0
best_val_loss,0.65377
epoch,21.0
loss,0.7274
sparse_categorical_accuracy,0.77142
val_loss,0.69305
val_sparse_categorical_accuracy,0.79808


[34m[1mwandb[0m: Agent Starting Run: u171q77d with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_1: 382
[34m[1mwandb[0m: 	dense_2: 237
[34m[1mwandb[0m: 	dense_3: 147
[34m[1mwandb[0m: 	dense_4: 32
[34m[1mwandb[0m: 	epochs: 19
[34m[1mwandb[0m: 	learning_rate: 0.008360766348959837
[34m[1mwandb[0m: 	momentum: 0.010535267791107462
[34m[1mwandb[0m: 	rho: 0.8578567709086986


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 382)               299870    
                                                                 
 dense_1 (Dense)             (None, 237)               90771     
                                                                 
 dense_2 (Dense)             (None, 147)               34986     
                                                                 
 dense_3 (Dense)             (None, 32)                4736      
                                                                 
 dense_4 (Dense)             (None, 10)                3

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██
loss,▂▁▁▁▁▁▁▁▁▂▂▂▂▃▄▆▆██
sparse_categorical_accuracy,▆████████▇▇▇▇▆▅▃▂▂▁
val_loss,▁▁▁▁▁▁▁▁▅▁▂▂▂█▂▃▃▃▃
val_sparse_categorical_accuracy,▇▇██████▇█▆▇█▆▅▃▁▁▁

0,1
GFLOPS,0.00043
best_epoch,6.0
best_val_loss,0.49946
epoch,18.0
loss,2.36404
sparse_categorical_accuracy,0.42973
val_loss,1.59231
val_sparse_categorical_accuracy,0.39283


[34m[1mwandb[0m: Agent Starting Run: wwgga1yd with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 311
[34m[1mwandb[0m: 	dense_2: 238
[34m[1mwandb[0m: 	dense_3: 129
[34m[1mwandb[0m: 	dense_4: 38
[34m[1mwandb[0m: 	epochs: 27
[34m[1mwandb[0m: 	learning_rate: 0.007199572771229585
[34m[1mwandb[0m: 	momentum: 0.325748377729711
[34m[1mwandb[0m: 	rho: 0.8444886374843534


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 311)               244135    
                                                                 
 dense_1 (Dense)             (None, 238)               74256     
                                                                 
 dense_2 (Dense)             (None, 129)               30831     
                                                                 
 dense_3 (Dense)             (None, 38)                4940      
                                                                 
 dense_4 (Dense)             (None, 10)                3

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇████████████████████
val_loss,▃▂▂▃▂▂▃▂▂▁▂▂▁▁▂▂▂▃▂▂▂▃▃▂▃█▂
val_sparse_categorical_accuracy,▁▅▅▃▅▅▂▆▆▇▇▅██▆▆▅▅▇▇█▄▆▆▇▁█

0,1
GFLOPS,0.00035
best_epoch,12.0
best_val_loss,0.39455
epoch,26.0
loss,0.34851
sparse_categorical_accuracy,0.88262
val_loss,0.48784
val_sparse_categorical_accuracy,0.87333


[34m[1mwandb[0m: Agent Starting Run: 3511mpna with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 292
[34m[1mwandb[0m: 	dense_2: 246
[34m[1mwandb[0m: 	dense_3: 187
[34m[1mwandb[0m: 	dense_4: 78
[34m[1mwandb[0m: 	epochs: 36
[34m[1mwandb[0m: 	learning_rate: 0.004025579531438728
[34m[1mwandb[0m: 	momentum: 0.5810420065050655
[34m[1mwandb[0m: 	rho: 0.8011256363807325


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 292)               229220    
                                                                 
 dense_1 (Dense)             (None, 246)               72078     
                                                                 
 dense_2 (Dense)             (None, 187)               46189     
                                                                 
 dense_3 (Dense)             (None, 78)                14664     
                                                                 
 dense_4 (Dense)             (None, 10)                7

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▃▂▂▂▂▃▃▃▄
sparse_categorical_accuracy,▁▆▇▇▇▇████████████████████▇▇██▇▇▇▇▇▇
val_loss,▃▁▂▄▁▁▁▁▁▁▂▃▁▃▂▃▃▂▂▅▂▄▂▄▄▃█▄▇▅▅▅▄█▄▄
val_sparse_categorical_accuracy,▂▆▄▄▆▇▇█▆▇▇▄▇▆▇▆▅▇▅▇▇█▆▇▇▆▆▆▅▃▇▆▃▁▆▄

0,1
GFLOPS,0.00036
best_epoch,1.0
best_val_loss,0.41464
epoch,35.0
loss,0.51774
sparse_categorical_accuracy,0.86256
val_loss,0.70371
val_sparse_categorical_accuracy,0.83567


[34m[1mwandb[0m: Agent Starting Run: cm6l30ke with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 320
[34m[1mwandb[0m: 	dense_2: 243
[34m[1mwandb[0m: 	dense_3: 160
[34m[1mwandb[0m: 	dense_4: 52
[34m[1mwandb[0m: 	epochs: 32
[34m[1mwandb[0m: 	learning_rate: 0.0144763824410983
[34m[1mwandb[0m: 	momentum: 0.8903660810280439
[34m[1mwandb[0m: 	rho: 0.8281803726316771


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 320)               251200    
                                                                 
 dense_1 (Dense)             (None, 243)               78003     
                                                                 
 dense_2 (Dense)             (None, 160)               39040     
                                                                 
 dense_3 (Dense)             (None, 52)                8372      
                                                                 
 dense_4 (Dense)             (None, 10)                5

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇███
loss,▄▁▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▂▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,█▄▂▁▄▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂
val_loss,█▂▂▂▂▁▁▁▂▂▂▃▂▂▂▁▁▁▂▁▁▁▁▂▂▂▂▂▂▂▂▂
val_sparse_categorical_accuracy,▂█▄▄▄▄▃▅▄▄▃▁▃▄▄▅▃▄▄▅▅▄▄▄▃▄▅▄▅▁▄▄

0,1
GFLOPS,0.00038
best_epoch,22.0
best_val_loss,1.70293
epoch,31.0
loss,1.79651
sparse_categorical_accuracy,0.19883
val_loss,1.80688
val_sparse_categorical_accuracy,0.19483


[34m[1mwandb[0m: Agent Starting Run: 0b7i4mmp with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 278
[34m[1mwandb[0m: 	dense_2: 250
[34m[1mwandb[0m: 	dense_3: 157
[34m[1mwandb[0m: 	dense_4: 51
[34m[1mwandb[0m: 	epochs: 38
[34m[1mwandb[0m: 	learning_rate: 0.03880061034994843
[34m[1mwandb[0m: 	momentum: 0.05063768420812496
[34m[1mwandb[0m: 	rho: 0.8023360271856302


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 278)               218230    
                                                                 
 dense_1 (Dense)             (None, 250)               69750     
                                                                 
 dense_2 (Dense)             (None, 157)               39407     
                                                                 
 dense_3 (Dense)             (None, 51)                8058      
                                                                 
 dense_4 (Dense)             (None, 10)                5

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▆▇▇▇▆▆▇▇███████████▇█████████████
val_loss,█▆█▅▄▄▃▆▆▅▄▄▃▁▁▂▃▁▂▁▁▄▂▂▃▇▄▃▁▄▂▂▃▃▄▂▂▂
val_sparse_categorical_accuracy,▁▅▂▅▅▅▅▄▅▃▆▅▆▇▇▇▆█▇██▄██▇▇▄▇█▆██▇▇▆█▇█

0,1
GFLOPS,0.00034
best_epoch,17.0
best_val_loss,0.65958
epoch,37.0
loss,0.68519
sparse_categorical_accuracy,0.75815
val_loss,0.68954
val_sparse_categorical_accuracy,0.77475


[34m[1mwandb[0m: Agent Starting Run: knnrdjww with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 305
[34m[1mwandb[0m: 	dense_2: 226
[34m[1mwandb[0m: 	dense_3: 185
[34m[1mwandb[0m: 	dense_4: 86
[34m[1mwandb[0m: 	epochs: 41
[34m[1mwandb[0m: 	learning_rate: 0.009427385386421709
[34m[1mwandb[0m: 	momentum: 0.2797902166155888
[34m[1mwandb[0m: 	rho: 0.8321748314323335


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 305)               239425    
                                                                 
 dense_1 (Dense)             (None, 226)               69156     
                                                                 
 dense_2 (Dense)             (None, 185)               41995     
                                                                 
 dense_3 (Dense)             (None, 86)                15996     
                                                                 
 dense_4 (Dense)             (None, 10)                8

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇▇█▇███████████████████████████████
val_loss,▃▃▂▂▂▂▆▂▄▂▃▂▅▁▃▂▂▂▂▁▂▄▄▃▃█▃▄▃▂▄▂▃▃▄▃▃▇▅▄
val_sparse_categorical_accuracy,▁▄▆▅▄▆▃▅▄▇▅▇▃█▅▇▇▇▇▇▇▄▆▆▅▅▇█▅▇▅██▆▅▆▇▇▄█

0,1
GFLOPS,0.00037
best_epoch,13.0
best_val_loss,0.40585
epoch,40.0
loss,0.41192
sparse_categorical_accuracy,0.86721
val_loss,0.5954
val_sparse_categorical_accuracy,0.86217


[34m[1mwandb[0m: Agent Starting Run: gptzdl0w with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_1: 291
[34m[1mwandb[0m: 	dense_2: 210
[34m[1mwandb[0m: 	dense_3: 223
[34m[1mwandb[0m: 	dense_4: 40
[34m[1mwandb[0m: 	epochs: 44
[34m[1mwandb[0m: 	learning_rate: 0.011024057629431807
[34m[1mwandb[0m: 	momentum: 0.25733191320033955
[34m[1mwandb[0m: 	rho: 0.8288577338228731


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 291)               228435    
                                                                 
 dense_1 (Dense)             (None, 210)               61320     
                                                                 
 dense_2 (Dense)             (None, 223)               47053     
                                                                 
 dense_3 (Dense)             (None, 40)                8960      
                                                                 
 dense_4 (Dense)             (None, 10)                4

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,█▂▂▁▁▁▁▁▁▂▁▂▂▂▁▁▂▂▁▁▂▃▂▃▂▂▃▂▃▃▄▄▄▄▄▅▅▄▇█
sparse_categorical_accuracy,▁▆▇▇████████████████████████▇▇▇▇▇▇▇▆▆▆▄▃
val_loss,▁▁▁▁▁▁▂▁▁▁▁▁▂▁▁▂▄▁▁▂▁▂▄▂▂▂▂▃▂▃▃▂▂▂▃▂▃▃▄█
val_sparse_categorical_accuracy,▅▅▆▆▇▇▆▇████▇█▇▇▄▇█▇█▇▇▆██▇▇▇▅▅▅▇▅▇▇▃▂▃▁

0,1
GFLOPS,0.00035
best_epoch,14.0
best_val_loss,0.48649
epoch,43.0
loss,0.91272
sparse_categorical_accuracy,0.73827
val_loss,1.8582
val_sparse_categorical_accuracy,0.70783


[34m[1mwandb[0m: Agent Starting Run: ybaxs09u with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 337
[34m[1mwandb[0m: 	dense_2: 244
[34m[1mwandb[0m: 	dense_3: 187
[34m[1mwandb[0m: 	dense_4: 60
[34m[1mwandb[0m: 	epochs: 33
[34m[1mwandb[0m: 	learning_rate: 0.01902865457488253
[34m[1mwandb[0m: 	momentum: 0.21410472406620729
[34m[1mwandb[0m: 	rho: 0.8114988895197564


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 337)               264545    
                                                                 
 dense_1 (Dense)             (None, 244)               82472     
                                                                 
 dense_2 (Dense)             (None, 187)               45815     
                                                                 
 dense_3 (Dense)             (None, 60)                11280     
                                                                 
 dense_4 (Dense)             (None, 10)                6

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇███
loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇█████████████████████████
val_loss,▇▆▄▅▃▂▃█▄▅▃▃▄▅▅▆▂▁▃▃▄▃▃▆▃▅▂▆▄▆▅▃▃
val_sparse_categorical_accuracy,▂▁▅▄▅▆▆▄▆▅▆▆▅▆▅▆▇██▇▆▆▇▄▆▆▇▅▇▆▇▅█

0,1
GFLOPS,0.0004
best_epoch,17.0
best_val_loss,0.43885
epoch,32.0
loss,0.43249
sparse_categorical_accuracy,0.85565
val_loss,0.5176
val_sparse_categorical_accuracy,0.85275


[34m[1mwandb[0m: Agent Starting Run: y95pehca with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 260
[34m[1mwandb[0m: 	dense_2: 254
[34m[1mwandb[0m: 	dense_3: 187
[34m[1mwandb[0m: 	dense_4: 86
[34m[1mwandb[0m: 	epochs: 42
[34m[1mwandb[0m: 	learning_rate: 0.033222574882564526
[34m[1mwandb[0m: 	momentum: 0.17409783751439709
[34m[1mwandb[0m: 	rho: 0.8038586407119888


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 260)               204100    
                                                                 
 dense_1 (Dense)             (None, 254)               66294     
                                                                 
 dense_2 (Dense)             (None, 187)               47685     
                                                                 
 dense_3 (Dense)             (None, 86)                16168     
                                                                 
 dense_4 (Dense)             (None, 10)                8

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▆▆▆▆▆▆▆▇█▇▇██████████████▇█████████
val_loss,▆▇▃▃▃▃▃▃▆▅▄▃▁▂▁▂▂▁▂▃▂▂▂▁▁▂▂▁▂▁▂▂▃▁▃▂█▂▂▂
val_sparse_categorical_accuracy,▂▁▅▅▆▄▅▅▃▃▅▅▇▇▇▇▇▇▇▆▇▇▆███▇█▇█▇▇▇▇▇▇▆▇▆▇

0,1
GFLOPS,0.00034
best_epoch,12.0
best_val_loss,0.63964
epoch,41.0
loss,0.66644
sparse_categorical_accuracy,0.76619
val_loss,0.71248
val_sparse_categorical_accuracy,0.75858


[34m[1mwandb[0m: Agent Starting Run: 2m4nu46n with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 264
[34m[1mwandb[0m: 	dense_2: 231
[34m[1mwandb[0m: 	dense_3: 196
[34m[1mwandb[0m: 	dense_4: 111
[34m[1mwandb[0m: 	epochs: 19
[34m[1mwandb[0m: 	learning_rate: 0.004232492337378657
[34m[1mwandb[0m: 	momentum: 0.193851837678287
[34m[1mwandb[0m: 	rho: 0.8320137539766757


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 264)               207240    
                                                                 
 dense_1 (Dense)             (None, 231)               61215     
                                                                 
 dense_2 (Dense)             (None, 196)               45472     
                                                                 
 dense_3 (Dense)             (None, 111)               21867     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██
loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▇▇▇▇████████████
val_loss,▄▂▂▃▁▂▂▁▁▂▂█▃▃▂▂▄▂▃
val_sparse_categorical_accuracy,▁▅▆▄▇▆▇▇▇▆█▃▇▇▇▇▆██

0,1
GFLOPS,0.00034
best_epoch,7.0
best_val_loss,0.36998
epoch,18.0
loss,0.29716
sparse_categorical_accuracy,0.89629
val_loss,0.49003
val_sparse_categorical_accuracy,0.87142


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: to0ikbrs with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 291
[34m[1mwandb[0m: 	dense_2: 235
[34m[1mwandb[0m: 	dense_3: 151
[34m[1mwandb[0m: 	dense_4: 128
[34m[1mwandb[0m: 	epochs: 26
[34m[1mwandb[0m: 	learning_rate: 0.00939380459259902
[34m[1mwandb[0m: 	momentum: 0.4958020979810359
[34m[1mwandb[0m: 	rho: 0.8532941568442742


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 291)               228435    
                                                                 
 dense_1 (Dense)             (None, 235)               68620     
                                                                 
 dense_2 (Dense)             (None, 151)               35636     
                                                                 
 dense_3 (Dense)             (None, 128)               19456     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇▇███████████████████
val_loss,▅█▃▂▃▁▂▃▂█▃▂▂▁▆▄▄▄▅▅▆▂▅▅▄▃
val_sparse_categorical_accuracy,▃▁▄▆▆▇▇▆▇▅▇█▇█▇▇▆█▇▆▅▇█▇██

0,1
GFLOPS,0.00035
best_epoch,5.0
best_val_loss,0.44676
epoch,25.0
loss,0.39167
sparse_categorical_accuracy,0.8684
val_loss,0.49752
val_sparse_categorical_accuracy,0.8655


[34m[1mwandb[0m: Agent Starting Run: f6pxh44t with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 367
[34m[1mwandb[0m: 	dense_2: 248
[34m[1mwandb[0m: 	dense_3: 141
[34m[1mwandb[0m: 	dense_4: 125
[34m[1mwandb[0m: 	epochs: 47
[34m[1mwandb[0m: 	learning_rate: 0.011679463815768376
[34m[1mwandb[0m: 	momentum: 0.030047801611070825
[34m[1mwandb[0m: 	rho: 0.8020072408851837


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 367)               288095    
                                                                 
 dense_1 (Dense)             (None, 248)               91264     
                                                                 
 dense_2 (Dense)             (None, 141)               35109     
                                                                 
 dense_3 (Dense)             (None, 125)               17750     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇██████████████████████████████████
val_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▁█▂▂▂▁▂▁▁▂
val_sparse_categorical_accuracy,▁▄▃▇▆▇▆▇█▇▇█▅▇▇▇▇▆███▇▇▅▆██▄▇▇▇▁▆▅▆█▆▅▇█

0,1
GFLOPS,0.00043
best_epoch,8.0
best_val_loss,0.43017
epoch,46.0
loss,0.42069
sparse_categorical_accuracy,0.868
val_loss,0.82112
val_sparse_categorical_accuracy,0.86808


[34m[1mwandb[0m: Agent Starting Run: e4yg0nov with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 275
[34m[1mwandb[0m: 	dense_2: 249
[34m[1mwandb[0m: 	dense_3: 183
[34m[1mwandb[0m: 	dense_4: 95
[34m[1mwandb[0m: 	epochs: 22
[34m[1mwandb[0m: 	learning_rate: 0.004992262901708697
[34m[1mwandb[0m: 	momentum: 0.4186123495973325
[34m[1mwandb[0m: 	rho: 0.8074471387886601


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 275)               215875    
                                                                 
 dense_1 (Dense)             (None, 249)               68724     
                                                                 
 dense_2 (Dense)             (None, 183)               45750     
                                                                 
 dense_3 (Dense)             (None, 95)                17480     
                                                                 
 dense_4 (Dense)             (None, 10)                9

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇████████████████
val_loss,▇▅▃▃▃▂▆▄▁▁▂▅▂▂▄▂▃▅▆█▄▅
val_sparse_categorical_accuracy,▁▂▅▆▅▇▄▆▇▇▇▆▇▇▇█▆▅▇▆▇▆

0,1
GFLOPS,0.00035
best_epoch,8.0
best_val_loss,0.39897
epoch,21.0
loss,0.35989
sparse_categorical_accuracy,0.88025
val_loss,0.51613
val_sparse_categorical_accuracy,0.85558


[34m[1mwandb[0m: Agent Starting Run: 86m36x5z with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 400
[34m[1mwandb[0m: 	dense_2: 240
[34m[1mwandb[0m: 	dense_3: 134
[34m[1mwandb[0m: 	dense_4: 106
[34m[1mwandb[0m: 	epochs: 23
[34m[1mwandb[0m: 	learning_rate: 0.016441050028720133
[34m[1mwandb[0m: 	momentum: 0.13926200379294731
[34m[1mwandb[0m: 	rho: 0.8576654282808038


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 400)               314000    
                                                                 
 dense_1 (Dense)             (None, 240)               96240     
                                                                 
 dense_2 (Dense)             (None, 134)               32294     
                                                                 
 dense_3 (Dense)             (None, 106)               14310     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇██
loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇▇██████████████
val_loss,█▃▄▄▂▂▂▂▁▂▂▂▂▂▁▁▃▂▃▃▃▃▃
val_sparse_categorical_accuracy,▁▅▅▅▆▇▇▇█▇█▇▆▇██▇█▇▇█▇▇

0,1
GFLOPS,0.00046
best_epoch,8.0
best_val_loss,0.46752
epoch,22.0
loss,0.43282
sparse_categorical_accuracy,0.855
val_loss,0.55925
val_sparse_categorical_accuracy,0.82542


[34m[1mwandb[0m: Agent Starting Run: d63fl4p6 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 272
[34m[1mwandb[0m: 	dense_2: 256
[34m[1mwandb[0m: 	dense_3: 142
[34m[1mwandb[0m: 	dense_4: 113
[34m[1mwandb[0m: 	epochs: 44
[34m[1mwandb[0m: 	learning_rate: 0.017613858313455237
[34m[1mwandb[0m: 	momentum: 0.15126000752388483
[34m[1mwandb[0m: 	rho: 0.8830865646178486


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 272)               213520    
                                                                 
 dense_1 (Dense)             (None, 256)               69888     
                                                                 
 dense_2 (Dense)             (None, 142)               36494     
                                                                 
 dense_3 (Dense)             (None, 113)               16159     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▅▆▆▆▇▇▇▇▇▇▇███████████████████████████
val_loss,█▄▄▃▄▂▃▃▃▂▃▃▃▂▂▂▁▂▂▃▁▃▁▃▂▃▃▁▂▂▂▂▂▃▁▁▂▃▄▂
val_sparse_categorical_accuracy,▁▄▅▅▄▆▆▆▆▅▇▆▆▇▇▇▇▆▇▆▇▇█▆▇▇▇█▇█▇█▇▇█▇▇▇▆█

0,1
GFLOPS,0.00034
best_epoch,24.0
best_val_loss,0.45905
epoch,43.0
loss,0.38354
sparse_categorical_accuracy,0.86856
val_loss,0.49121
val_sparse_categorical_accuracy,0.85317


[34m[1mwandb[0m: Agent Starting Run: 0okbqbmb with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dense_1: 275
[34m[1mwandb[0m: 	dense_2: 243
[34m[1mwandb[0m: 	dense_3: 145
[34m[1mwandb[0m: 	dense_4: 128
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	learning_rate: 0.009619829673567185
[34m[1mwandb[0m: 	momentum: 0.4282640165609823
[34m[1mwandb[0m: 	rho: 0.8190166816343001


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 275)               215875    
                                                                 
 dense_1 (Dense)             (None, 243)               67068     
                                                                 
 dense_2 (Dense)             (None, 145)               35380     
                                                                 
 dense_3 (Dense)             (None, 128)               18688     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
loss,█▆▆▁▄
sparse_categorical_accuracy,▁▅▅█▅
val_loss,█▃▁▄▃
val_sparse_categorical_accuracy,▁▂▇██

0,1
GFLOPS,0.00034
best_epoch,2.0
best_val_loss,0.9657
epoch,4.0
loss,0.94429
sparse_categorical_accuracy,0.69337
val_loss,1.01238
val_sparse_categorical_accuracy,0.72158


[34m[1mwandb[0m: Agent Starting Run: q3h8o0eo with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 296
[34m[1mwandb[0m: 	dense_2: 246
[34m[1mwandb[0m: 	dense_3: 210
[34m[1mwandb[0m: 	dense_4: 73
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	learning_rate: 0.0011634346055427068
[34m[1mwandb[0m: 	momentum: 0.03419104970394583
[34m[1mwandb[0m: 	rho: 0.9723998288381118


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 296)               232360    
                                                                 
 dense_1 (Dense)             (None, 246)               73062     
                                                                 
 dense_2 (Dense)             (None, 210)               51870     
                                                                 
 dense_3 (Dense)             (None, 73)                15403     
                                                                 
 dense_4 (Dense)             (None, 10)                7

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
loss,█▅▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇██████████
val_loss,█▆▃▂▂▂▄▂▁▂▁▁▄▁▂▂▂▁▃▁▃▄▃▂▃▃▂▃▃▄
val_sparse_categorical_accuracy,▁▃▅▆▆▇▆▇▇▆▇▇▆█▇▇██▇█▇▇███▇███▇

0,1
GFLOPS,0.00037
best_epoch,13.0
best_val_loss,0.31691
epoch,29.0
loss,0.12978
sparse_categorical_accuracy,0.94894
val_loss,0.44485
val_sparse_categorical_accuracy,0.88192


[34m[1mwandb[0m: Agent Starting Run: 1sv66u5b with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_1: 261
[34m[1mwandb[0m: 	dense_2: 253
[34m[1mwandb[0m: 	dense_3: 147
[34m[1mwandb[0m: 	dense_4: 41
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	learning_rate: 0.00946804635337442
[34m[1mwandb[0m: 	momentum: 0.011431178114825136
[34m[1mwandb[0m: 	rho: 0.933681653467632


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 261)               204885    
                                                                 
 dense_1 (Dense)             (None, 253)               66286     
                                                                 
 dense_2 (Dense)             (None, 147)               37338     
                                                                 
 dense_3 (Dense)             (None, 41)                6068      
                                                                 
 dense_4 (Dense)             (None, 10)                4

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▇▇▇████████
val_loss,█▅▄▄▃▂▂▂▂▁▂▄▃▂▁
val_sparse_categorical_accuracy,▁▄▆▆▇▇▇█▇██▇▇██

0,1
GFLOPS,0.00032
best_epoch,14.0
best_val_loss,0.41966
epoch,14.0
loss,0.39212
sparse_categorical_accuracy,0.8631
val_loss,0.41966
val_sparse_categorical_accuracy,0.85883


[34m[1mwandb[0m: Agent Starting Run: ik63ty2c with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 294
[34m[1mwandb[0m: 	dense_2: 232
[34m[1mwandb[0m: 	dense_3: 174
[34m[1mwandb[0m: 	dense_4: 127
[34m[1mwandb[0m: 	epochs: 31
[34m[1mwandb[0m: 	learning_rate: 0.00682158199539508
[34m[1mwandb[0m: 	momentum: 0.2671265310346287
[34m[1mwandb[0m: 	rho: 0.856634262554182


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 294)               230790    
                                                                 
 dense_1 (Dense)             (None, 232)               68440     
                                                                 
 dense_2 (Dense)             (None, 174)               40542     
                                                                 
 dense_3 (Dense)             (None, 127)               22225     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇▇▇█████████████████████
val_loss,█▅▄▂▂▄▁▂▂▂▃▃▃▂▂▅▄▃▃▃▅▃▄▅▄▃▄▇▇█▅
val_sparse_categorical_accuracy,▁▄▅▆▆▅▇▇▆▇▅▇▆██▆▇▇██▆█▇▇▇███▇▅▇

0,1
GFLOPS,0.00036
best_epoch,6.0
best_val_loss,0.38846
epoch,30.0
loss,0.33708
sparse_categorical_accuracy,0.88767
val_loss,0.54524
val_sparse_categorical_accuracy,0.86133


[34m[1mwandb[0m: Agent Starting Run: 0m08w985 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 347
[34m[1mwandb[0m: 	dense_2: 255
[34m[1mwandb[0m: 	dense_3: 172
[34m[1mwandb[0m: 	dense_4: 55
[34m[1mwandb[0m: 	epochs: 34
[34m[1mwandb[0m: 	learning_rate: 0.017963706117900245
[34m[1mwandb[0m: 	momentum: 0.19405398564031376
[34m[1mwandb[0m: 	rho: 0.9339143423718924


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 347)               272395    
                                                                 
 dense_1 (Dense)             (None, 255)               88740     
                                                                 
 dense_2 (Dense)             (None, 172)               44032     
                                                                 
 dense_3 (Dense)             (None, 55)                9515      
                                                                 
 dense_4 (Dense)             (None, 10)                5

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▃▆▇▇▇▇▇▇▇████████████████████████
val_loss,█▃▂▂▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁
val_sparse_categorical_accuracy,▁▄▆▇▇▇▇▇▇████████████▇███████▇████

0,1
GFLOPS,0.00042
best_epoch,24.0
best_val_loss,0.59796
epoch,33.0
loss,0.6055
sparse_categorical_accuracy,0.77867
val_loss,0.69541
val_sparse_categorical_accuracy,0.75


[34m[1mwandb[0m: Agent Starting Run: bc6vdjcl with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 312
[34m[1mwandb[0m: 	dense_2: 239
[34m[1mwandb[0m: 	dense_3: 128
[34m[1mwandb[0m: 	dense_4: 120
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	learning_rate: 0.009287041056046209
[34m[1mwandb[0m: 	momentum: 0.3256010197868171
[34m[1mwandb[0m: 	rho: 0.9002614572000034


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 312)               244920    
                                                                 
 dense_1 (Dense)             (None, 239)               74807     
                                                                 
 dense_2 (Dense)             (None, 128)               30720     
                                                                 
 dense_3 (Dense)             (None, 120)               15480     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▇▇▇▇▇▇▇███████████████
val_loss,█▃▃▂▅▂▃▁▂▂▄▂▂▁▂▂▂▂▁▂▂▂▃▄▁
val_sparse_categorical_accuracy,▁▅▆▆▅▇▇▇▇▇▆█▇█▇▇█▇█▇███▇█

0,1
GFLOPS,0.00037
best_epoch,13.0
best_val_loss,0.39469
epoch,24.0
loss,0.33493
sparse_categorical_accuracy,0.87856
val_loss,0.42152
val_sparse_categorical_accuracy,0.86


[34m[1mwandb[0m: Agent Starting Run: n708whvv with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 276
[34m[1mwandb[0m: 	dense_2: 243
[34m[1mwandb[0m: 	dense_3: 148
[34m[1mwandb[0m: 	dense_4: 88
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	learning_rate: 0.02524210034094864
[34m[1mwandb[0m: 	momentum: 0.03750609542673932
[34m[1mwandb[0m: 	rho: 0.9160472559443872


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 276)               216660    
                                                                 
 dense_1 (Dense)             (None, 243)               67311     
                                                                 
 dense_2 (Dense)             (None, 148)               36112     
                                                                 
 dense_3 (Dense)             (None, 88)                13112     
                                                                 
 dense_4 (Dense)             (None, 10)                8

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇██████████████
val_loss,▇█▃█▃▃▃▃▆▂▂▃▂▁▃▂▂▂▁▅
val_sparse_categorical_accuracy,▂▃▃▁▃▆▃▄▄▅▇▅▄▇▅▂█▇▇▄

0,1
GFLOPS,0.00033
best_epoch,13.0
best_val_loss,0.58715
epoch,19.0
loss,0.56907
sparse_categorical_accuracy,0.75679
val_loss,0.68616
val_sparse_categorical_accuracy,0.73517


[34m[1mwandb[0m: Agent Starting Run: 0kvpxolp with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 369
[34m[1mwandb[0m: 	dense_2: 256
[34m[1mwandb[0m: 	dense_3: 167
[34m[1mwandb[0m: 	dense_4: 62
[34m[1mwandb[0m: 	epochs: 23
[34m[1mwandb[0m: 	learning_rate: 0.023915749547145974
[34m[1mwandb[0m: 	momentum: 0.118681656704376
[34m[1mwandb[0m: 	rho: 0.8540445807915578


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 369)               289665    
                                                                 
 dense_1 (Dense)             (None, 256)               94720     
                                                                 
 dense_2 (Dense)             (None, 167)               42919     
                                                                 
 dense_3 (Dense)             (None, 62)                10416     
                                                                 
 dense_4 (Dense)             (None, 10)                6

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇██████████████████
val_loss,█▅▂▂▄▄▁▁▂▂▂▁▁▁▂▂▂▂▄▂▄▂▁
val_sparse_categorical_accuracy,▁▃▆▆▄▄█▇█▇▅█▇█▆▆█▅▅▆▆▆▇

0,1
GFLOPS,0.00044
best_epoch,13.0
best_val_loss,0.48511
epoch,22.0
loss,0.46368
sparse_categorical_accuracy,0.83842
val_loss,0.50182
val_sparse_categorical_accuracy,0.8295


[34m[1mwandb[0m: Agent Starting Run: mognoig9 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 406
[34m[1mwandb[0m: 	dense_2: 253
[34m[1mwandb[0m: 	dense_3: 243
[34m[1mwandb[0m: 	dense_4: 127
[34m[1mwandb[0m: 	epochs: 24
[34m[1mwandb[0m: 	learning_rate: 0.005411525192550383
[34m[1mwandb[0m: 	momentum: 0.7493693384674374
[34m[1mwandb[0m: 	rho: 0.9848069560510504


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 406)               318710    
                                                                 
 dense_1 (Dense)             (None, 253)               102971    
                                                                 
 dense_2 (Dense)             (None, 243)               61722     
                                                                 
 dense_3 (Dense)             (None, 127)               30988     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▆▆▇███████████████▇
val_loss,█▅▆▄▅▄▄▂▂▂▁▂▂▁▂▂▁▂▁▁▁▁▁▁
val_sparse_categorical_accuracy,▁▄▄▅▅▆▅▇▇▇█▇▇█▇██▇█████▇

0,1
GFLOPS,0.00052
best_epoch,16.0
best_val_loss,0.70836
epoch,23.0
loss,0.81779
sparse_categorical_accuracy,0.67083
val_loss,0.72726
val_sparse_categorical_accuracy,0.7085


[34m[1mwandb[0m: Agent Starting Run: scl5b0xw with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 336
[34m[1mwandb[0m: 	dense_2: 231
[34m[1mwandb[0m: 	dense_3: 163
[34m[1mwandb[0m: 	dense_4: 128
[34m[1mwandb[0m: 	epochs: 16
[34m[1mwandb[0m: 	learning_rate: 0.004233564533808993
[34m[1mwandb[0m: 	momentum: 0.5409908367867963
[34m[1mwandb[0m: 	rho: 0.9648337699348934


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 336)               263760    
                                                                 
 dense_1 (Dense)             (None, 231)               77847     
                                                                 
 dense_2 (Dense)             (None, 163)               37816     
                                                                 
 dense_3 (Dense)             (None, 128)               20992     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇████████
val_loss,█▄▄▆▃▅▃▄▄▂▅▄▂▂▂▁
val_sparse_categorical_accuracy,▁▅▆▄▆▅▆▆▆▇▆▇▇▇▇█

0,1
GFLOPS,0.0004
best_epoch,15.0
best_val_loss,0.37765
epoch,15.0
loss,0.32944
sparse_categorical_accuracy,0.87935
val_loss,0.37765
val_sparse_categorical_accuracy,0.86942


[34m[1mwandb[0m: Agent Starting Run: xbgmxcrr with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_1: 269
[34m[1mwandb[0m: 	dense_2: 233
[34m[1mwandb[0m: 	dense_3: 220
[34m[1mwandb[0m: 	dense_4: 125
[34m[1mwandb[0m: 	epochs: 34
[34m[1mwandb[0m: 	learning_rate: 0.009083207769728648
[34m[1mwandb[0m: 	momentum: 0.6499345700390465
[34m[1mwandb[0m: 	rho: 0.93269111019955


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 269)               211165    
                                                                 
 dense_1 (Dense)             (None, 233)               62910     
                                                                 
 dense_2 (Dense)             (None, 220)               51480     
                                                                 
 dense_3 (Dense)             (None, 125)               27625     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇█▇█▇███████████
val_loss,█▅▅▄▄▃▄▂▃▃▄▄▅▂▇▇▄▅▁▃▃▃▄▃▂▃▄▆▃▄▃▆▄▆
val_sparse_categorical_accuracy,▁▃▄▄▅▆▅▇▆▆▅▆▄▇▂▁▅▅█▅▇▆▆▇▇▆▇▅▇▆▇▆▇▄

0,1
GFLOPS,0.00035
best_epoch,18.0
best_val_loss,0.45999
epoch,33.0
loss,0.49962
sparse_categorical_accuracy,0.83717
val_loss,0.61286
val_sparse_categorical_accuracy,0.81133


[34m[1mwandb[0m: Agent Starting Run: o0wt2e2v with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 279
[34m[1mwandb[0m: 	dense_2: 250
[34m[1mwandb[0m: 	dense_3: 251
[34m[1mwandb[0m: 	dense_4: 121
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	learning_rate: 0.004580506606897573
[34m[1mwandb[0m: 	momentum: 0.31590714076242876
[34m[1mwandb[0m: 	rho: 0.9546559636341512


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 279)               219015    
                                                                 
 dense_1 (Dense)             (None, 250)               70000     
                                                                 
 dense_2 (Dense)             (None, 251)               63001     
                                                                 
 dense_3 (Dense)             (None, 121)               30492     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▇▇▇▇▇▇▇▇▇█████████████
val_loss,█▆▃▃▃▁▃▂▁▃▁▂▂▂▄▃▁▃▂▂▃▃▂▃▃
val_sparse_categorical_accuracy,▁▃▆▅▆▇▆▇█▆▇▆▇▇▆▆█▇██▇████

0,1
GFLOPS,0.00038
best_epoch,8.0
best_val_loss,0.3591
epoch,24.0
loss,0.26127
sparse_categorical_accuracy,0.90346
val_loss,0.41923
val_sparse_categorical_accuracy,0.87633


[34m[1mwandb[0m: Agent Starting Run: ba6fo59i with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 274
[34m[1mwandb[0m: 	dense_2: 253
[34m[1mwandb[0m: 	dense_3: 161
[34m[1mwandb[0m: 	dense_4: 116
[34m[1mwandb[0m: 	epochs: 37
[34m[1mwandb[0m: 	learning_rate: 0.0034227835563195247
[34m[1mwandb[0m: 	momentum: 0.20955994061458108
[34m[1mwandb[0m: 	rho: 0.933313624629056


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 274)               215090    
                                                                 
 dense_1 (Dense)             (None, 253)               69575     
                                                                 
 dense_2 (Dense)             (None, 161)               40894     
                                                                 
 dense_3 (Dense)             (None, 116)               18792     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████
val_loss,▄▆▄▄▁▃▂▂▁▁▂▂▁▂▁▃▂▂▂▄▅▂▂▄▃▃▃▄▄▄▅▅▄▄▄█▅
val_sparse_categorical_accuracy,▁▁▃▃▆▄▇▆▇▇▇▆▇▇▇▇▇▇█▇▅▇█▇█▇▇▇██▆▆▇██▆█

0,1
GFLOPS,0.00035
best_epoch,9.0
best_val_loss,0.3532
epoch,36.0
loss,0.1893
sparse_categorical_accuracy,0.93198
val_loss,0.54941
val_sparse_categorical_accuracy,0.88292


[34m[1mwandb[0m: Agent Starting Run: zhkgg70f with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_1: 263
[34m[1mwandb[0m: 	dense_2: 253
[34m[1mwandb[0m: 	dense_3: 225
[34m[1mwandb[0m: 	dense_4: 121
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	learning_rate: 0.015445472988889038
[34m[1mwandb[0m: 	momentum: 0.45608448582820127
[34m[1mwandb[0m: 	rho: 0.971323448308948


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 263)               206455    
                                                                 
 dense_1 (Dense)             (None, 253)               66792     
                                                                 
 dense_2 (Dense)             (None, 225)               57150     
                                                                 
 dense_3 (Dense)             (None, 121)               27346     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▃▄▆▇▇▇▇▇▇███████
val_loss,█▄▄▂▃▂▃▁▂▅▂▁▁▃▂▂▂
val_sparse_categorical_accuracy,▁▅▅▆▆▇▇█▆▅▇██▆█▇█

0,1
GFLOPS,0.00036
best_epoch,11.0
best_val_loss,0.74976
epoch,16.0
loss,0.78793
sparse_categorical_accuracy,0.70531
val_loss,0.82648
val_sparse_categorical_accuracy,0.69933


[34m[1mwandb[0m: Agent Starting Run: tcwt0bdm with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 265
[34m[1mwandb[0m: 	dense_2: 236
[34m[1mwandb[0m: 	dense_3: 245
[34m[1mwandb[0m: 	dense_4: 86
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	learning_rate: 0.0005335831240586719
[34m[1mwandb[0m: 	momentum: 0.04561571247682591
[34m[1mwandb[0m: 	rho: 0.99036272753466


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 265)               208025    
                                                                 
 dense_1 (Dense)             (None, 236)               62776     
                                                                 
 dense_2 (Dense)             (None, 245)               58065     
                                                                 
 dense_3 (Dense)             (None, 86)                21156     
                                                                 
 dense_4 (Dense)             (None, 10)                8

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
loss,█▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁
sparse_categorical_accuracy,▁▅▅▆▆▆▇▇▇▇▇▇█████
val_loss,█▄▃▃▅▃▂▂▄▂▁▁▄▃▁▃▂
val_sparse_categorical_accuracy,▁▅▅▆▄▆▇▆▅▇██▅▆█▇█

0,1
GFLOPS,0.00035
best_epoch,11.0
best_val_loss,0.31195
epoch,16.0
loss,0.18958
sparse_categorical_accuracy,0.92754
val_loss,0.33232
val_sparse_categorical_accuracy,0.89275


[34m[1mwandb[0m: Agent Starting Run: k2urw10s with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 420
[34m[1mwandb[0m: 	dense_2: 246
[34m[1mwandb[0m: 	dense_3: 208
[34m[1mwandb[0m: 	dense_4: 127
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	learning_rate: 0.01662480810195338
[34m[1mwandb[0m: 	momentum: 0.13683997179610588
[34m[1mwandb[0m: 	rho: 0.9662665689786591


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 420)               329700    
                                                                 
 dense_1 (Dense)             (None, 246)               103566    
                                                                 
 dense_2 (Dense)             (None, 208)               51376     
                                                                 
 dense_3 (Dense)             (None, 127)               26543     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇███████████
val_loss,▆█▄▅▂▄▄▃▃▄▃▂▂▂▄▆▁
val_sparse_categorical_accuracy,▂▁▄▅▇▅▇▇▆▇▆▇▇█▆▄█

0,1
GFLOPS,0.00051
best_epoch,16.0
best_val_loss,0.54862
epoch,16.0
loss,0.55564
sparse_categorical_accuracy,0.78881
val_loss,0.54862
val_sparse_categorical_accuracy,0.79917


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g906t577 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 307
[34m[1mwandb[0m: 	dense_2: 253
[34m[1mwandb[0m: 	dense_3: 178
[34m[1mwandb[0m: 	dense_4: 63
[34m[1mwandb[0m: 	epochs: 24
[34m[1mwandb[0m: 	learning_rate: 0.010253679661427336
[34m[1mwandb[0m: 	momentum: 0.12881785315156777
[34m[1mwandb[0m: 	rho: 0.8166050302401122


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 307)               240995    
                                                                 
 dense_1 (Dense)             (None, 253)               77924     
                                                                 
 dense_2 (Dense)             (None, 178)               45212     
                                                                 
 dense_3 (Dense)             (None, 63)                11277     
                                                                 
 dense_4 (Dense)             (None, 10)                6

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇██████████████████
val_loss,█▅█▁▂▁▃▂▂▃▃▂▂▂▂▂▂▃▂▅▃▄▅█
val_sparse_categorical_accuracy,▁▅▃▇▇█▄▆▇▅▇█▆█▇▇█▆█▇█▇▅▆

0,1
GFLOPS,0.00038
best_epoch,5.0
best_val_loss,0.44057
epoch,23.0
loss,0.43578
sparse_categorical_accuracy,0.8551
val_loss,0.85479
val_sparse_categorical_accuracy,0.82775


[34m[1mwandb[0m: Agent Starting Run: k120hk5i with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_1: 275
[34m[1mwandb[0m: 	dense_2: 248
[34m[1mwandb[0m: 	dense_3: 247
[34m[1mwandb[0m: 	dense_4: 127
[34m[1mwandb[0m: 	epochs: 34
[34m[1mwandb[0m: 	learning_rate: 0.010523802461401874
[34m[1mwandb[0m: 	momentum: 0.06984948198127626
[34m[1mwandb[0m: 	rho: 0.9620708473723144


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 275)               215875    
                                                                 
 dense_1 (Dense)             (None, 248)               68448     
                                                                 
 dense_2 (Dense)             (None, 247)               61503     
                                                                 
 dense_3 (Dense)             (None, 127)               31496     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
loss,█▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▇▇▇▇▇▇███████████████████████
val_loss,█▄▃▂▆▂▃▁▁▃▂▂▂▂▁▂▂▂▂▁▁▁▂▂▂▂▁▃▁▂▂▂▃▁
val_sparse_categorical_accuracy,▁▄▆▆▄▇▆▇▇▆▇▇▇▇▇▇▇████████▇█▇███▇▇█

0,1
GFLOPS,0.00038
best_epoch,26.0
best_val_loss,0.42009
epoch,33.0
loss,0.35583
sparse_categorical_accuracy,0.87577
val_loss,0.43969
val_sparse_categorical_accuracy,0.859


[34m[1mwandb[0m: Agent Starting Run: uf1d8nat with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 500
[34m[1mwandb[0m: 	dense_2: 234
[34m[1mwandb[0m: 	dense_3: 154
[34m[1mwandb[0m: 	dense_4: 125
[34m[1mwandb[0m: 	epochs: 31
[34m[1mwandb[0m: 	learning_rate: 0.011934969861072698
[34m[1mwandb[0m: 	momentum: 0.10996270466561012
[34m[1mwandb[0m: 	rho: 0.9483973467954644


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 500)               392500    
                                                                 
 dense_1 (Dense)             (None, 234)               117234    
                                                                 
 dense_2 (Dense)             (None, 154)               36190     
                                                                 
 dense_3 (Dense)             (None, 125)               19375     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▇▇▇▇▇▇██████████████████████
val_loss,▆█▄▃▂▂▃▄▁▄▆▃▂▂▁▃▄▂▂▂▂▂▂▂▃▂▂▂▁▁▂
val_sparse_categorical_accuracy,▂▁▄▆▇▆▆▆█▆▆▆▇▇█▇▅▇▆█▆▇▇▇▆▇▇████

0,1
GFLOPS,0.00057
best_epoch,29.0
best_val_loss,0.41492
epoch,30.0
loss,0.34317
sparse_categorical_accuracy,0.87683
val_loss,0.44457
val_sparse_categorical_accuracy,0.85533


[34m[1mwandb[0m: Agent Starting Run: 64yu52yy with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 436
[34m[1mwandb[0m: 	dense_2: 252
[34m[1mwandb[0m: 	dense_3: 245
[34m[1mwandb[0m: 	dense_4: 111
[34m[1mwandb[0m: 	epochs: 39
[34m[1mwandb[0m: 	learning_rate: 0.005947436561788469
[34m[1mwandb[0m: 	momentum: 0.01757582400288955
[34m[1mwandb[0m: 	rho: 0.9497950521877696


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 436)               342260    
                                                                 
 dense_1 (Dense)             (None, 252)               110124    
                                                                 
 dense_2 (Dense)             (None, 245)               61985     
                                                                 
 dense_3 (Dense)             (None, 111)               27306     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,█▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████████
val_loss,█▄▂▄▂▂▂▂▅▅▂▁▂▁▃▁▄▂▂▂▂▃▂▂▂▃▂▃▃▃▂▂▄▄▆▃▅▆▆
val_sparse_categorical_accuracy,▁▅▆▄▆▆▆▆▄▄▆▇▇▇▅█▆▇███▇█▇█▇▇▇▇███▇▇▇█▇▇▇

0,1
GFLOPS,0.00054
best_epoch,15.0
best_val_loss,0.37854
epoch,38.0
loss,0.23005
sparse_categorical_accuracy,0.91579
val_loss,0.56989
val_sparse_categorical_accuracy,0.86833


[34m[1mwandb[0m: Agent Starting Run: yghzx9su with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 363
[34m[1mwandb[0m: 	dense_2: 246
[34m[1mwandb[0m: 	dense_3: 147
[34m[1mwandb[0m: 	dense_4: 99
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	learning_rate: 0.001266287404751428
[34m[1mwandb[0m: 	momentum: 0.27566146279392545
[34m[1mwandb[0m: 	rho: 0.9841212943927308


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 363)               284955    
                                                                 
 dense_1 (Dense)             (None, 246)               89544     
                                                                 
 dense_2 (Dense)             (None, 147)               36309     
                                                                 
 dense_3 (Dense)             (None, 99)                14652     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▇▇▇▇▇▇█████
val_loss,█▆▄▂▄▃▃▁▂▁▂▁▄▃▁
val_sparse_categorical_accuracy,▁▂▄▆▅▅▅▇▇▇▇█▆▇█

0,1
GFLOPS,0.00043
best_epoch,14.0
best_val_loss,0.33053
epoch,14.0
loss,0.2101
sparse_categorical_accuracy,0.91992
val_loss,0.33053
val_sparse_categorical_accuracy,0.89117


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ck6w3099 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 351
[34m[1mwandb[0m: 	dense_2: 244
[34m[1mwandb[0m: 	dense_3: 195
[34m[1mwandb[0m: 	dense_4: 117
[34m[1mwandb[0m: 	epochs: 47
[34m[1mwandb[0m: 	learning_rate: 0.027321995468362396
[34m[1mwandb[0m: 	momentum: 0.36984673688760694
[34m[1mwandb[0m: 	rho: 0.9500292244367574


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 351)               275535    
                                                                 
 dense_1 (Dense)             (None, 244)               85888     
                                                                 
 dense_2 (Dense)             (None, 195)               47775     
                                                                 
 dense_3 (Dense)             (None, 117)               22932     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▂▁▁▁▁▃▅▆▅▆▇▇▇▇▇▇▇▇▇█▇▇▇▇▇████▇▅▇████████
val_loss,▆█▇█▆▅▂▂▄▂▂▁▁▁▂▃▄▂▄▂▁▁▂▂▁▁▁▂▂▁▃▂▁▁▂▁▂▁▃▁
val_sparse_categorical_accuracy,▂▁▂▁▂▄▆▆▅▆▇█▇▇▆▅▅▇▅▇██▇▇▇█▇▇▇█▆▆█▇▇▇▇▇▇█

0,1
GFLOPS,0.00043
best_epoch,26.0
best_val_loss,0.88979
epoch,46.0
loss,0.91513
sparse_categorical_accuracy,0.64029
val_loss,0.92002
val_sparse_categorical_accuracy,0.64942


[34m[1mwandb[0m: Agent Starting Run: g63kl888 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 273
[34m[1mwandb[0m: 	dense_2: 232
[34m[1mwandb[0m: 	dense_3: 253
[34m[1mwandb[0m: 	dense_4: 46
[34m[1mwandb[0m: 	epochs: 19
[34m[1mwandb[0m: 	learning_rate: 0.00035487467010735375
[34m[1mwandb[0m: 	momentum: 0.3993479570103134
[34m[1mwandb[0m: 	rho: 0.9969358773845084


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 273)               214305    
                                                                 
 dense_1 (Dense)             (None, 232)               63568     
                                                                 
 dense_2 (Dense)             (None, 253)               58949     
                                                                 
 dense_3 (Dense)             (None, 46)                11684     
                                                                 
 dense_4 (Dense)             (None, 10)                4

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██
loss,█▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
sparse_categorical_accuracy,▁▅▅▆▆▆▆▇▇▇▇▇▇▇█████
val_loss,█▆▅▃▃▃▄▂▂▄▁▄▆▁▄▂▂▃▃
val_sparse_categorical_accuracy,▁▃▃▅▅▆▅▆▆▅█▇▄█▅██▇█

0,1
GFLOPS,0.00035
best_epoch,10.0
best_val_loss,0.3028
epoch,18.0
loss,0.1673
sparse_categorical_accuracy,0.93692
val_loss,0.34498
val_sparse_categorical_accuracy,0.89217


[34m[1mwandb[0m: Agent Starting Run: otqy0e2x with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 269
[34m[1mwandb[0m: 	dense_2: 247
[34m[1mwandb[0m: 	dense_3: 163
[34m[1mwandb[0m: 	dense_4: 49
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	learning_rate: 0.017510161745455215
[34m[1mwandb[0m: 	momentum: 0.6393600898792571
[34m[1mwandb[0m: 	rho: 0.948845006416866


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 269)               211165    
                                                                 
 dense_1 (Dense)             (None, 247)               66690     
                                                                 
 dense_2 (Dense)             (None, 163)               40424     
                                                                 
 dense_3 (Dense)             (None, 49)                8036      
                                                                 
 dense_4 (Dense)             (None, 10)                5

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▄▂▇▄▅▅█▅▄▁▃▅▄▄▆▄▄
val_loss,▃▅█▂█▃▅▁▄▃▄▁▁▆▂▄▂
val_sparse_categorical_accuracy,▅█▁▅▅█▃▅▅█▁█▆▅▆▃▆

0,1
GFLOPS,0.00033
best_epoch,11.0
best_val_loss,2.30387
epoch,16.0
loss,2.30484
sparse_categorical_accuracy,0.09956
val_loss,2.30426
val_sparse_categorical_accuracy,0.10125


[34m[1mwandb[0m: Agent Starting Run: ofhrk7ys with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 333
[34m[1mwandb[0m: 	dense_2: 240
[34m[1mwandb[0m: 	dense_3: 250
[34m[1mwandb[0m: 	dense_4: 117
[34m[1mwandb[0m: 	epochs: 29
[34m[1mwandb[0m: 	learning_rate: 0.014489539897506606
[34m[1mwandb[0m: 	momentum: 0.3884216662682779
[34m[1mwandb[0m: 	rho: 0.8935781308980929


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 333)               261405    
                                                                 
 dense_1 (Dense)             (None, 240)               80160     
                                                                 
 dense_2 (Dense)             (None, 250)               60250     
                                                                 
 dense_3 (Dense)             (None, 117)               29367     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇█████████████████████
val_loss,█▃▄▄▃▃▂▂▁▂▂▁▂▄▄▂▁▁▄▂▁▂▁▁▂▁▂▂▁
val_sparse_categorical_accuracy,▁▅▄▅▅▆▆▆█▇▆█▇▅▇▆██▆▇█▇██▇████

0,1
GFLOPS,0.00043
best_epoch,11.0
best_val_loss,0.46138
epoch,28.0
loss,0.43188
sparse_categorical_accuracy,0.85771
val_loss,0.47588
val_sparse_categorical_accuracy,0.84683


[34m[1mwandb[0m: Agent Starting Run: enzsu86a with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_1: 503
[34m[1mwandb[0m: 	dense_2: 245
[34m[1mwandb[0m: 	dense_3: 173
[34m[1mwandb[0m: 	dense_4: 124
[34m[1mwandb[0m: 	epochs: 26
[34m[1mwandb[0m: 	learning_rate: 0.002608873034997157
[34m[1mwandb[0m: 	momentum: 0.3686995150933007
[34m[1mwandb[0m: 	rho: 0.9939310462164344


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 503)               394855    
                                                                 
 dense_1 (Dense)             (None, 245)               123480    
                                                                 
 dense_2 (Dense)             (None, 173)               42558     
                                                                 
 dense_3 (Dense)             (None, 124)               21576     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▇▇▇▇▇▇▇▇▇█▇██████████
val_loss,██▃▆▃▁▃▂▂▄▁▂▁▃▂▃▃▃▂▂▄▃▂▁▃▂
val_sparse_categorical_accuracy,▁▁▆▃▆▇▅▆▆▅▇▇▇▇▇▆▇▇▇█▆▇▇█▇█

0,1
GFLOPS,0.00058
best_epoch,12.0
best_val_loss,0.36084
epoch,25.0
loss,0.27955
sparse_categorical_accuracy,0.89727
val_loss,0.37714
val_sparse_categorical_accuracy,0.87733


[34m[1mwandb[0m: Agent Starting Run: 12wyczm4 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 414
[34m[1mwandb[0m: 	dense_2: 247
[34m[1mwandb[0m: 	dense_3: 136
[34m[1mwandb[0m: 	dense_4: 107
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	learning_rate: 0.003778315122321275
[34m[1mwandb[0m: 	momentum: 0.060224215283510275
[34m[1mwandb[0m: 	rho: 0.960006782839936


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 414)               324990    
                                                                 
 dense_1 (Dense)             (None, 247)               102505    
                                                                 
 dense_2 (Dense)             (None, 136)               33728     
                                                                 
 dense_3 (Dense)             (None, 107)               14659     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▇▇▇▇▇▇▇█████████
val_loss,▆▄█▂▃▅▁▂▂▂▄▁▂▂▁▃▂▂▂▃
val_sparse_categorical_accuracy,▃▅▁▇▆▆▇▆▇▇▆█▇██▇▇▇█▇

0,1
GFLOPS,0.00048
best_epoch,14.0
best_val_loss,0.34558
epoch,19.0
loss,0.23478
sparse_categorical_accuracy,0.91177
val_loss,0.40607
val_sparse_categorical_accuracy,0.8745


[34m[1mwandb[0m: Agent Starting Run: ehazmneb with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 258
[34m[1mwandb[0m: 	dense_2: 169
[34m[1mwandb[0m: 	dense_3: 135
[34m[1mwandb[0m: 	dense_4: 122
[34m[1mwandb[0m: 	epochs: 32
[34m[1mwandb[0m: 	learning_rate: 0.0010263319224626468
[34m[1mwandb[0m: 	momentum: 0.011490379930376394
[34m[1mwandb[0m: 	rho: 0.969398198824563


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 258)               202530    
                                                                 
 dense_1 (Dense)             (None, 169)               43771     
                                                                 
 dense_2 (Dense)             (None, 135)               22950     
                                                                 
 dense_3 (Dense)             (None, 122)               16592     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇███
loss,█▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██████████
val_loss,▇▅▃▃▂▂▃▃▂▃▂▂▂▂▂▂▁▂▃▂▂▂▃█▃▃▃▆▅▃▃▅
val_sparse_categorical_accuracy,▁▁▄▅▆▆▅▅▇▅▇▇▇▇▇▇█▆▆███▇▄██▇▆▅▇▇█

0,1
GFLOPS,0.00029
best_epoch,16.0
best_val_loss,0.32195
epoch,31.0
loss,0.12384
sparse_categorical_accuracy,0.95125
val_loss,0.46896
val_sparse_categorical_accuracy,0.89108


[34m[1mwandb[0m: Agent Starting Run: 6zxxjczl with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dense_1: 373
[34m[1mwandb[0m: 	dense_2: 230
[34m[1mwandb[0m: 	dense_3: 168
[34m[1mwandb[0m: 	dense_4: 120
[34m[1mwandb[0m: 	epochs: 34
[34m[1mwandb[0m: 	learning_rate: 0.0008461161112629131
[34m[1mwandb[0m: 	momentum: 0.1085628386749145
[34m[1mwandb[0m: 	rho: 0.9337232133640996


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 373)               292805    
                                                                 
 dense_1 (Dense)             (None, 230)               86020     
                                                                 
 dense_2 (Dense)             (None, 168)               38808     
                                                                 
 dense_3 (Dense)             (None, 120)               20280     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
loss,█▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████
val_loss,█▃▅▄▂▄▁▂▂▂▁▁▁▁▂▁▂▂▃▃▅▅▂▂▂▂▂▃▃▅▄▃▅▄
val_sparse_categorical_accuracy,▁▆▅▅▆▅▇▆▇▆▇▇▇▇▇█▇▇▇▇▆▇▇██████▇████

0,1
GFLOPS,0.00044
best_epoch,10.0
best_val_loss,0.33452
epoch,33.0
loss,0.11037
sparse_categorical_accuracy,0.95823
val_loss,0.45669
val_sparse_categorical_accuracy,0.89233


[34m[1mwandb[0m: Agent Starting Run: nyg4r5il with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_1: 474
[34m[1mwandb[0m: 	dense_2: 243
[34m[1mwandb[0m: 	dense_3: 188
[34m[1mwandb[0m: 	dense_4: 123
[34m[1mwandb[0m: 	epochs: 35
[34m[1mwandb[0m: 	learning_rate: 0.002561022651486402
[34m[1mwandb[0m: 	momentum: 0.10579060790345927
[34m[1mwandb[0m: 	rho: 0.9634445046386808


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 474)               372090    
                                                                 
 dense_1 (Dense)             (None, 243)               115425    
                                                                 
 dense_2 (Dense)             (None, 188)               45872     
                                                                 
 dense_3 (Dense)             (None, 123)               23247     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
loss,█▄▃▂▂▂▂▂▂▂▁▂▂▂▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▂▃▃▃▃
sparse_categorical_accuracy,▁▅▆▇▇▇▇▇▇▇████████████████▇████▇▇▇▇
val_loss,▃▂▁▂▂▁▂▂▂▂▂▂▃▃▃▃▃▃▃▅▄▄▄▇▄▄▄▆▅▅▅▅▅█▅
val_sparse_categorical_accuracy,▁▁▆▆▇▇█▆▆▆▅█▄▇▆▆▇▆▇▆▅▇█▆▇█▇▆▄▇█▆▅▁▄

0,1
GFLOPS,0.00056
best_epoch,2.0
best_val_loss,0.38425
epoch,34.0
loss,0.40281
sparse_categorical_accuracy,0.87875
val_loss,0.6491
val_sparse_categorical_accuracy,0.8465


## Show best hyperparameters

In [8]:
# Query the finished sweep through the W&B public API. The sweep is addressed
# as "<entity>/<project>/sweeps/<id>"; entity and project are hard-coded here,
# and `sweep_id` comes from an earlier cell.
api = wandb.Api()
sweep_path = "nsiete23/tf_mlp/sweeps/" + sweep_id
sweep = api.sweep(sweep_path)

# Ask W&B for the top-ranked run of the sweep (the recorded output shows the
# runs being sorted by summary `val_loss`), then report its id and the
# hyperparameters it was trained with.
best_run = sweep.best_run()
print(best_run.id)
pprint(best_run.config)

[34m[1mwandb[0m: Sorting runs by +summary_metrics.val_loss


yghzx9su
{'batch_size': 256,
 'dense_1': 363,
 'dense_2': 246,
 'dense_3': 147,
 'dense_4': 99,
 'epochs': 15,
 'learning_rate': 0.001266287404751428,
 'momentum': 0.27566146279392545,
 'rho': 0.9841212943927308}


## Finish Wandb run

In [9]:
# Mark any still-active W&B run as finished so the session is closed cleanly
# at the end of the notebook.
wandb.finish()