# MLP with Adam Using Tensorflow - Tuning

In [1]:
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
import wandb
from pprint import pprint

from tf_utils import create_model, train_model

## Login to Wandb

In [2]:
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

## Load Fashion-MNIST

In [3]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


## Define sweep agent function

In [4]:
def sweep_train():
    default_config = {
        'dense_1': 512,
        'dense_2': 256,
        'dense_3': 256,
        'dense_4': 64,
        'learning_rate': 0.001,
        'beta_1': 0.9,
        'beta_2': 0.999,
        'epochs': 50,
        'batch_size': 128,
    }

    wandb.init(config=default_config)

    config = wandb.config

    mlp = create_model(config)
    mlp.summary()
    mlp.compile(
        optimizer=Adam(learning_rate=config.learning_rate, beta_1=config.beta_1, beta_2=config.beta_2),
        loss=SparseCategoricalCrossentropy(),
        metrics=[SparseCategoricalAccuracy()]
    )
    _ = train_model(mlp, config, x_train, y_train, verbose=0)

## Configure Wandb Sweep for hyperparameter tuning

In [5]:
sweep_config = {
    'name': 'tf-adam-sweep',
    'method': 'bayes',
    'metric': {
        'name': 'val_loss',
        'goal': 'minimize'
    },
    'early_terminate':{
        'type': 'hyperband',
        'min_iter': 2
    },
    'parameters': {
        'batch_size': {
            'values': [16, 32, 64, 128, 256]
        },
        'epochs': {
            'min': 15,
            'max': 50
        },
        'learning_rate': {
            'min': 0.0001,
            'max': 0.1
        },
        'beta_1': {
            'min': 0.9,
            'max': 0.999
        },
        'beta_2': {
            'min': 0.99,
            'max': 0.9999
        },
        'dense_1': {
            'min': 256,
            'max': 512
        },
        'dense_2': {
            'min': 128,
            'max': 256
        },
        'dense_3': {
            'min': 128,
            'max': 256
        },
        'dense_4': {
            'min': 32,
            'max': 128
        }
    }
}

pprint(sweep_config)

sweep_id = wandb.sweep(sweep_config, project='tf_mlp')

{'early_terminate': {'min_iter': 2, 'type': 'hyperband'},
 'method': 'bayes',
 'metric': {'goal': 'minimize', 'name': 'val_loss'},
 'name': 'tf-adam-sweep',
 'parameters': {'batch_size': {'values': [16, 32, 64, 128, 256]},
                'beta_1': {'max': 0.999, 'min': 0.9},
                'beta_2': {'max': 0.9999, 'min': 0.99},
                'dense_1': {'max': 512, 'min': 256},
                'dense_2': {'max': 256, 'min': 128},
                'dense_3': {'max': 256, 'min': 128},
                'dense_4': {'max': 128, 'min': 32},
                'epochs': {'max': 50, 'min': 15},
                'learning_rate': {'max': 0.1, 'min': 0.0001}}}
Create sweep with ID: 12a25071
Sweep URL: https://wandb.ai/nsiete23/tf_mlp/sweeps/12a25071


## Run sweep

In [6]:
wandb.agent(sweep_id, function=sweep_train, count=50)

[34m[1mwandb[0m: Agent Starting Run: qdcc9m2a with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta_1: 0.9055461392449808
[34m[1mwandb[0m: 	beta_2: 0.9955683606550336
[34m[1mwandb[0m: 	dense_1: 502
[34m[1mwandb[0m: 	dense_2: 150
[34m[1mwandb[0m: 	dense_3: 227
[34m[1mwandb[0m: 	dense_4: 123
[34m[1mwandb[0m: 	epochs: 48
[34m[1mwandb[0m: 	learning_rate: 0.09416619525165897
[34m[1mwandb[0m: Currently logged in as: [33mv-modroczky[0m ([33mnsiete23[0m). Use [1m`wandb login --relogin`[0m to force relogin


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 502)               394070    
                                                                 
 dense_1 (Dense)             (None, 150)               75450     
                                                                 
 dense_2 (Dense)             (None, 227)               34277     
                                                                 
 dense_3 (Dense)             (None, 123)               28044     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▄▃▃▂▅▃▅▅▃█▄▃▄▄▂▃▄▅▁▂▂▃▆▁▃▂▇▃▅▇▃▄▄▅▆▅▃▄▃▄
val_loss,▂▄█▃▇▆▆▁▂▂▄▃▂▁▄▃▁▄▃▁▃▄▃▃▃▃▂▆▂▂▃▃▃▃▂▂▃▃▅▃
val_sparse_categorical_accuracy,▅▃▁███▅▃▆█▅▅▅▅▃█▅█▅█▃▃▅▃▃▃▁▅█▃▅█▆▆██▃▅█▅

0,1
GFLOPS,0.00053
best_epoch,35.0
best_val_loss,2.30686
epoch,47.0
loss,2.31525
sparse_categorical_accuracy,0.10027
val_loss,2.31676
val_sparse_categorical_accuracy,0.10033


[34m[1mwandb[0m: Agent Starting Run: nnzsrrxb with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.941964814245118
[34m[1mwandb[0m: 	beta_2: 0.994023947453032
[34m[1mwandb[0m: 	dense_1: 305
[34m[1mwandb[0m: 	dense_2: 183
[34m[1mwandb[0m: 	dense_3: 206
[34m[1mwandb[0m: 	dense_4: 58
[34m[1mwandb[0m: 	epochs: 32
[34m[1mwandb[0m: 	learning_rate: 0.03703548823282531


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 305)               239425    
                                                                 
 dense_1 (Dense)             (None, 183)               55998     
                                                                 
 dense_2 (Dense)             (None, 206)               37904     
                                                                 
 dense_3 (Dense)             (None, 58)                12006     
                                                                 
 dense_4 (Dense)             (None, 10)                5

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇███
loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▇▇██████████████████▇████████
val_loss,█▄▄▂▃▂▁▂▁▁▁▃▁▁▂▂▁▂▂▁▂▂▂▃▃▁▂▁▂▂▂▁
val_sparse_categorical_accuracy,▁▅▆▇▇██▇▇██▇█▇███▇███▇█▆▇█▇█▇▇██

0,1
GFLOPS,0.00035
best_epoch,19.0
best_val_loss,0.57257
epoch,31.0
loss,0.56525
sparse_categorical_accuracy,0.80292
val_loss,0.59072
val_sparse_categorical_accuracy,0.80033


[34m[1mwandb[0m: Agent Starting Run: 4xtrgop1 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta_1: 0.9457768261413304
[34m[1mwandb[0m: 	beta_2: 0.9929244503361966
[34m[1mwandb[0m: 	dense_1: 499
[34m[1mwandb[0m: 	dense_2: 131
[34m[1mwandb[0m: 	dense_3: 211
[34m[1mwandb[0m: 	dense_4: 100
[34m[1mwandb[0m: 	epochs: 24
[34m[1mwandb[0m: 	learning_rate: 0.010695832123544496


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 499)               391715    
                                                                 
 dense_1 (Dense)             (None, 131)               65500     
                                                                 
 dense_2 (Dense)             (None, 211)               27852     
                                                                 
 dense_3 (Dense)             (None, 100)               21200     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
loss,█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▂▁▃▃▂▂▂▂▃
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇▇▇▇████▇█▇▇███▇▇
val_loss,▂▂▂▂▃▂▂▅▂▆▂▃▁▂▃▃▃▄█▃▅█▅█
val_sparse_categorical_accuracy,▂▃▃▅▄▂▂▅▆▃▇▇█▆▅▄▃▄▄█▇▁▂▄

0,1
GFLOPS,0.00051
best_epoch,12.0
best_val_loss,0.47334
epoch,23.0
loss,0.49793
sparse_categorical_accuracy,0.84217
val_loss,0.77344
val_sparse_categorical_accuracy,0.83075


[34m[1mwandb[0m: Agent Starting Run: 20q1fnt0 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta_1: 0.9247684002500548
[34m[1mwandb[0m: 	beta_2: 0.999823216633493
[34m[1mwandb[0m: 	dense_1: 443
[34m[1mwandb[0m: 	dense_2: 165
[34m[1mwandb[0m: 	dense_3: 191
[34m[1mwandb[0m: 	dense_4: 70
[34m[1mwandb[0m: 	epochs: 38
[34m[1mwandb[0m: 	learning_rate: 0.09926648137767496


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 443)               347755    
                                                                 
 dense_1 (Dense)             (None, 165)               73260     
                                                                 
 dense_2 (Dense)             (None, 191)               31706     
                                                                 
 dense_3 (Dense)             (None, 70)                13440     
                                                                 
 dense_4 (Dense)             (None, 10)                7

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▇▂▄▅▃▇▆▅▄▆▄▁▅▇█▃▁▅▆▄
val_loss,▁▂▃▅▃▅▃▂▅▆▂▆▄▃▃█▁▃▃▅
val_sparse_categorical_accuracy,█▅▃▆█▆█▆▅▅▅▅▃▃▅▁█▆█▃

0,1
GFLOPS,0.00047
best_epoch,0.0
best_val_loss,2.30361
epoch,19.0
loss,2.31157
sparse_categorical_accuracy,0.09898
val_loss,2.31793
val_sparse_categorical_accuracy,0.09833


[34m[1mwandb[0m: Agent Starting Run: hpcrs0ab with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9591544234365096
[34m[1mwandb[0m: 	beta_2: 0.9903891493448778
[34m[1mwandb[0m: 	dense_1: 324
[34m[1mwandb[0m: 	dense_2: 164
[34m[1mwandb[0m: 	dense_3: 198
[34m[1mwandb[0m: 	dense_4: 72
[34m[1mwandb[0m: 	epochs: 29
[34m[1mwandb[0m: 	learning_rate: 0.01008544856907484


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 324)               254340    
                                                                 
 dense_1 (Dense)             (None, 164)               53300     
                                                                 
 dense_2 (Dense)             (None, 198)               32670     
                                                                 
 dense_3 (Dense)             (None, 72)                14328     
                                                                 
 dense_4 (Dense)             (None, 10)                7

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
loss,█▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▇▇▇▇▇▇▇██████████████████
val_loss,█▅▄▃▂▂▃▂▃▃▂▂▁▁▂▂▃▂▃▄▂▂▃▃▃▄▃▅▄
val_sparse_categorical_accuracy,▁▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▆▇█▇▇▆▇▇▇█

0,1
GFLOPS,0.00036
best_epoch,12.0
best_val_loss,0.35522
epoch,28.0
loss,0.25196
sparse_categorical_accuracy,0.90894
val_loss,0.39617
val_sparse_categorical_accuracy,0.88542


[34m[1mwandb[0m: Agent Starting Run: qqb5dtku with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9250510279922884
[34m[1mwandb[0m: 	beta_2: 0.9905052005915664
[34m[1mwandb[0m: 	dense_1: 288
[34m[1mwandb[0m: 	dense_2: 242
[34m[1mwandb[0m: 	dense_3: 232
[34m[1mwandb[0m: 	dense_4: 80
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	learning_rate: 0.0011498246866222658


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 288)               226080    
                                                                 
 dense_1 (Dense)             (None, 242)               69938     
                                                                 
 dense_2 (Dense)             (None, 232)               56376     
                                                                 
 dense_3 (Dense)             (None, 80)                18640     
                                                                 
 dense_4 (Dense)             (None, 10)                8

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
loss,█▅▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁
sparse_categorical_accuracy,▁▄▅▆▆▆▆▇▇▇▇▇▇████
val_loss,█▅▃▄▂▃▂▁▁▂▁▂▁▁▁▂▂
val_sparse_categorical_accuracy,▁▃▅▄▅▆▆▇▇▆█▇███▇▇

0,1
GFLOPS,0.00037
best_epoch,12.0
best_val_loss,0.31143
epoch,16.0
loss,0.17596
sparse_categorical_accuracy,0.93246
val_loss,0.32723
val_sparse_categorical_accuracy,0.8905


[34m[1mwandb[0m: Agent Starting Run: fml5k21d with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta_1: 0.9780679993488128
[34m[1mwandb[0m: 	beta_2: 0.9912594361660652
[34m[1mwandb[0m: 	dense_1: 329
[34m[1mwandb[0m: 	dense_2: 217
[34m[1mwandb[0m: 	dense_3: 252
[34m[1mwandb[0m: 	dense_4: 54
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	learning_rate: 0.0037183337326787617


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 329)               258265    
                                                                 
 dense_1 (Dense)             (None, 217)               71610     
                                                                 
 dense_2 (Dense)             (None, 252)               54936     
                                                                 
 dense_3 (Dense)             (None, 54)                13662     
                                                                 
 dense_4 (Dense)             (None, 10)                5

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▅▆▆▇▇▇▇▇▇▇████████
val_loss,▆▃▃▁▄▅▄▁▂▃▄▁▂▃▃▁█▇█▇
val_sparse_categorical_accuracy,▁▄▃▆▅▄▆▇▅▆▄▇█▆▆█▇█▆▆

0,1
GFLOPS,0.0004
best_epoch,3.0
best_val_loss,0.38234
epoch,19.0
loss,0.3023
sparse_categorical_accuracy,0.89358
val_loss,0.46091
val_sparse_categorical_accuracy,0.86467


[34m[1mwandb[0m: Agent Starting Run: vw7ynyxf with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.974542160927393
[34m[1mwandb[0m: 	beta_2: 0.9901804451496397
[34m[1mwandb[0m: 	dense_1: 380
[34m[1mwandb[0m: 	dense_2: 204
[34m[1mwandb[0m: 	dense_3: 249
[34m[1mwandb[0m: 	dense_4: 32
[34m[1mwandb[0m: 	epochs: 22
[34m[1mwandb[0m: 	learning_rate: 0.02627930081124824


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 380)               298300    
                                                                 
 dense_1 (Dense)             (None, 204)               77724     
                                                                 
 dense_2 (Dense)             (None, 249)               51045     
                                                                 
 dense_3 (Dense)             (None, 32)                8000      
                                                                 
 dense_4 (Dense)             (None, 10)                3

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▇███████████████████
val_loss,█▇▂▂▂▂▂▁▁▁▁▁▁▁▁▃▁▁▁▁▁▁
val_sparse_categorical_accuracy,▁▂▇▇█▇▇███████████▇███

0,1
GFLOPS,0.00044
best_epoch,16.0
best_val_loss,1.20058
epoch,21.0
loss,1.17828
sparse_categorical_accuracy,0.478
val_loss,1.21167
val_sparse_categorical_accuracy,0.47217


[34m[1mwandb[0m: Agent Starting Run: fgnkpjr1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta_1: 0.9429665148103002
[34m[1mwandb[0m: 	beta_2: 0.9925600684575656
[34m[1mwandb[0m: 	dense_1: 307
[34m[1mwandb[0m: 	dense_2: 194
[34m[1mwandb[0m: 	dense_3: 198
[34m[1mwandb[0m: 	dense_4: 102
[34m[1mwandb[0m: 	epochs: 23
[34m[1mwandb[0m: 	learning_rate: 0.0030259295998909543


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 307)               240995    
                                                                 
 dense_1 (Dense)             (None, 194)               59752     
                                                                 
 dense_2 (Dense)             (None, 198)               38610     
                                                                 
 dense_3 (Dense)             (None, 102)               20298     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇██
loss,█▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇███████
val_loss,▇▆▅▃▅▃▂▃▂▁▃▁▃█▄▃▄▄▆▅██▇
val_sparse_categorical_accuracy,▁▃▄▆▆▆▇▆▇▇▇█▇▆█▇█▇▇▇▇█▇

0,1
GFLOPS,0.00036
best_epoch,9.0
best_val_loss,0.34002
epoch,22.0
loss,0.23002
sparse_categorical_accuracy,0.91621
val_loss,0.43507
val_sparse_categorical_accuracy,0.88258


[34m[1mwandb[0m: Agent Starting Run: nlmeu15j with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.9950173271388416
[34m[1mwandb[0m: 	beta_2: 0.9913666077907416
[34m[1mwandb[0m: 	dense_1: 261
[34m[1mwandb[0m: 	dense_2: 141
[34m[1mwandb[0m: 	dense_3: 194
[34m[1mwandb[0m: 	dense_4: 107
[34m[1mwandb[0m: 	epochs: 18
[34m[1mwandb[0m: 	learning_rate: 0.000791861964955855


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 261)               204885    
                                                                 
 dense_1 (Dense)             (None, 141)               36942     
                                                                 
 dense_2 (Dense)             (None, 194)               27548     
                                                                 
 dense_3 (Dense)             (None, 107)               20865     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
loss,█▅▄▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁
sparse_categorical_accuracy,▁▄▅▅▆▆▇▇▇▇▇▇▇▇████
val_loss,█▆▅▃▂▂▂▁▂▂▂▂▂▂▂▂▂▂
val_sparse_categorical_accuracy,▁▃▄▆▆▇▇▇▇▇▇█▇▇▇███

0,1
GFLOPS,0.00029
best_epoch,7.0
best_val_loss,0.30599
epoch,17.0
loss,0.18411
sparse_categorical_accuracy,0.92935
val_loss,0.32697
val_sparse_categorical_accuracy,0.895


[34m[1mwandb[0m: Agent Starting Run: s8n23te1 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.9484545584631338
[34m[1mwandb[0m: 	beta_2: 0.992782895927994
[34m[1mwandb[0m: 	dense_1: 272
[34m[1mwandb[0m: 	dense_2: 134
[34m[1mwandb[0m: 	dense_3: 232
[34m[1mwandb[0m: 	dense_4: 74
[34m[1mwandb[0m: 	epochs: 23
[34m[1mwandb[0m: 	learning_rate: 0.004176293709502716


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 272)               213520    
                                                                 
 dense_1 (Dense)             (None, 134)               36582     
                                                                 
 dense_2 (Dense)             (None, 232)               31320     
                                                                 
 dense_3 (Dense)             (None, 74)                17242     
                                                                 
 dense_4 (Dense)             (None, 10)                7

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇██
loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▆▆▆▆▇▇▇▇▇▇▇▇▇███████
val_loss,█▆▃▄▃▃▁▄▂▂▄▂▄▂▃▄▄▅█▅▅▅▇
val_sparse_categorical_accuracy,▁▂▅▅▅▆▇▃▇█▆█▇██▆█▇▇████

0,1
GFLOPS,0.0003
best_epoch,6.0
best_val_loss,0.32572
epoch,22.0
loss,0.22812
sparse_categorical_accuracy,0.91881
val_loss,0.41833
val_sparse_categorical_accuracy,0.88383


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lgocg9xl with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9536129572235824
[34m[1mwandb[0m: 	beta_2: 0.9924920860176142
[34m[1mwandb[0m: 	dense_1: 258
[34m[1mwandb[0m: 	dense_2: 164
[34m[1mwandb[0m: 	dense_3: 155
[34m[1mwandb[0m: 	dense_4: 66
[34m[1mwandb[0m: 	epochs: 22
[34m[1mwandb[0m: 	learning_rate: 0.005573776299458383


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 258)               202530    
                                                                 
 dense_1 (Dense)             (None, 164)               42476     
                                                                 
 dense_2 (Dense)             (None, 155)               25575     
                                                                 
 dense_3 (Dense)             (None, 66)                10296     
                                                                 
 dense_4 (Dense)             (None, 10)                6

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▇▇▇▇▇▇▇▇▇████████
val_loss,█▅▆▄▃▄▁▂▂▁▂▂▃▁▂▁▅▁▅▂▄▅
val_sparse_categorical_accuracy,▁▃▃▄▆▅▇▆▇▇▆▇▆▇▇▇▇█▇█▇▇

0,1
GFLOPS,0.00028
best_epoch,9.0
best_val_loss,0.33546
epoch,21.0
loss,0.22671
sparse_categorical_accuracy,0.91581
val_loss,0.39776
val_sparse_categorical_accuracy,0.88233


[34m[1mwandb[0m: Agent Starting Run: 7hv5kjki with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.9960225753316504
[34m[1mwandb[0m: 	beta_2: 0.990382721725471
[34m[1mwandb[0m: 	dense_1: 266
[34m[1mwandb[0m: 	dense_2: 201
[34m[1mwandb[0m: 	dense_3: 218
[34m[1mwandb[0m: 	dense_4: 120
[34m[1mwandb[0m: 	epochs: 43
[34m[1mwandb[0m: 	learning_rate: 0.00918036895366317


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 266)               208810    
                                                                 
 dense_1 (Dense)             (None, 201)               53667     
                                                                 
 dense_2 (Dense)             (None, 218)               44036     
                                                                 
 dense_3 (Dense)             (None, 120)               26280     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇██
loss,█▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▂▁▁▁▁▁▁▂▄▂▂▂▃▂▂▂▂▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇▇▇▇▇▇███████▇█████▇▇▇▇▇▇▇▇▇▇████
val_loss,█▇▄▃▃▄▃▁▁▃█▄▅▃▁▃▁▂▂▄▂▂▃▁▂▃▅▃▂▆▃▄█▅▆▅▄▄▆▁
val_sparse_categorical_accuracy,▁▃▅▆▆▄▆▇▆▅▆▅▅▆▇▆█▆▇▅▇▇██▇▇▆▆▆▇▆▆▆▆▄▇▇▇▇▇

0,1
GFLOPS,0.00033
best_epoch,15.0
best_val_loss,0.40776
epoch,42.0
loss,0.35243
sparse_categorical_accuracy,0.87877
val_loss,0.41557
val_sparse_categorical_accuracy,0.86375


[34m[1mwandb[0m: Agent Starting Run: 9kam8s13 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta_1: 0.9932626307872114
[34m[1mwandb[0m: 	beta_2: 0.9904002210063344
[34m[1mwandb[0m: 	dense_1: 274
[34m[1mwandb[0m: 	dense_2: 224
[34m[1mwandb[0m: 	dense_3: 249
[34m[1mwandb[0m: 	dense_4: 110
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	learning_rate: 0.012505647636796804


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 274)               215090    
                                                                 
 dense_1 (Dense)             (None, 224)               61600     
                                                                 
 dense_2 (Dense)             (None, 249)               56025     
                                                                 
 dense_3 (Dense)             (None, 110)               27500     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▃▂▂▁▂▂▁▂▂▁▁▁▁▂
sparse_categorical_accuracy,▁▆▆▇▇▇▇██▇████▇
val_loss,█▃▃▂▂▃▁▄▁▂▂▃▂▂▃
val_sparse_categorical_accuracy,▁▃▆▆▆▆▇▇█▇█▇▆▇▆

0,1
GFLOPS,0.00036
best_epoch,8.0
best_val_loss,0.46302
epoch,14.0
loss,0.46059
sparse_categorical_accuracy,0.84708
val_loss,0.51344
val_sparse_categorical_accuracy,0.83292


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pkvlgxfk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.961409308256742
[34m[1mwandb[0m: 	beta_2: 0.9900774650771133
[34m[1mwandb[0m: 	dense_1: 270
[34m[1mwandb[0m: 	dense_2: 164
[34m[1mwandb[0m: 	dense_3: 131
[34m[1mwandb[0m: 	dense_4: 110
[34m[1mwandb[0m: 	epochs: 35
[34m[1mwandb[0m: 	learning_rate: 0.003533380896132927


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 270)               211950    
                                                                 
 dense_1 (Dense)             (None, 164)               44444     
                                                                 
 dense_2 (Dense)             (None, 131)               21615     
                                                                 
 dense_3 (Dense)             (None, 110)               14520     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
loss,█▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████
val_loss,▅▄▃▂▂▂▂▁▂▁▂▁▂▁▃▂▃▃▂▄▂▄▄▅▄▃▃▅▅▆▄█▆█▇
val_sparse_categorical_accuracy,▁▃▃▅▅▆▆▆▆▇▇▇▇▇▇▇▆▇█▇▇▆▇▇▇▇███▇██▇▇▇

0,1
GFLOPS,0.00029
best_epoch,13.0
best_val_loss,0.33262
epoch,34.0
loss,0.18178
sparse_categorical_accuracy,0.93502
val_loss,0.50054
val_sparse_categorical_accuracy,0.88367


[34m[1mwandb[0m: Agent Starting Run: w0cfmcf6 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta_1: 0.9057390456872636
[34m[1mwandb[0m: 	beta_2: 0.9912838488341884
[34m[1mwandb[0m: 	dense_1: 276
[34m[1mwandb[0m: 	dense_2: 238
[34m[1mwandb[0m: 	dense_3: 243
[34m[1mwandb[0m: 	dense_4: 45
[34m[1mwandb[0m: 	epochs: 16
[34m[1mwandb[0m: 	learning_rate: 0.004055646301818413


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 276)               216660    
                                                                 
 dense_1 (Dense)             (None, 238)               65926     
                                                                 
 dense_2 (Dense)             (None, 243)               58077     
                                                                 
 dense_3 (Dense)             (None, 45)                10980     
                                                                 
 dense_4 (Dense)             (None, 10)                4

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
loss,█▃▂▂▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▇▇▇▇█████
val_loss,▂▂▁▂▁▁▂▂▂▃█▄
val_sparse_categorical_accuracy,▁▄▂█▆█▅██▆▃█

0,1
GFLOPS,0.00035
best_epoch,5.0
best_val_loss,0.4433
epoch,11.0
loss,0.41717
sparse_categorical_accuracy,0.86644
val_loss,0.60587
val_sparse_categorical_accuracy,0.85517


[34m[1mwandb[0m: Agent Starting Run: k2p14raa with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta_1: 0.995296048229984
[34m[1mwandb[0m: 	beta_2: 0.9956710810717724
[34m[1mwandb[0m: 	dense_1: 342
[34m[1mwandb[0m: 	dense_2: 153
[34m[1mwandb[0m: 	dense_3: 205
[34m[1mwandb[0m: 	dense_4: 88
[34m[1mwandb[0m: 	epochs: 18
[34m[1mwandb[0m: 	learning_rate: 0.002982463402479623


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 342)               268470    
                                                                 
 dense_1 (Dense)             (None, 153)               52479     
                                                                 
 dense_2 (Dense)             (None, 205)               31570     
                                                                 
 dense_3 (Dense)             (None, 88)                18128     
                                                                 
 dense_4 (Dense)             (None, 10)                8

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁
sparse_categorical_accuracy,▁▄▅▆▆▆▆▇▇▇▇▇▇▇████
val_loss,█▄▄▅▄▁▄▁▁▃▃▄▂▅▂▂▄█
val_sparse_categorical_accuracy,▁▃▄▄▅▇▅▇█▇▆▆▇▇▇██▇

0,1
GFLOPS,0.00037
best_epoch,7.0
best_val_loss,0.34331
epoch,17.0
loss,0.22946
sparse_categorical_accuracy,0.91487
val_loss,0.39948
val_sparse_categorical_accuracy,0.88075


[34m[1mwandb[0m: Agent Starting Run: dvcmdwg2 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9730154646453764
[34m[1mwandb[0m: 	beta_2: 0.9951756988702044
[34m[1mwandb[0m: 	dense_1: 373
[34m[1mwandb[0m: 	dense_2: 165
[34m[1mwandb[0m: 	dense_3: 178
[34m[1mwandb[0m: 	dense_4: 112
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	learning_rate: 0.009096974531726448


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 373)               292805    
                                                                 
 dense_1 (Dense)             (None, 165)               61710     
                                                                 
 dense_2 (Dense)             (None, 178)               29548     
                                                                 
 dense_3 (Dense)             (None, 112)               20048     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
loss,█▃▃▂▂▂▂▂▁▂▂▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇▇▇▇█▇██████
val_loss,█▄▃▃▂▂▃▂▂▂▂▂▁▂▁▃▃
val_sparse_categorical_accuracy,▁▄▆▆▇▅▆▆▇▇▇▇██▇█▇

0,1
GFLOPS,0.00041
best_epoch,12.0
best_val_loss,0.34322
epoch,16.0
loss,0.26044
sparse_categorical_accuracy,0.90433
val_loss,0.3742
val_sparse_categorical_accuracy,0.87542


[34m[1mwandb[0m: Agent Starting Run: qhqsm83r with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9320401558974792
[34m[1mwandb[0m: 	beta_2: 0.9904371161435048
[34m[1mwandb[0m: 	dense_1: 263
[34m[1mwandb[0m: 	dense_2: 217
[34m[1mwandb[0m: 	dense_3: 190
[34m[1mwandb[0m: 	dense_4: 126
[34m[1mwandb[0m: 	epochs: 19
[34m[1mwandb[0m: 	learning_rate: 0.01352303245918626


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 263)               206455    
                                                                 
 dense_1 (Dense)             (None, 217)               57288     
                                                                 
 dense_2 (Dense)             (None, 190)               41420     
                                                                 
 dense_3 (Dense)             (None, 126)               24066     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██
loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇▇██████████
val_loss,█▆▂▃▃▂▂▃▂▃▁▁▂▁▂▃▂▃▂
val_sparse_categorical_accuracy,▁▃▆▆▆▇▇▇▇▇▇▇██▇█▇██

0,1
GFLOPS,0.00033
best_epoch,13.0
best_val_loss,0.38215
epoch,18.0
loss,0.31644
sparse_categorical_accuracy,0.88673
val_loss,0.40592
val_sparse_categorical_accuracy,0.8665


[34m[1mwandb[0m: Agent Starting Run: g16k4e3o with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.9919935934207348
[34m[1mwandb[0m: 	beta_2: 0.9959372812588646
[34m[1mwandb[0m: 	dense_1: 260
[34m[1mwandb[0m: 	dense_2: 228
[34m[1mwandb[0m: 	dense_3: 216
[34m[1mwandb[0m: 	dense_4: 75
[34m[1mwandb[0m: 	epochs: 24
[34m[1mwandb[0m: 	learning_rate: 0.013507538441744417


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 260)               204100    
                                                                 
 dense_1 (Dense)             (None, 228)               59508     
                                                                 
 dense_2 (Dense)             (None, 216)               49464     
                                                                 
 dense_3 (Dense)             (None, 75)                16275     
                                                                 
 dense_4 (Dense)             (None, 10)                7

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
loss,█▃▂▂▂▂▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇█▇██▇▇██████████
val_loss,█▄▄▃▄▁▁▃▂▃▁▃▄▄▃▃▃▃▃▂▁▂▃▂
val_sparse_categorical_accuracy,▁▄▃▅▅▇▇█▇▆▇▅▅▆▅▆▅▆▇███▇▇

0,1
GFLOPS,0.00033
best_epoch,5.0
best_val_loss,0.41401
epoch,23.0
loss,0.3523
sparse_categorical_accuracy,0.87498
val_loss,0.4245
val_sparse_categorical_accuracy,0.85892


[34m[1mwandb[0m: Agent Starting Run: 4nb72c98 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9534073024005936
[34m[1mwandb[0m: 	beta_2: 0.9903402042143252
[34m[1mwandb[0m: 	dense_1: 256
[34m[1mwandb[0m: 	dense_2: 246
[34m[1mwandb[0m: 	dense_3: 254
[34m[1mwandb[0m: 	dense_4: 115
[34m[1mwandb[0m: 	epochs: 24
[34m[1mwandb[0m: 	learning_rate: 0.01507761915292622


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 256)               200960    
                                                                 
 dense_1 (Dense)             (None, 246)               63222     
                                                                 
 dense_2 (Dense)             (None, 254)               62738     
                                                                 
 dense_3 (Dense)             (None, 115)               29325     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
loss,█▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇▇▇▇█████████████
val_loss,▆▃▃▂▃▃▂▃▂▄▃▁▃▂▂▄▂▂▂▃▃▂█▃
val_sparse_categorical_accuracy,▁▄▅▆▅▆▆▆▇▆▆█▆▇▇▇▇▆█▆▇▇▆▇

0,1
GFLOPS,0.00036
best_epoch,11.0
best_val_loss,0.39373
epoch,23.0
loss,0.33153
sparse_categorical_accuracy,0.88529
val_loss,0.42637
val_sparse_categorical_accuracy,0.86758


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sepa733j with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.949515760427038
[34m[1mwandb[0m: 	beta_2: 0.9907676604944448
[34m[1mwandb[0m: 	dense_1: 301
[34m[1mwandb[0m: 	dense_2: 215
[34m[1mwandb[0m: 	dense_3: 199
[34m[1mwandb[0m: 	dense_4: 106
[34m[1mwandb[0m: 	epochs: 33
[34m[1mwandb[0m: 	learning_rate: 0.0016436519170975265


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 301)               236285    
                                                                 
 dense_1 (Dense)             (None, 215)               64930     
                                                                 
 dense_2 (Dense)             (None, 199)               42984     
                                                                 
 dense_3 (Dense)             (None, 106)               21200     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇███
loss,█▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇█▇████████
val_loss,█▃▄▃▃▂▃▃▁▁▂▂▂▂▂▃▂▄▄▄▄▄▄▅▅▆▆▆▅▇▇▇▇
val_sparse_categorical_accuracy,▁▅▄▅▆▇▆▆▇▇▇▇▇▇▇▇██▇▇█▇██▇▇███████

0,1
GFLOPS,0.00037
best_epoch,8.0
best_val_loss,0.3082
epoch,32.0
loss,0.11171
sparse_categorical_accuracy,0.95642
val_loss,0.42278
val_sparse_categorical_accuracy,0.89325


[34m[1mwandb[0m: Agent Starting Run: bmk59cp0 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9749170065819144
[34m[1mwandb[0m: 	beta_2: 0.9949239647312176
[34m[1mwandb[0m: 	dense_1: 283
[34m[1mwandb[0m: 	dense_2: 240
[34m[1mwandb[0m: 	dense_3: 203
[34m[1mwandb[0m: 	dense_4: 123
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	learning_rate: 0.0009689533196204632


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 283)               222155    
                                                                 
 dense_1 (Dense)             (None, 240)               68160     
                                                                 
 dense_2 (Dense)             (None, 203)               48923     
                                                                 
 dense_3 (Dense)             (None, 123)               25092     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▅▄▃▃▃▂▂▂▂▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▆▆▆▆▇▇▇▇████
val_loss,█▄▄▃▂▂▂▂▂▁▂▂▁▁▂
val_sparse_categorical_accuracy,▁▄▅▅▇▆▆▆▇█▇████

0,1
GFLOPS,0.00037
best_epoch,9.0
best_val_loss,0.30804
epoch,14.0
loss,0.19001
sparse_categorical_accuracy,0.92919
val_loss,0.32465
val_sparse_categorical_accuracy,0.89292


[34m[1mwandb[0m: Agent Starting Run: ci96mb4w with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta_1: 0.9807599364023232
[34m[1mwandb[0m: 	beta_2: 0.9903750628077655
[34m[1mwandb[0m: 	dense_1: 381
[34m[1mwandb[0m: 	dense_2: 165
[34m[1mwandb[0m: 	dense_3: 234
[34m[1mwandb[0m: 	dense_4: 97
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	learning_rate: 0.0033707624683789864


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 381)               299085    
                                                                 
 dense_1 (Dense)             (None, 165)               63030     
                                                                 
 dense_2 (Dense)             (None, 234)               38844     
                                                                 
 dense_3 (Dense)             (None, 97)                22795     
                                                                 
 dense_4 (Dense)             (None, 10)                9

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▆▆▇▇▇▇▇▇████
val_loss,█▄▃▁▃▂▂▁▂▁▃█▃▁▄
val_sparse_categorical_accuracy,▁▃▅▇▅▅▇▇▆▇▆▇▆▇█

0,1
GFLOPS,0.00042
best_epoch,3.0
best_val_loss,0.3484
epoch,14.0
loss,0.25842
sparse_categorical_accuracy,0.906
val_loss,0.38647
val_sparse_categorical_accuracy,0.8885


[34m[1mwandb[0m: Agent Starting Run: 40dunp3m with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.986435025614692
[34m[1mwandb[0m: 	beta_2: 0.9904482393527106
[34m[1mwandb[0m: 	dense_1: 485
[34m[1mwandb[0m: 	dense_2: 176
[34m[1mwandb[0m: 	dense_3: 131
[34m[1mwandb[0m: 	dense_4: 122
[34m[1mwandb[0m: 	epochs: 19
[34m[1mwandb[0m: 	learning_rate: 0.00012885226472854095


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 485)               380725    
                                                                 
 dense_1 (Dense)             (None, 176)               85536     
                                                                 
 dense_2 (Dense)             (None, 131)               23187     
                                                                 
 dense_3 (Dense)             (None, 122)               16104     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██
loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
sparse_categorical_accuracy,▁▅▅▆▆▆▆▇▇▇▇▇▇▇█████
val_loss,█▆▄▄▄▄▃▃▂▃▂▂▂▂▂▁▁▁▁
val_sparse_categorical_accuracy,▁▃▄▅▅▅▆▆▇▆▇▇▇▇▇████

0,1
GFLOPS,0.00051
best_epoch,17.0
best_val_loss,0.3036
epoch,18.0
loss,0.21257
sparse_categorical_accuracy,0.92242
val_loss,0.31006
val_sparse_categorical_accuracy,0.89242


[34m[1mwandb[0m: Agent Starting Run: pgjpspdq with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.9970566462993242
[34m[1mwandb[0m: 	beta_2: 0.9936644799927176
[34m[1mwandb[0m: 	dense_1: 273
[34m[1mwandb[0m: 	dense_2: 253
[34m[1mwandb[0m: 	dense_3: 146
[34m[1mwandb[0m: 	dense_4: 74
[34m[1mwandb[0m: 	epochs: 18
[34m[1mwandb[0m: 	learning_rate: 0.000213185092465852


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 273)               214305    
                                                                 
 dense_1 (Dense)             (None, 253)               69322     
                                                                 
 dense_2 (Dense)             (None, 146)               37084     
                                                                 
 dense_3 (Dense)             (None, 74)                10878     
                                                                 
 dense_4 (Dense)             (None, 10)                7

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
loss,█▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▆▆▆▆▇▇▇▇▇██████
val_loss,█▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁
val_sparse_categorical_accuracy,▁▃▄▅▆▆▇▇▆▇█▇▇█▇█▇█

0,1
GFLOPS,0.00033
best_epoch,13.0
best_val_loss,0.31196
epoch,17.0
loss,0.2115
sparse_categorical_accuracy,0.92092
val_loss,0.31392
val_sparse_categorical_accuracy,0.89108


[34m[1mwandb[0m: Agent Starting Run: ozohopyq with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.993354122356796
[34m[1mwandb[0m: 	beta_2: 0.9938310330532448
[34m[1mwandb[0m: 	dense_1: 326
[34m[1mwandb[0m: 	dense_2: 211
[34m[1mwandb[0m: 	dense_3: 207
[34m[1mwandb[0m: 	dense_4: 76
[34m[1mwandb[0m: 	epochs: 23
[34m[1mwandb[0m: 	learning_rate: 0.0032593569340499504


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 326)               255910    
                                                                 
 dense_1 (Dense)             (None, 211)               68997     
                                                                 
 dense_2 (Dense)             (None, 207)               43884     
                                                                 
 dense_3 (Dense)             (None, 76)                15808     
                                                                 
 dense_4 (Dense)             (None, 10)                7

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇██
loss,█▅▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇██████
val_loss,█▆▄▃▂▂▁▃▂▂▂▂▁▃▂▂▃▂▄▃▃▄▄
val_sparse_categorical_accuracy,▁▂▄▅▆▆▆▆▆▇▆▆▇▆▇▇▇▇▇█▇█▇

0,1
GFLOPS,0.00039
best_epoch,12.0
best_val_loss,0.33116
epoch,22.0
loss,0.1982
sparse_categorical_accuracy,0.92558
val_loss,0.37308
val_sparse_categorical_accuracy,0.88833


[34m[1mwandb[0m: Agent Starting Run: mxqyng7l with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9410802192945096
[34m[1mwandb[0m: 	beta_2: 0.9917467928697103
[34m[1mwandb[0m: 	dense_1: 370
[34m[1mwandb[0m: 	dense_2: 140
[34m[1mwandb[0m: 	dense_3: 137
[34m[1mwandb[0m: 	dense_4: 112
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	learning_rate: 0.0003536157464542421


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 370)               290450    
                                                                 
 dense_1 (Dense)             (None, 140)               51940     
                                                                 
 dense_2 (Dense)             (None, 137)               19317     
                                                                 
 dense_3 (Dense)             (None, 112)               15456     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
loss,█▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁
sparse_categorical_accuracy,▁▅▅▆▆▆▇▇▇▇▇▇▇████
val_loss,█▇▅▄▃▂▂▂▁▂▂▁▁▁▂▂▁
val_sparse_categorical_accuracy,▁▂▄▄▅▆▇▇█▇▇▇███▇█

0,1
GFLOPS,0.00038
best_epoch,12.0
best_val_loss,0.30629
epoch,16.0
loss,0.19521
sparse_categorical_accuracy,0.92808
val_loss,0.31087
val_sparse_categorical_accuracy,0.89058


[34m[1mwandb[0m: Agent Starting Run: ogtpphm9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta_1: 0.9930164445141758
[34m[1mwandb[0m: 	beta_2: 0.9925486743540224
[34m[1mwandb[0m: 	dense_1: 428
[34m[1mwandb[0m: 	dense_2: 230
[34m[1mwandb[0m: 	dense_3: 139
[34m[1mwandb[0m: 	dense_4: 97
[34m[1mwandb[0m: 	epochs: 26
[34m[1mwandb[0m: 	learning_rate: 0.0066023443443931126


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 428)               335980    
                                                                 
 dense_1 (Dense)             (None, 230)               98670     
                                                                 
 dense_2 (Dense)             (None, 139)               32109     
                                                                 
 dense_3 (Dense)             (None, 97)                13580     
                                                                 
 dense_4 (Dense)             (None, 10)                9

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▂▂▂▂▂▁▂▁
sparse_categorical_accuracy,▁▅▇▇▇▇▇███
val_loss,▆█▇▂▃▅▂▁▇▂
val_sparse_categorical_accuracy,▁▃▂▆▅▄▇▇▅█

0,1
GFLOPS,0.00048
best_epoch,7.0
best_val_loss,0.41414
epoch,9.0
loss,0.37215
sparse_categorical_accuracy,0.87213
val_loss,0.42707
val_sparse_categorical_accuracy,0.86708


[34m[1mwandb[0m: Agent Starting Run: 7zyxdb6p with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9473250336406532
[34m[1mwandb[0m: 	beta_2: 0.9901312819229128
[34m[1mwandb[0m: 	dense_1: 348
[34m[1mwandb[0m: 	dense_2: 212
[34m[1mwandb[0m: 	dense_3: 230
[34m[1mwandb[0m: 	dense_4: 101
[34m[1mwandb[0m: 	epochs: 18
[34m[1mwandb[0m: 	learning_rate: 0.01010964017752888


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 348)               273180    
                                                                 
 dense_1 (Dense)             (None, 212)               73988     
                                                                 
 dense_2 (Dense)             (None, 230)               48990     
                                                                 
 dense_3 (Dense)             (None, 101)               23331     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
loss,█▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇▇▇█▇███████
val_loss,█▃▁▂▁▂▂▂▁▂▂▂▂▂▂▂▃▂
val_sparse_categorical_accuracy,▁▅▆▆▇▆▇▇█▇▆▆▇█▇██▇

0,1
GFLOPS,0.00042
best_epoch,8.0
best_val_loss,0.36074
epoch,17.0
loss,0.27342
sparse_categorical_accuracy,0.90244
val_loss,0.38525
val_sparse_categorical_accuracy,0.87575


[34m[1mwandb[0m: Agent Starting Run: m9lehkqi with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9448538305274902
[34m[1mwandb[0m: 	beta_2: 0.9908200748340806
[34m[1mwandb[0m: 	dense_1: 302
[34m[1mwandb[0m: 	dense_2: 196
[34m[1mwandb[0m: 	dense_3: 217
[34m[1mwandb[0m: 	dense_4: 93
[34m[1mwandb[0m: 	epochs: 18
[34m[1mwandb[0m: 	learning_rate: 0.011094232308078456


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 302)               237070    
                                                                 
 dense_1 (Dense)             (None, 196)               59388     
                                                                 
 dense_2 (Dense)             (None, 217)               42749     
                                                                 
 dense_3 (Dense)             (None, 93)                20274     
                                                                 
 dense_4 (Dense)             (None, 10)                9

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
loss,█▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▆▇▇▇▇▇▇▇███████
val_loss,█▄▂▇▃▁▁▆▂▄▁▃▂▃▃▃▄▄
val_sparse_categorical_accuracy,▁▄▅▂▅▇▇▃█▆█▆▇▆▇██▆

0,1
GFLOPS,0.00036
best_epoch,5.0
best_val_loss,0.36671
epoch,17.0
loss,0.27567
sparse_categorical_accuracy,0.89929
val_loss,0.39827
val_sparse_categorical_accuracy,0.8705


[34m[1mwandb[0m: Agent Starting Run: z4rrcv5y with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9886338794503788
[34m[1mwandb[0m: 	beta_2: 0.992100042356301
[34m[1mwandb[0m: 	dense_1: 343
[34m[1mwandb[0m: 	dense_2: 152
[34m[1mwandb[0m: 	dense_3: 188
[34m[1mwandb[0m: 	dense_4: 117
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	learning_rate: 0.021687747798520392


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 343)               269255    
                                                                 
 dense_1 (Dense)             (None, 152)               52288     
                                                                 
 dense_2 (Dense)             (None, 188)               28764     
                                                                 
 dense_3 (Dense)             (None, 117)               22113     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
loss,█▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▅▅▆▇▇▇▇███████
val_loss,█▆▅▄▅▄▅▃▂▂▂▁▂▃▂▂▂
val_sparse_categorical_accuracy,▁▂▂▃▄▅▅▆▇█████▇▇█

0,1
GFLOPS,0.00037
best_epoch,11.0
best_val_loss,0.45531
epoch,16.0
loss,0.40876
sparse_categorical_accuracy,0.85877
val_loss,0.47497
val_sparse_categorical_accuracy,0.84033


[34m[1mwandb[0m: Agent Starting Run: iwfsk39h with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta_1: 0.9959680281129611
[34m[1mwandb[0m: 	beta_2: 0.9933086429891596
[34m[1mwandb[0m: 	dense_1: 345
[34m[1mwandb[0m: 	dense_2: 171
[34m[1mwandb[0m: 	dense_3: 253
[34m[1mwandb[0m: 	dense_4: 74
[34m[1mwandb[0m: 	epochs: 19
[34m[1mwandb[0m: 	learning_rate: 0.0045660284086622505


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 345)               270825    
                                                                 
 dense_1 (Dense)             (None, 171)               59166     
                                                                 
 dense_2 (Dense)             (None, 253)               43516     
                                                                 
 dense_3 (Dense)             (None, 74)                18796     
                                                                 
 dense_4 (Dense)             (None, 10)                7

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██
loss,█▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▆▇▇▇▇▇▇▇██████
val_loss,██▁▂▅▂▁▂▁▃▁▁▂▃▂▄▁▂▂
val_sparse_categorical_accuracy,▂▁▅▅▄▅▇▆▇▆▇█▆▇▇▇█▇█

0,1
GFLOPS,0.00039
best_epoch,10.0
best_val_loss,0.37027
epoch,18.0
loss,0.27278
sparse_categorical_accuracy,0.90013
val_loss,0.38299
val_sparse_categorical_accuracy,0.88008


[34m[1mwandb[0m: Agent Starting Run: grcxmmy5 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9696908662593676
[34m[1mwandb[0m: 	beta_2: 0.9977753857479266
[34m[1mwandb[0m: 	dense_1: 261
[34m[1mwandb[0m: 	dense_2: 185
[34m[1mwandb[0m: 	dense_3: 197
[34m[1mwandb[0m: 	dense_4: 123
[34m[1mwandb[0m: 	epochs: 29
[34m[1mwandb[0m: 	learning_rate: 0.007533881364763507


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 261)               204885    
                                                                 
 dense_1 (Dense)             (None, 185)               48470     
                                                                 
 dense_2 (Dense)             (None, 197)               36642     
                                                                 
 dense_3 (Dense)             (None, 123)               24354     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
loss,█▄▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▂▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█▇▇████▇████
val_loss,█▅▃▁▂▄▃▂▃▂▂▂▁▂▂▂▂▃▃▄▄▄▄▃▄▄▃▃▄
val_sparse_categorical_accuracy,▁▄▅▆▆▄▅▅▄▆▆▇█▇▇▇▇█▇▆█▇▆█▇███▇

0,1
GFLOPS,0.00032
best_epoch,3.0
best_val_loss,0.34273
epoch,28.0
loss,0.21969
sparse_categorical_accuracy,0.91948
val_loss,0.38934
val_sparse_categorical_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: 9lezmfg5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.9986365816905892
[34m[1mwandb[0m: 	beta_2: 0.9909609459882592
[34m[1mwandb[0m: 	dense_1: 314
[34m[1mwandb[0m: 	dense_2: 138
[34m[1mwandb[0m: 	dense_3: 230
[34m[1mwandb[0m: 	dense_4: 81
[34m[1mwandb[0m: 	epochs: 41
[34m[1mwandb[0m: 	learning_rate: 0.003603855205813318


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 314)               246490    
                                                                 
 dense_1 (Dense)             (None, 138)               43470     
                                                                 
 dense_2 (Dense)             (None, 230)               31970     
                                                                 
 dense_3 (Dense)             (None, 81)                18711     
                                                                 
 dense_4 (Dense)             (None, 10)                8

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁███
sparse_categorical_accuracy,▇███████▇▆▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁███▇
val_sparse_categorical_accuracy,████████▇▆▄▄▂▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
GFLOPS,0.00034
best_epoch,3.0
best_val_loss,0.36644
epoch,40.0
loss,2.8388790335632205e+20
sparse_categorical_accuracy,0.12233
val_loss,5.018739722839654e+16
val_sparse_categorical_accuracy,0.0955


[34m[1mwandb[0m: Agent Starting Run: qkzd90qg with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9852502397410988
[34m[1mwandb[0m: 	beta_2: 0.9920882804832444
[34m[1mwandb[0m: 	dense_1: 323
[34m[1mwandb[0m: 	dense_2: 173
[34m[1mwandb[0m: 	dense_3: 187
[34m[1mwandb[0m: 	dense_4: 61
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	learning_rate: 0.0020471564926618766


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 323)               253555    
                                                                 
 dense_1 (Dense)             (None, 173)               56052     
                                                                 
 dense_2 (Dense)             (None, 187)               32538     
                                                                 
 dense_3 (Dense)             (None, 61)                11468     
                                                                 
 dense_4 (Dense)             (None, 10)                6

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▇▇▇▇▇▇████
val_loss,█▄▄▃▂▂▁▁▁▁▂▁▁▁▁
val_sparse_categorical_accuracy,▁▄▄▅▆▆▆▇▇▇▆▇█▇█

0,1
GFLOPS,0.00035
best_epoch,9.0
best_val_loss,0.32179
epoch,14.0
loss,0.20721
sparse_categorical_accuracy,0.9221
val_loss,0.32871
val_sparse_categorical_accuracy,0.89267


[34m[1mwandb[0m: Agent Starting Run: 9qasrrve with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9691021422519166
[34m[1mwandb[0m: 	beta_2: 0.9947775069279772
[34m[1mwandb[0m: 	dense_1: 340
[34m[1mwandb[0m: 	dense_2: 209
[34m[1mwandb[0m: 	dense_3: 219
[34m[1mwandb[0m: 	dense_4: 55
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	learning_rate: 0.025169935601093256


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 340)               266900    
                                                                 
 dense_1 (Dense)             (None, 209)               71269     
                                                                 
 dense_2 (Dense)             (None, 219)               45990     
                                                                 
 dense_3 (Dense)             (None, 55)                12100     
                                                                 
 dense_4 (Dense)             (None, 10)                5

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇▇████████
val_loss,█▅▃▃▂▃▂▁▃▂▁▂▆▂▂
val_sparse_categorical_accuracy,▁▄▆▆▇▆▇█▇▇██▅██

0,1
GFLOPS,0.0004
best_epoch,7.0
best_val_loss,0.43378
epoch,14.0
loss,0.38297
sparse_categorical_accuracy,0.8641
val_loss,0.44831
val_sparse_categorical_accuracy,0.85067


[34m[1mwandb[0m: Agent Starting Run: m8rh9js2 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.969790584974788
[34m[1mwandb[0m: 	beta_2: 0.9901273036319144
[34m[1mwandb[0m: 	dense_1: 304
[34m[1mwandb[0m: 	dense_2: 151
[34m[1mwandb[0m: 	dense_3: 136
[34m[1mwandb[0m: 	dense_4: 118
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	learning_rate: 0.008402867130880303


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 304)               238640    
                                                                 
 dense_1 (Dense)             (None, 151)               46055     
                                                                 
 dense_2 (Dense)             (None, 136)               20672     
                                                                 
 dense_3 (Dense)             (None, 118)               16166     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▇▇▇▇▇▇▇█████
val_loss,█▅▃▄▅▂▂▂▃▂▁▂▁▃▄
val_sparse_categorical_accuracy,▁▄▆▅▃▆▆▆▇▇▇▇▇█▇

0,1
GFLOPS,0.00032
best_epoch,12.0
best_val_loss,0.35526
epoch,14.0
loss,0.27167
sparse_categorical_accuracy,0.89998
val_loss,0.38964
val_sparse_categorical_accuracy,0.87158


[34m[1mwandb[0m: Agent Starting Run: 8uxxx4lv with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9496271779065476
[34m[1mwandb[0m: 	beta_2: 0.9918424194654952
[34m[1mwandb[0m: 	dense_1: 425
[34m[1mwandb[0m: 	dense_2: 145
[34m[1mwandb[0m: 	dense_3: 213
[34m[1mwandb[0m: 	dense_4: 93
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	learning_rate: 0.017636170499021338


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 425)               333625    
                                                                 
 dense_1 (Dense)             (None, 145)               61770     
                                                                 
 dense_2 (Dense)             (None, 213)               31098     
                                                                 
 dense_3 (Dense)             (None, 93)                19902     
                                                                 
 dense_4 (Dense)             (None, 10)                9

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇▇▇▇███████████
val_loss,█▃▄▂▁▂▁▂▁▂▁▁▂▂▁▁▃▂▂▂
val_sparse_categorical_accuracy,▁▅▄▆▆▆▇▇▇▇█▇▇▇██▇▇▆▇

0,1
GFLOPS,0.00045
best_epoch,11.0
best_val_loss,0.39054
epoch,19.0
loss,0.32418
sparse_categorical_accuracy,0.88567
val_loss,0.4148
val_sparse_categorical_accuracy,0.86275


[34m[1mwandb[0m: Agent Starting Run: mcnum2n1 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9081671286821668
[34m[1mwandb[0m: 	beta_2: 0.9906941671931154
[34m[1mwandb[0m: 	dense_1: 274
[34m[1mwandb[0m: 	dense_2: 176
[34m[1mwandb[0m: 	dense_3: 159
[34m[1mwandb[0m: 	dense_4: 109
[34m[1mwandb[0m: 	epochs: 32
[34m[1mwandb[0m: 	learning_rate: 0.003792496207863434


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 274)               215090    
                                                                 
 dense_1 (Dense)             (None, 176)               48400     
                                                                 
 dense_2 (Dense)             (None, 159)               28143     
                                                                 
 dense_3 (Dense)             (None, 109)               17440     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇███
loss,█▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██████████
val_loss,█▄▃▃▃▂▁▂▂▂▃▃▂▂▂▁▂▃▃▄▂▃▄▄▄▅▅▆▄▆▅▆
val_sparse_categorical_accuracy,▁▃▆▄▅▅▇▇▆▆▇▆▇█▇█▇██▇██▇████▇█▇▇█

0,1
GFLOPS,0.00031
best_epoch,15.0
best_val_loss,0.31658
epoch,31.0
loss,0.15726
sparse_categorical_accuracy,0.93983
val_loss,0.43979
val_sparse_categorical_accuracy,0.88983


[34m[1mwandb[0m: Agent Starting Run: 0112yjxg with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9596448116425584
[34m[1mwandb[0m: 	beta_2: 0.9945362314094058
[34m[1mwandb[0m: 	dense_1: 313
[34m[1mwandb[0m: 	dense_2: 204
[34m[1mwandb[0m: 	dense_3: 237
[34m[1mwandb[0m: 	dense_4: 95
[34m[1mwandb[0m: 	epochs: 19
[34m[1mwandb[0m: 	learning_rate: 0.011294378135463716


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 313)               245705    
                                                                 
 dense_1 (Dense)             (None, 204)               64056     
                                                                 
 dense_2 (Dense)             (None, 237)               48585     
                                                                 
 dense_3 (Dense)             (None, 95)                22610     
                                                                 
 dense_4 (Dense)             (None, 10)                9

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██
loss,█▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇▇▇▇██████████
val_loss,█▄▂▃▃▂▃▃▂▁▃▂▁▂▁▂▂▂▂
val_sparse_categorical_accuracy,▁▅▇▆▆▇▆▆▇▇▇▇█▇██▇▇█

0,1
GFLOPS,0.00038
best_epoch,12.0
best_val_loss,0.3541
epoch,18.0
loss,0.27338
sparse_categorical_accuracy,0.90125
val_loss,0.37269
val_sparse_categorical_accuracy,0.87825


[34m[1mwandb[0m: Agent Starting Run: z2jdekpl with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.932629399370042
[34m[1mwandb[0m: 	beta_2: 0.9930107701003396
[34m[1mwandb[0m: 	dense_1: 353
[34m[1mwandb[0m: 	dense_2: 156
[34m[1mwandb[0m: 	dense_3: 196
[34m[1mwandb[0m: 	dense_4: 124
[34m[1mwandb[0m: 	epochs: 21
[34m[1mwandb[0m: 	learning_rate: 0.002174924049438813


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 353)               277105    
                                                                 
 dense_1 (Dense)             (None, 156)               55224     
                                                                 
 dense_2 (Dense)             (None, 196)               30772     
                                                                 
 dense_3 (Dense)             (None, 124)               24428     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇▇██
loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
sparse_categorical_accuracy,▁▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇█████
val_loss,█▅▃▄▄▁▁▃▁▄▃▂▃▂▄▃▂▃▄▅▃
val_sparse_categorical_accuracy,▁▃▄▄▄▆▆▆▆▆▇▇▇█▇▇██▇▇█

0,1
GFLOPS,0.00039
best_epoch,6.0
best_val_loss,0.32252
epoch,20.0
loss,0.18601
sparse_categorical_accuracy,0.92919
val_loss,0.35501
val_sparse_categorical_accuracy,0.893


[34m[1mwandb[0m: Agent Starting Run: hvie1hdu with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9575268902820964
[34m[1mwandb[0m: 	beta_2: 0.9959139127373968
[34m[1mwandb[0m: 	dense_1: 265
[34m[1mwandb[0m: 	dense_2: 160
[34m[1mwandb[0m: 	dense_3: 213
[34m[1mwandb[0m: 	dense_4: 68
[34m[1mwandb[0m: 	epochs: 16
[34m[1mwandb[0m: 	learning_rate: 0.02442626463836395


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 265)               208025    
                                                                 
 dense_1 (Dense)             (None, 160)               42560     
                                                                 
 dense_2 (Dense)             (None, 213)               34293     
                                                                 
 dense_3 (Dense)             (None, 68)                14552     
                                                                 
 dense_4 (Dense)             (None, 10)                6

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▇▇▇████████████
val_loss,▆▄▄▃▆▁▄▅▁▂▃▅▁█▄▃
val_sparse_categorical_accuracy,▂▄▄▅▃█▅▇▇▆▆▆█▁▅▆

0,1
GFLOPS,0.0003
best_epoch,8.0
best_val_loss,0.45541
epoch,15.0
loss,0.44308
sparse_categorical_accuracy,0.84535
val_loss,0.47822
val_sparse_categorical_accuracy,0.83708


[34m[1mwandb[0m: Agent Starting Run: inrhw61g with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta_1: 0.9744285323465486
[34m[1mwandb[0m: 	beta_2: 0.9924856661592542
[34m[1mwandb[0m: 	dense_1: 300
[34m[1mwandb[0m: 	dense_2: 190
[34m[1mwandb[0m: 	dense_3: 209
[34m[1mwandb[0m: 	dense_4: 104
[34m[1mwandb[0m: 	epochs: 18
[34m[1mwandb[0m: 	learning_rate: 0.004198458621312241


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 dense_1 (Dense)             (None, 190)               57190     
                                                                 
 dense_2 (Dense)             (None, 209)               39919     
                                                                 
 dense_3 (Dense)             (None, 104)               21840     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
loss,█▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▅▆▆▆▇▇▇▇▇█▇█████
val_loss,█▆▇▅▂▃▂▂▂▆▃▁▆▅▃▃▂▄
val_sparse_categorical_accuracy,▁▃▂▃▅▆▆▆▇▆▇█▇▇█▇██

0,1
GFLOPS,0.00036
best_epoch,11.0
best_val_loss,0.34328
epoch,17.0
loss,0.24166
sparse_categorical_accuracy,0.91037
val_loss,0.37816
val_sparse_categorical_accuracy,0.88608


[34m[1mwandb[0m: Agent Starting Run: w5wcyl0g with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9768455915384584
[34m[1mwandb[0m: 	beta_2: 0.9903106089490604
[34m[1mwandb[0m: 	dense_1: 288
[34m[1mwandb[0m: 	dense_2: 251
[34m[1mwandb[0m: 	dense_3: 206
[34m[1mwandb[0m: 	dense_4: 110
[34m[1mwandb[0m: 	epochs: 39
[34m[1mwandb[0m: 	learning_rate: 0.04304114601549639


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 288)               226080    
                                                                 
 dense_1 (Dense)             (None, 251)               72539     
                                                                 
 dense_2 (Dense)             (None, 206)               51912     
                                                                 
 dense_3 (Dense)             (None, 110)               22770     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇▇█▇▇▇██▇████▇█▇▇▇▇▇▇▇█████▇██▇████
val_loss,█▄▄█▃▂▂▅▅▁▄▂▂▂▂▃▄▃▂▅▅▄▃▃▃▃▃▃▂▂▂▂▁▂▂▂▂▂▂
val_sparse_categorical_accuracy,▁▅▅▂▇▇█▆▆█▅▇▇█▇▆▅▆▇▄▄▆▆▆▆▆▆▆▇▆▇▇█▇▇▇▇█▇

0,1
GFLOPS,0.00037
best_epoch,32.0
best_val_loss,0.68273
epoch,38.0
loss,0.68388
sparse_categorical_accuracy,0.74767
val_loss,0.70817
val_sparse_categorical_accuracy,0.74117


[34m[1mwandb[0m: Agent Starting Run: 28h24dyx with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9155385650966376
[34m[1mwandb[0m: 	beta_2: 0.9900508064668974
[34m[1mwandb[0m: 	dense_1: 509
[34m[1mwandb[0m: 	dense_2: 128
[34m[1mwandb[0m: 	dense_3: 138
[34m[1mwandb[0m: 	dense_4: 126
[34m[1mwandb[0m: 	epochs: 24
[34m[1mwandb[0m: 	learning_rate: 0.03308452104303661


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 509)               399565    
                                                                 
 dense_1 (Dense)             (None, 128)               65280     
                                                                 
 dense_2 (Dense)             (None, 138)               17802     
                                                                 
 dense_3 (Dense)             (None, 126)               17514     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▇▇▇█████▇█████████████
val_loss,█▆▃▂▃▃▆▃▇▆▄▁▃▃▃▁▁▂▂▄▆▄▄▃
val_sparse_categorical_accuracy,▁▄▆▆▆▇▃▇▆▃▅█▇▄▅█▆▇▇▆▃▇▆▇

0,1
GFLOPS,0.0005
best_epoch,11.0
best_val_loss,0.45508
epoch,23.0
loss,0.42339
sparse_categorical_accuracy,0.85694
val_loss,0.48285
val_sparse_categorical_accuracy,0.84567


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hx0jis8n with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9815581493525448
[34m[1mwandb[0m: 	beta_2: 0.9937475577629672
[34m[1mwandb[0m: 	dense_1: 257
[34m[1mwandb[0m: 	dense_2: 136
[34m[1mwandb[0m: 	dense_3: 159
[34m[1mwandb[0m: 	dense_4: 100
[34m[1mwandb[0m: 	epochs: 18
[34m[1mwandb[0m: 	learning_rate: 0.008898951745587667


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 257)               201745    
                                                                 
 dense_1 (Dense)             (None, 136)               35088     
                                                                 
 dense_2 (Dense)             (None, 159)               21783     
                                                                 
 dense_3 (Dense)             (None, 100)               16000     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▇▇▇▇▇█████████
val_loss,█▅▄▂▂▂▃▁▂▁▄▁▃▂▄▂▁▂
val_sparse_categorical_accuracy,▁▄▄▅▅▆▆▇▆▆▆▇▇▇▆▇██

0,1
GFLOPS,0.00028
best_epoch,11.0
best_val_loss,0.34985
epoch,17.0
loss,0.25904
sparse_categorical_accuracy,0.90442
val_loss,0.35831
val_sparse_categorical_accuracy,0.88267


[34m[1mwandb[0m: Agent Starting Run: iklvrsq8 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9867983216864308
[34m[1mwandb[0m: 	beta_2: 0.9939076262444322
[34m[1mwandb[0m: 	dense_1: 310
[34m[1mwandb[0m: 	dense_2: 182
[34m[1mwandb[0m: 	dense_3: 224
[34m[1mwandb[0m: 	dense_4: 41
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	learning_rate: 0.007630100864385024


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 310)               243350    
                                                                 
 dense_1 (Dense)             (None, 182)               56602     
                                                                 
 dense_2 (Dense)             (None, 224)               40992     
                                                                 
 dense_3 (Dense)             (None, 41)                9225      
                                                                 
 dense_4 (Dense)             (None, 10)                4

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▆▆▇▇▇▇████████
val_loss,█▅▃▃▂▂▂▂▂▁▁▁▁▂▂
val_sparse_categorical_accuracy,▁▄▆▆▇▇▇▇▇█▇█▇▇█

0,1
GFLOPS,0.00035
best_epoch,11.0
best_val_loss,0.3529
epoch,14.0
loss,0.25956
sparse_categorical_accuracy,0.90485
val_loss,0.37363
val_sparse_categorical_accuracy,0.87892


[34m[1mwandb[0m: Agent Starting Run: bznqqjeq with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9355128911534324
[34m[1mwandb[0m: 	beta_2: 0.9910382609109922
[34m[1mwandb[0m: 	dense_1: 273
[34m[1mwandb[0m: 	dense_2: 151
[34m[1mwandb[0m: 	dense_3: 200
[34m[1mwandb[0m: 	dense_4: 87
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	learning_rate: 0.008662586331871275


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 273)               214305    
                                                                 
 dense_1 (Dense)             (None, 151)               41374     
                                                                 
 dense_2 (Dense)             (None, 200)               30400     
                                                                 
 dense_3 (Dense)             (None, 87)                17487     
                                                                 
 dense_4 (Dense)             (None, 10)                8

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▆▆▆▇▇▇▇▇▇▇▇███████████
val_loss,█▃▂▂▄▄▂▂▁▂▁▁▂▁▂▃▂▁▃▁▃▃▂▆▄
val_sparse_categorical_accuracy,▁▅▅▆▅▅▆▆▆▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇

0,1
GFLOPS,0.0003
best_epoch,13.0
best_val_loss,0.35723
epoch,24.0
loss,0.25335
sparse_categorical_accuracy,0.90977
val_loss,0.41014
val_sparse_categorical_accuracy,0.87892


[34m[1mwandb[0m: Agent Starting Run: vo7fu6ix with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta_1: 0.9888964593164976
[34m[1mwandb[0m: 	beta_2: 0.992148909221722
[34m[1mwandb[0m: 	dense_1: 272
[34m[1mwandb[0m: 	dense_2: 248
[34m[1mwandb[0m: 	dense_3: 234
[34m[1mwandb[0m: 	dense_4: 125
[34m[1mwandb[0m: 	epochs: 29
[34m[1mwandb[0m: 	learning_rate: 0.022947780943994897


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 28, 28)            0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 272)               213520    
                                                                 
 dense_1 (Dense)             (None, 248)               67704     
                                                                 
 dense_2 (Dense)             (None, 234)               58266     
                                                                 
 dense_3 (Dense)             (None, 125)               29375     
                                                                 
 dense_4 (Dense)             (None, 10)                1

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
loss,█▃▂▂▂▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁
sparse_categorical_accuracy,▁▅▆▇▇▇▇▇▇▇█████▇███▇▇▇███████
val_loss,█▇▃▃▁▂▂▂▁▂▃▁▃▁▂▂▃▃▃▃▆▃▂▅▂▂▂▃▂
val_sparse_categorical_accuracy,▁▃▅▅▇▇██▇▇▅▇▆█▇▇▇▇▇▆▆▆█▇██▇▇▇

0,1
GFLOPS,0.00037
best_epoch,4.0
best_val_loss,0.44566
epoch,28.0
loss,0.39973
sparse_categorical_accuracy,0.8644
val_loss,0.47245
val_sparse_categorical_accuracy,0.84725


## Show best hyperparameters

In [7]:
api = wandb.Api()
sweep = api.sweep("nsiete23/tf_mlp/sweeps/" + sweep_id)

best_run = sweep.best_run()
print(best_run.id)
pprint(best_run.config)

[34m[1mwandb[0m: Sorting runs by +summary_metrics.val_loss


40dunp3m
{'batch_size': 128,
 'beta_1': 0.986435025614692,
 'beta_2': 0.9904482393527106,
 'dense_1': 485,
 'dense_2': 176,
 'dense_3': 131,
 'dense_4': 122,
 'epochs': 19,
 'learning_rate': 0.00012885226472854095}


## Finish Wandb run

In [8]:
wandb.finish()