In [65]:
from tqdm import tqdm_notebook as tqdm
from IPython.display import Markdown, display
import os
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow import keras

In [66]:
def printmd(string):
    display(Markdown(string))

При чтении будем хранить данные в переменной `CACHE`, чтобы ускорить загрузку. В случае их обновления, достаточно вызвать `read(override=True)`

In [67]:
SMALL_DATASET_DIR = '../../nmnist/notMNIST_small/'
LARGE_DATASET_DIR = '../../nmnist/notMNIST_large/'
CACHE = {}
LABEL_MAP = {}
INV_LABEL_MAP = {}

In [68]:
def read(data_dir, override=False):
    f_v = 0
    global CACHE
    if not CACHE.get(data_dir, []) or override:
        CACHE[data_dir] = []
        X, y = [], []
        for f in tqdm(os.listdir(data_dir), desc='Letter'):
            if not f.startswith('.'):
                img_dir = os.path.join(data_dir, f)
                for img in os.listdir(img_dir):
                    img_path = os.path.join(img_dir, img)
                    data = cv2.imread(img_path, 0)
                    if data is None:
                        continue
                    X.append(data * 2 / 255 - 1)
                    if LABEL_MAP.get(f) is None:
                        LABEL_MAP[f] = f_v
                        INV_LABEL_MAP[f_v] = f
                        f_v += 1
                    y.append(LABEL_MAP[f])
        CACHE[data_dir].append(np.array(X))
        CACHE[data_dir].append(np.array(y))
    return CACHE[data_dir][0], CACHE[data_dir][1]

In [69]:
def get_data(data_dir, verbose=False, override=False):
    X, y = read(data_dir, override=override)
    assert X.shape[0] == y.shape[0]
    N = X.shape[0]
    if verbose:
        print(X.shape)
        print(y.shape)
        print(X[:5])
        print(y[:5])
        print(np.unique(y))
    return X, y

In [70]:
def get_split_data(data_dir, size=(0.7, 0.3), verbose=False, random_state=6, override=False):
    X, y = shuffle(*get_data(data_dir, verbose=verbose, override=override), random_state=random_state)
    assert abs(np.sum(size) - 1.0) < 0.001
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=size[1], random_state=random_state)
    return X_train, y_train, X_test, y_test

In [71]:
X_train, y_train, X_test, y_test = get_split_data(SMALL_DATASET_DIR, override=True)

HBox(children=(IntProgress(value=0, description='Letter', max=11, style=ProgressStyle(description_width='initi…




Построим 5 слоев с функциями активации `ReLU` и `softmax` на последнем слое, коэффициент обучения $0.001$, $100$ эпох:

In [72]:
lr = 0.001
epochs = 100
printmd('**Building with new architecture...**')
print('512(relu) - 256(relu) - 64(relu) - 32(relu) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.relu),
    keras.layers.Dense(256, activation=tf.nn.relu),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer=tf.train.GradientDescentOptimizer(lr), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 256(relu) - 64(relu) - 32(relu) - 10(softmax)
Learning rate: 0.001
Epochs: 100

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 7

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Loss: 0.35672372748001735
Accuracy: 0.8971164115343538


Изменим коэффициент обучения на больший `lr = 0.1`:

In [74]:
lr = 0.1
epochs = 50
printmd('**Building with new architecture...**')
print('512(relu) - 256(relu) - 64(relu) - 32(relu) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.relu),
    keras.layers.Dense(256, activation=tf.nn.relu),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer=tf.train.GradientDescentOptimizer(lr), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 256(relu) - 64(relu) - 32(relu) - 10(softmax)
Learning rate: 0.1
Epochs: 50

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Loss: 0.503723808802721
Accuracy: 0.9186543253826984


Точность увеличилась, попробуем найти оптимальный коэффициент обучения:

In [76]:
lr = 0.2
epochs = 50
printmd('**Building with new architecture...**')
print('512(relu) - 256(relu) - 64(relu) - 32(relu) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.relu),
    keras.layers.Dense(256, activation=tf.nn.relu),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer=tf.train.GradientDescentOptimizer(lr), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 256(relu) - 64(relu) - 32(relu) - 10(softmax)
Learning rate: 0.2
Epochs: 50

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Loss: 0.466606018412621
Accuracy: 0.9120683517265931


Как видно, точность слегка упала. Поэтому экспериментально будем считать, что коэффициент обучения, увеличение которого ухудшает точность, равен `lr_opt = 0.1`. 

Далее попробуем применить другие функции активации, например `tanh`:

In [78]:
lr = 0.1
epochs = 50
printmd('**Building with new architecture...**')
print('512(tanh) - 128(tanh) - 32(tanh) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.tanh),
    keras.layers.Dense(128, activation=tf.nn.tanh),
    keras.layers.Dense(32, activation=tf.nn.tanh),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer=tf.train.GradientDescentOptimizer(lr), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(tanh) - 128(tanh) - 32(tanh) - 10(softmax)
Learning rate: 0.1
Epochs: 50

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Loss: 0.4285942423105175
Accuracy: 0.9163403346386615


В целом результаты не улучшились

Попробуем добавить dropout (используем небольшой коэффициент, как рекомендуется здесь: https://machinelearningmastery.com/dropout-regularization-deep-learning-models-keras/

```Generally, use a small dropout value of 20%-50% of neurons with 20% providing a good starting point. A probability too low has minimal effect and a value too high results in under-learning by the network.```) и L2 регуляризатор (задачи L1 в целом решаются дропаутом):

In [84]:
lr = 0.1
epochs = 100
printmd('**Building with new architecture...**')
print('512(relu) - 128(relu) - 32(tanh) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.relu, kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(128, activation=tf.nn.relu, kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(32, activation=tf.nn.tanh, kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer=tf.train.GradientDescentOptimizer(lr), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 128(relu) - 32(tanh) - 10(softmax)
Learning rate: 0.1
Epochs: 100

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 7

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Loss: 0.9961611226004781
Accuracy: 0.783730864991663


Без регуляризатора:

In [85]:
lr = 0.1
epochs = 100
printmd('**Building with new architecture...**')
print('512(relu) - 128(relu) - 32(tanh) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.relu),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(32, activation=tf.nn.tanh),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer=tf.train.GradientDescentOptimizer(lr), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 128(relu) - 32(tanh) - 10(softmax)
Learning rate: 0.1
Epochs: 100

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 7

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Loss: 0.3621504976871144
Accuracy: 0.9220363118547525


Как видно, регуляризатор лишь ухудшил результат.

Попробуем применить динамический (адаптивный) коэффициент обучения. Для этого воспользуемся методом `Adam`:

In [80]:
lr = 0.001
epochs = 100
printmd('**Building with new architecture...**')
print('512(relu) - 128(relu) - 32(tanh) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.tanh),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
loss_method = 'sparse_categorical_crossentropy'
model.compile(optimizer=tf.train.AdamOptimizer(lr), 
              loss=loss_method,
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 128(relu) - 32(tanh) - 10(softmax)
Learning rate: 0.001
Epochs: 100

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Loss: 0.35621672610269217
Accuracy: 0.912246351014596


In [82]:
lr = 0.01
epochs = 100
printmd('**Building with new architecture...**')
print('512(relu) - 128(relu) - 32(tanh) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.tanh),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
loss_method = 'sparse_categorical_crossentropy'
model.compile(optimizer=tf.train.AdamOptimizer(lr, beta1=0.8, beta2=0.9), 
              loss=loss_method,
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 128(relu) - 32(tanh) - 10(softmax)
Learning rate: 0.01
Epochs: 100

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100

KeyboardInterrupt: 

<div style="text-align: right"> @chiselko6 </div>