In [1]:
from tqdm import tqdm_notebook as tqdm
from IPython.display import Markdown, display
import os
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow import keras

In [2]:
def printmd(string):
    display(Markdown(string))

При чтении будем хранить данные в переменной `CACHE`, чтобы ускорить загрузку. В случае их обновления, достаточно вызвать `read(override=True)`

In [3]:
SMALL_DATASET_DIR = '../../nmnist/notMNIST_small/'
LARGE_DATASET_DIR = '../../nmnist/notMNIST_large/'
CACHE = {}
LABEL_MAP = {}
INV_LABEL_MAP = {}

In [4]:
def read(data_dir, override=False):
    f_v = 0
    global CACHE
    if not CACHE.get(data_dir, []) or override:
        CACHE[data_dir] = []
        X, y = [], []
        for f in tqdm(os.listdir(data_dir), desc='Letter'):
            if not f.startswith('.'):
                img_dir = os.path.join(data_dir, f)
                for img in os.listdir(img_dir):
                    img_path = os.path.join(img_dir, img)
                    data = cv2.imread(img_path, 0)
                    if data is None:
                        continue
                    X.append(data * 2 / 255 - 1)
                    if LABEL_MAP.get(f) is None:
                        LABEL_MAP[f] = f_v
                        INV_LABEL_MAP[f_v] = f
                        f_v += 1
                    y.append(LABEL_MAP[f])
        CACHE[data_dir].append(np.array(X))
        CACHE[data_dir].append(np.array(y))
    return CACHE[data_dir][0], CACHE[data_dir][1]

In [5]:
def get_data(data_dir, verbose=False, override=False):
    X, y = read(data_dir, override=override)
    assert X.shape[0] == y.shape[0]
    N = X.shape[0]
    if verbose:
        print(X.shape)
        print(y.shape)
        print(X[:5])
        print(y[:5])
        print(np.unique(y))
    return X, y

In [6]:
def get_split_data(data_dir, size=(0.7, 0.3), verbose=False, random_state=6, override=False):
    X, y = shuffle(*get_data(data_dir, verbose=verbose, override=override), random_state=random_state)
    assert abs(np.sum(size) - 1.0) < 0.001
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=size[1], random_state=random_state)
    return X_train, y_train, X_test, y_test

In [7]:
X_train, y_train, X_test, y_test = get_split_data(LARGE_DATASET_DIR, override=True)

HBox(children=(IntProgress(value=0, description='Letter', max=10, style=ProgressStyle(description_width='initi…




Используем настройки из запуска на малом датасете, а также изменим их немного под большой датасет: `lr = 0.1`, `epochs = 20`, функции активации `ReLU`

In [16]:
lr = 0.1
epochs = 20
printmd('**Building with new architecture...**')
print('512(relu) - 256(relu) - 64(relu) - 32(relu) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.relu),
    keras.layers.Dense(256, activation=tf.nn.relu),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer=tf.train.GradientDescentOptimizer(lr), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 256(relu) - 64(relu) - 32(relu) - 10(softmax)
Learning rate: 0.1
Epochs: 20

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.33970796888758176
Accuracy: 0.9031026553690112


Попробуем добавить dropout (используем небольшой коэффициент, как рекомендуется здесь: https://machinelearningmastery.com/dropout-regularization-deep-learning-models-keras/

```Generally, use a small dropout value of 20%-50% of neurons with 20% providing a good starting point. A probability too low has minimal effect and a value too high results in under-learning by the network.```)

In [19]:
lr = 0.1
epochs = 20
printmd('**Building with new architecture...**')
print('512(relu) - 128(relu) - 32(tanh) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.relu),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(32, activation=tf.nn.tanh),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer=tf.train.GradientDescentOptimizer(lr), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 128(relu) - 32(tanh) - 10(softmax)
Learning rate: 0.1
Epochs: 20

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.3250090445377254
Accuracy: 0.8995054650836904


Видно, что модель давала прогресс на эпохах, докинем еще 15:

In [20]:
model.fit(X_train, y_train, epochs=15)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Loss: 0.3076896562316675
Accuracy: 0.9052823888875144


Попробуем применить динамический (адаптивный) коэффициент обучения. Для этого воспользуемся методом `Adam`:

In [21]:
lr = 0.001
epochs = 30
printmd('**Building with new architecture...**')
print('512(relu) - 128(relu) - 32(tanh) - 10(softmax)')
print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.tanh),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
loss_method = 'sparse_categorical_crossentropy'
model.compile(optimizer=tf.train.AdamOptimizer(lr), 
              loss=loss_method,
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=epochs)
loss, acc = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

512(relu) - 128(relu) - 32(tanh) - 10(softmax)
Learning rate: 0.001
Epochs: 30

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Loss: 0.39701959608926446
Accuracy: 0.8782877122251527


Как и в случае с малым датасетом, `Adam` в данной задаче плохо работает

<div style="text-align: right"> @chiselko6 </div>