In [3]:
import numpy as np
import pandas as pd
import keras
import imageio
import glob
from keras.models import Sequential
from keras.layers import *
from keras.optimizers import SGD
from IPython.display import Markdown, display
import gc

def printmd(string):
    display(Markdown(string))

Загрузим датасет, разобьём на train 90%, val 10%, test 10%.

In [4]:
df = pd.read_csv('mnist.csv', dtype=np.float32)

train = df.sample(frac=0.8)
other = df.drop(train.index)
val   = other.sample(frac=0.5)
test  = other.drop(val.index)

split = lambda df: (
    df.drop('label', axis=1).as_matrix() / 255,            # 255 — map colors to [0..1]
    keras.utils.to_categorical(df.label.as_matrix(), 10))  # 10 — number of classes

trainX, trainY = split(train)
valX, valY     = split(val)
testX, testY   = split(test)

Функция будет обучать модель и смотреть точность.

In [16]:
def test(layers, test=True, print=True):
    keras.backend.clear_session()
    
    m = Sequential()
    for l in layers: m.add(l)
    
    m.compile(SGD(0.2), 'categorical_crossentropy', ['accuracy'])
    m.fit(trainX, trainY, batch_size=128, epochs=10, verbose=True, validation_data=(valX, valY))

    x, y = (testX, testY) if test else (valX, valY)
    loss, accu = m.evaluate(x, y, verbose=False)
    keras.backend.clear_session()
    
    title = '**test**' if test else '**val**'
    if print: printmd(f'{title}: loss {loss:.3f}, accuracy {accu:.3f}')
    return accu

### task 1

Обучите softmax регрессию на подготовленном датасете mnist.

In [46]:
test([
    Dense(10, activation='softmax', input_shape=(28 * 28,)),
])

**test**:  loss 0.305,   accuracy 0.903

### task 2

Добавьте один слой из 1024 нейронов с tanh функцией активации.

In [48]:
test([
    Dense(1024, activation='tanh', input_shape=(28 * 28,)),
    Dense(10,   activation='softmax'),
])

**test**:  loss 0.269,   accuracy 0.918

### task 3

Добавьте к нейронной сети слой из 32-х сверток с ядром 5x5 и шагом 2 с функцией активации ReLU.

In [49]:
test([
    Reshape((28, 28, 1), input_shape=(28 * 28,)),
    Conv2D(32, (5, 5), strides=2, activation='relu'),
    Flatten(),
    Dense(1024, activation='tanh'),
    Dense(10,   activation='softmax'),
])

**test**:  loss 0.109,   accuracy 0.969

### task 4

Добавьте к нейронной сети слой max pool ядром (2x2) и шагом 1.

In [50]:
test([
    Reshape((28, 28, 1), input_shape=(28 * 28,)),
    Conv2D(32, (5, 5), strides=2, activation='relu'),
    MaxPooling2D((2, 2), strides=1),
    Flatten(),
    Dense(1024, activation='tanh'),
    Dense(10,   activation='softmax'),
])

**test**:  loss 0.094,   accuracy 0.976

### task 5

Добавьте к нейронной сети регуляризацию dropout и оптимизируйте коэффициент, основываясь на validate сете.

In [15]:
run = lambda rate: test([
        Reshape((28, 28, 1), input_shape=(28 * 28,)),
        Conv2D(32, (5, 5), strides=2, activation='relu'),
        MaxPooling2D((2, 2), strides=1),
        Dropout(rate),
        Flatten(),
        Dense(1024, activation='tanh'),
        Dense(10,   activation='softmax'),
    ], test=False, print=False)

rates = np.linspace(0, 1, 20)
accus = np.array([run(rate) for rate in rates])

rate, acc = rates[accus.argmax()], accus.max()
print(f'best rate: {rate:.3f}, acc: {acc:.3f}')

best rate: 0.368, acc: 0.986


### task 6

Добавьте еще один слой свертки и еще один слой maxpool (параметры задайте самостоятельно).

In [35]:
test([
    Reshape((28, 28, 1), input_shape=(28 * 28,)),
    Conv2D(32, (5, 5), strides=2, activation='relu'),
    MaxPooling2D((2, 2), strides=1),
    Dropout(0.368),
    Conv2D(64, (5, 5), strides=2, activation='relu'),
    MaxPooling2D((2, 2), strides=2),
    Dropout(0.368),
    Flatten(),
    Dense(1024, activation='tanh'),
    Dense(10,   activation='softmax'),
])

**test**: loss 0.047, accuracy 0.988

0.988

### task 7

Подготовьте датасет notMNIST (выделите train/validate/test) и примените сетки из основного задания.

In [5]:
df = pd.read_csv('not_mnist.csv', dtype=np.float32)

train = df.sample(frac=0.8)
other = df.drop(train.index)
val   = other.sample(frac=0.5)
test  = other.drop(val.index)

split = lambda df: (
    df.drop('label', axis=1).as_matrix() / 255,            # 255 — map colors to [0..1]
    keras.utils.to_categorical(df.label.as_matrix(), 10))  # 10 — number of classes

trainX, trainY = split(train)
valX, valY     = split(val)
testX, testY   = split(test)

##### 1 sotmax

In [17]:
test([
    Dense(10, activation='softmax', input_shape=(28 * 28,)),
]) and None

Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**test**: loss 0.419, accuracy 0.893

###### 2 tanh, 1024

In [18]:
test([
    Dense(1024, activation='tanh', input_shape=(28 * 28,)),
    Dense(10,   activation='softmax'),
]) and None

Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**test**: loss 6.034, accuracy 0.261

###### 3 RELU

In [19]:
test([
    Reshape((28, 28, 1), input_shape=(28 * 28,)),
    Conv2D(32, (5, 5), strides=2, activation='relu'),
    Flatten(),
    Dense(1024, activation='tanh'),
    Dense(10,   activation='softmax'),
]) and None

Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**test**: loss 0.343, accuracy 0.919

###### 4 maxpool

In [20]:
test([
    Reshape((28, 28, 1), input_shape=(28 * 28,)),
    Conv2D(32, (5, 5), strides=2, activation='relu'),
    MaxPooling2D((2, 2), strides=1),
    Flatten(),
    Dense(1024, activation='tanh'),
    Dense(10,   activation='softmax'),
]) and None

Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**test**: loss 0.330, accuracy 0.907

###### 5 dropout

In [21]:
run = lambda rate: test([
        Reshape((28, 28, 1), input_shape=(28 * 28,)),
        Conv2D(32, (5, 5), strides=2, activation='relu'),
        MaxPooling2D((2, 2), strides=1),
        Dropout(rate),
        Flatten(),
        Dense(1024, activation='tanh'),
        Dense(10,   activation='softmax'),
    ], test=False, print=False)

rates = np.linspace(0, 1, 20)
accus = np.array([run(rate) for rate in rates])

rate, acc = rates[accus.argmax()], accus.max()
print(f'best rate: {rate:.3f}, acc: {acc:.3f}')

Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10


Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
best rate: 0.263, acc: 0.928


###### 6 total

In [22]:
test([
    Reshape((28, 28, 1), input_shape=(28 * 28,)),
    Conv2D(32, (5, 5), strides=2, activation='relu'),
    MaxPooling2D((2, 2), strides=1),
    Dropout(rate),
    Flatten(),
    Dense(1024, activation='tanh'),
    Dense(10,   activation='softmax'),
]) and None

Train on 14979 samples, validate on 1872 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**test**: loss 0.270, accuracy 0.926

### task 8

Сравните время работы самой глубокой полученной сети на CPU и GPU.

In [7]:
run = lambda: test([
    Reshape((28, 28, 1), input_shape=(28 * 28,)),
    Conv2D(32, (5, 5), strides=2, activation='relu'),
    MaxPooling2D((2, 2), strides=1),
    Dropout(0.368),
    Conv2D(64, (5, 5), strides=2, activation='relu'),
    MaxPooling2D((2, 2), strides=2),
    Dropout(0.368),
    Flatten(),
    Dense(1024, activation='tanh'),
    Dense(10,   activation='softmax'),
])

In [38]:
%%time

print('GPU')
run()

GPU


**test**: loss 0.059, accuracy 0.985

CPU times: user 10.3 s, sys: 1.77 s, total: 12.1 s
Wall time: 10.4 s


In [10]:
# restart the kernel, run this cell, then the 1st and following

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [9]:
%%time

import tensorflow as tf
with tf.device('/cpu:0'):
    print('CPU')
    run()

CPU


**test**: loss 14.619, accuracy 0.093

CPU times: user 1min 3s, sys: 12.9 s, total: 1min 16s
Wall time: 26.2 s


<b style="color:#ff5722">Итог</b>: время на GPU: 12 секунд, время на CPU: 66 секунд.