In [1]:
import numpy as np

In [46]:
import tensorflow as tf
tf.__version__

'2.1.0-dev20191215'

In [3]:
tf.executing_eagerly()

True

In [4]:
a = tf.constant(1)
b = tf.constant(2)
c = a + b

print(c)

tf.Tensor(3, shape=(), dtype=int32)


### Больше никаких сессий

* В tf1.x вы вручную создавали граф, а затем компилировали его и обсчитывали результаты при помощи `sess.run`
* В tf2 сессия - деталь реализации, и значения вычисляются как в "обычном" python
* `sess.run` заменен на функции. Подробности далее
* Не нужны `tf.global_variables_initializer()` и прочие глобальные переменные (хотя мы их не разбирали).

## Полносвязная сеть

In [5]:
tf.compat.v1.get_default_graph()

<tensorflow.python.framework.ops.Graph at 0x7fa1385aba58>

In [6]:
def dense(x, W, b, activation):
    # return activation(x @ W + b)
    return activation(tf.matmul(x, W) + b)

In [21]:
x = np.random.randn(1, 10)
W = np.random.randn(10, 10)
b = np.random.randn(1, 10)

In [24]:
x @ W + b

array([[ 4.47382476, -0.13733021, -1.8861317 ,  0.47889112,  2.17733765,
        -3.17548282,  5.72190234, -1.55847526, -0.59147654,  3.38809699]])

In [39]:
print(dense(x, W, b, tf.sigmoid))
print(dense(x, W, b, tf.nn.relu))
print(dense(x, W, b, lambda x: x))

tf.Tensor(
[[0.98872496 0.4657213  0.13168616 0.617486   0.89819588 0.04009884
  0.9967372  0.17386555 0.35629614 0.96733046]], shape=(1, 10), dtype=float64)
tf.Tensor(
[[4.47382476 0.         0.         0.47889112 2.17733765 0.
  5.72190234 0.         0.         3.38809699]], shape=(1, 10), dtype=float64)
[[ 4.47382476 -0.13733021 -1.8861317   0.47889112  2.17733765 -3.17548282
   5.72190234 -1.55847526 -0.59147654  3.38809699]]


In [36]:
t = dense(x, W, b, tf.nn.relu)
type(t.numpy())
t.numpy()

array([[4.47382476, 0.        , 0.        , 0.47889112, 2.17733765,
        0.        , 5.72190234, 0.        , 0.        , 3.38809699]])

## Keras


> Документация: https://keras.io

In [7]:
import sys
print(sys.executable)

/usr/bin/python3


In [8]:
import tensorflow.keras as keras
from typing import Callable

In [9]:
(X_tr, y_tr), (X_test, y_test) = keras.datasets.mnist.load_data()

In [10]:
class MNISTSequence(keras.utils.Sequence):
    def __init__(self, X, y, batch_size, preprocess: Callable = None):
        super().__init__()
        self._X = X
        self._y = y
        self._preprocess_fn = preprocess
        self._batch_size = batch_size
        
    def _preprocess(self, X, y):
        if self._preprocess_fn is not None:
            return self._preprocess_fn(X, y)
        return (X / 255.).reshape((-1, 28*28)), y
        
    def __len__(self):
        return int(np.ceil(len(self._X) / float(self._batch_size)))
    
    def __getitem__(self, idx):
        batch_idx = slice(idx*self._batch_size, (idx+1)*self._batch_size, 1)
        x_batch = self._X[batch_idx]
        y_batch = self._y[batch_idx]
        return self._preprocess(x_batch, y_batch)

In [11]:
train_seq = MNISTSequence(X_tr, y_tr, 128)
test_seq = MNISTSequence(X_test, y_test, 128)

In [12]:
train_iter = iter(train_seq)
x_batch, y_batch = next(train_iter)

In [13]:
for x, y in train_seq:
    pass

## Декоратор tf.function

> Трансформирует функцию на python, в том числе циклы, if-else и т.д., в код tensorflow.

## Gradient tape

**инициализация** 

In [227]:
tf.Variable(np.random.rand(784, 128))

<tf.Variable 'Variable:0' shape=(784, 128) dtype=float64, numpy=
array([[0.89680368, 0.37815468, 0.20279992, ..., 0.97394806, 0.15416133,
        0.84316236],
       [0.08898942, 0.73674456, 0.23349083, ..., 0.93695346, 0.79359374,
        0.22899672],
       [0.29841466, 0.06071232, 0.73178058, ..., 0.74884755, 0.66460804,
        0.09951496],
       ...,
       [0.00322459, 0.6392351 , 0.77039475, ..., 0.25717034, 0.31391178,
        0.88196372],
       [0.11258042, 0.27393739, 0.00170766, ..., 0.16015985, 0.76814886,
        0.2112376 ],
       [0.11743671, 0.95546248, 0.94452555, ..., 0.80189773, 0.35165856,
        0.18717889]])>

In [240]:
W1 = tf.Variable(np.random.randn(784, 128))
b1 = tf.Variable(np.zeros((1, 128)))
W2 = tf.Variable(np.random.randn(128, 10))
b2 = tf.Variable(np.zeros((1, 10)))

**"модель"**

In [151]:
from functools import partial


@tf.function
def mlp(x, W1, b1, a1, W2, b2, a2):
    x = dense(x, W1, b1, a1)
    x = dense(x, W2, b2, a2)
    return x


model = partial(mlp, W1=W1, b1=b1, a1=tf.nn.sigmoid, W2=W2, b2=b2, a2=tf.nn.sigmoid)

**Как вычислить значения меток?**

In [152]:
x_test_batch, y_test_batch = next(iter(test_seq))
y_test_predicted = np.argmax(model(x_test_batch).numpy(), axis=1)
accuracy_on_batch = sum(y_test_batch == y_test_predicted) / len(y_test_batch)
accuracy_on_batch

0.03125

**Все необходимое для оптимизации**

In [153]:
optimizer = keras.optimizers.Adam()
trainable_variables = [W1, b1, W2, b2]

**Цикл обучения: одна эпоха**

In [154]:
for x, y in train_seq:
    with tf.GradientTape() as tape:
        prediction = model(x)
        loss = keras.losses.sparse_categorical_crossentropy(y, prediction)
    gradients = tape.gradient(loss, trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainable_variables))

**Волшебство!**

In [155]:
y_test_predicted = np.argmax(model(x_test_batch).numpy(), axis=1)
accuracy_on_batch = sum(y_test_batch == y_test_predicted) / len(y_test_batch)
accuracy_on_batch

0.84375

## Полноценная сеть

In [236]:
tf.Variable(np.random.rand(784, 100))

<tf.Variable 'Variable:0' shape=(784, 100) dtype=float64, numpy=
array([[0.07354145, 0.95816226, 0.86958152, ..., 0.29124977, 0.4189878 ,
        0.92840866],
       [0.99050818, 0.99001549, 0.38769032, ..., 0.21420077, 0.005782  ,
        0.62489352],
       [0.39288026, 0.07498111, 0.06225706, ..., 0.53861557, 0.33892961,
        0.84317689],
       ...,
       [0.82507586, 0.31540101, 0.01527163, ..., 0.79641558, 0.77753827,
        0.60920997],
       [0.51993505, 0.38027543, 0.43004574, ..., 0.3533058 , 0.52283467,
        0.63536416],
       [0.06491503, 0.71965324, 0.09744576, ..., 0.21763117, 0.50965099,
        0.08717073]])>

In [14]:
from typing import Callable


class Dense:
    def __init__(self, inp_shape, out_shape, activation: Callable):
        self.trainable = True
        self._inp_shape = inp_shape
        self._out_shape = out_shape
        self._activation = activation
        if 'sigmoid' in self._activation.__name__:
            self._w = tf.Variable(np.random.rand(inp_shape, out_shape) * np.sqrt(6 / (inp_shape + out_shape)))
        elif 'relu' in self._activation.__name__:
            self._w = tf.Variable(np.random.randn(inp_shape, out_shape) * np.sqrt(2 / (inp_shape)))
        else:
            # Just a Normal
            self._w = tf.Variable(np.random.randn(inp_shape, out_shape))
        self._b = tf.Variable(np.zeros((1, out_shape)))
        
    def __call__(self, x):
        return self._activation(x @ self._w + self._b)
    
    def get_trainable(self):
        if self.trainable: 
            return [self._w, self._b]
        else:
            return []
        
    @property
    def inp_shape(self):
        return self._inp_shape
    
    @property
    def out_shape(self):
        return self._out_shape
    
    @property
    def w(self):
        return self._w
    
    @property
    def b(self):
        return self._b

In [15]:
class Sequential:
    def __init__(self, *args):
        self._layers = args
        self._trainable_variables = [i for s in [l.get_trainable() for l in self._layers] for i in s] 
        
    @tf.function
    def _forward(self, x):
        for layer in self._layers:
            x = layer(x)
        return x
        
    def fit_generator(self, train_seq, eval_seq, epoch, loss, optimizer):
        history = dict(train=list(), val=list())
        
        train_loss_results = list()
        val_loss_results = list()

        train_accuracy_results = list()
        val_accuracy_results = list()
        
        for e in range(epoch):
            p = tf.keras.metrics.Mean()
            epoch_loss_avg = tf.keras.metrics.Mean()
            epoch_loss_avg_val = tf.keras.metrics.Mean()

            epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
            epoch_accuracy_val = tf.keras.metrics.SparseCategoricalAccuracy()

            for x, y in train_seq:
                with tf.GradientTape() as tape:
                    prediction = self._forward(x)
                    loss_value = loss(y, prediction)
                    # epoch_train_loss.append(loss_value.mean())
                gradients = tape.gradient(loss_value, self._trainable_variables)
                optimizer.apply_gradients(zip(gradients, self._trainable_variables))
                epoch_accuracy.update_state(y, prediction)
                epoch_loss_avg.update_state(loss_value)
                
            train_accuracy_results.append(epoch_accuracy.result().numpy())
            train_loss_results.append(epoch_loss_avg.result().numpy())


            for x, y in eval_seq:
                prediction = self._forward(x)
                loss_value = loss(y, prediction)
                epoch_loss_avg_val.update_state(loss_value)
                epoch_accuracy_val.update_state(y, prediction)
            
            val_accuracy_results.append(epoch_accuracy_val.result().numpy())
            val_loss_results.append(epoch_loss_avg_val.result().numpy())

            # print(f"Epoch train loss: {epoch_train_loss[-1]:.2f},\nEpoch val loss: {epoch_val_loss[-1]:.2f}\n{'-'*20}")
            print("Epoch {}: Train loss: {:.3f} Train Accuracy: {:.3f}".format(e + 1,
                                                                               train_loss_results[-1],
                                                                               train_accuracy_results[-1]))
            print("Epoch {}: Val loss: {:.3f} Val Accuracy: {:.3f}".format(e + 1,
                                                                           val_loss_results[-1],
                                                                           val_accuracy_results[-1]))
            print('*' * 20)

#             history['train'].append(epoch_train_loss)
#             history['val'].append(epoch_val_loss)

        return history
            
    def predict_generator(self, seq):
        predictions = list()
        for x in seq:
            predictions.append(self._forward(x).numpy())
        return np.vstack(predictions)
    
    @property
    def trainable_variables(self):
        return self._trainable_variables

## ReLU

In [332]:
model = Sequential(Dense(784, 100, tf.nn.relu), 
                   Dense(100, 100, tf.nn.relu), 
                   Dense(100, 10, tf.nn.softmax))

hist = model.fit_generator(train_seq, test_seq, 10,
                    keras.losses.sparse_categorical_crossentropy, 
                    keras.optimizers.Adam())

Epoch 1: Train loss: 0.343 Train Accuracy: 0.898
Epoch 1: Val loss: 0.193 Val Accuracy: 0.941
********************
Epoch 2: Train loss: 0.138 Train Accuracy: 0.959
Epoch 2: Val loss: 0.150 Val Accuracy: 0.953
********************
Epoch 3: Train loss: 0.094 Train Accuracy: 0.972
Epoch 3: Val loss: 0.124 Val Accuracy: 0.963
********************
Epoch 4: Train loss: 0.066 Train Accuracy: 0.980
Epoch 4: Val loss: 0.125 Val Accuracy: 0.963
********************
Epoch 5: Train loss: 0.050 Train Accuracy: 0.985
Epoch 5: Val loss: 0.120 Val Accuracy: 0.965
********************
Epoch 6: Train loss: 0.039 Train Accuracy: 0.988
Epoch 6: Val loss: 0.118 Val Accuracy: 0.969
********************
Epoch 7: Train loss: 0.033 Train Accuracy: 0.989
Epoch 7: Val loss: 0.138 Val Accuracy: 0.965
********************
Epoch 8: Train loss: 0.029 Train Accuracy: 0.990
Epoch 8: Val loss: 0.130 Val Accuracy: 0.967
********************
Epoch 9: Train loss: 0.028 Train Accuracy: 0.990
Epoch 9: Val loss: 0.126 Val Ac

## Sigmoid

In [333]:
model = Sequential(Dense(784, 100, tf.nn.sigmoid), 
                   Dense(100, 100, tf.nn.sigmoid), 
                   Dense(100, 10, tf.nn.softmax))

hist = model.fit_generator(train_seq, test_seq, 10,
                    keras.losses.sparse_categorical_crossentropy, 
                    keras.optimizers.Adam())

Epoch 1: Train loss: 4.693 Train Accuracy: 0.596
Epoch 1: Val loss: 0.360 Val Accuracy: 0.894
********************
Epoch 2: Train loss: 0.273 Train Accuracy: 0.919
Epoch 2: Val loss: 0.217 Val Accuracy: 0.935
********************
Epoch 3: Train loss: 0.191 Train Accuracy: 0.944
Epoch 3: Val loss: 0.170 Val Accuracy: 0.950
********************
Epoch 4: Train loss: 0.149 Train Accuracy: 0.956
Epoch 4: Val loss: 0.143 Val Accuracy: 0.958
********************
Epoch 5: Train loss: 0.122 Train Accuracy: 0.964
Epoch 5: Val loss: 0.125 Val Accuracy: 0.964
********************
Epoch 6: Train loss: 0.101 Train Accuracy: 0.970
Epoch 6: Val loss: 0.114 Val Accuracy: 0.966
********************
Epoch 7: Train loss: 0.086 Train Accuracy: 0.974
Epoch 7: Val loss: 0.107 Val Accuracy: 0.968
********************
Epoch 8: Train loss: 0.073 Train Accuracy: 0.978
Epoch 8: Val loss: 0.102 Val Accuracy: 0.970
********************
Epoch 9: Train loss: 0.062 Train Accuracy: 0.982
Epoch 9: Val loss: 0.099 Val Ac

In [16]:
from functools import partial

In [312]:
train_seq_mod = MNISTSequence(X_tr, y_tr, 128, 
                              preprocess=lambda x, y: ((x.reshape((-1, 28 * 28)) / 255. - 0.5) * 2, y))
test_seq_mod = MNISTSequence(X_test, y_test, 128,
                             preprocess=lambda x, y: ((x.reshape((-1, 28 * 28)) / 255. - 0.5) * 2, y))

## Keras API

> Создаем слои

In [17]:
from pprint import pprint

In [10]:
hidden_size = 100
layers = [tf.keras.layers.Dense(hidden_size, activation=tf.nn.relu) for _ in range(2)]
layers.append(keras.layers.Dense(10, activation='softmax'))  # <-- Обратите внимание! Можно передавать имена объектов
pprint(layers)

[<tensorflow.python.keras.layers.core.Dense object at 0x7f07f051bb00>,
 <tensorflow.python.keras.layers.core.Dense object at 0x7f07f0585588>,
 <tensorflow.python.keras.layers.core.Dense object at 0x7f07f051b828>]


> Компилируем сеть

In [11]:
perceptron = tf.keras.Sequential(layers)
perceptron.compile(keras.optimizers.Adam(),
                   loss=keras.losses.sparse_categorical_crossentropy,
                   metrics=[keras.metrics.SparseCategoricalAccuracy()])

> Обучаем сеть

In [17]:
history = perceptron.fit_generator(train_seq, validation_data=test_seq, epochs=10)

Instructions for updating:
Please use Model.fit, which supports generators.
  ...
    to  
  ['...']
Train for 469 steps, validate for 79 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Все одним блоком

* Кроме подготовки данных. 
* Обратите внимание: активации, оптимизатор, метрика ... заданы строками.

In [69]:
hidden_size = 100
layers = [tf.keras.layers.Dense(hidden_size, activation='relu') for _ in range(2)]
layers.append(keras.layers.Dense(10, activation='softmax'))  # <-- Обратите внимание! Можно передавать имена объектов

perceptron = tf.keras.Sequential(layers)
perceptron.compile('adam',
                   'sparse_categorical_crossentropy',
                   metrics=['sparse_categorical_accuracy'])
history = perceptron.fit_generator(train_seq, 
                                   validation_data=test_seq,
                                   epochs=10)

Train for 469 steps
Epoch 1/10
Epoch 2/10

KeyboardInterrupt: 

## Tensorboard

> Сначала создадим директорию для записи логов.

In [30]:
!rm -rf logs/*

In [31]:
keras.backend.clear_session()

In [32]:
!mkdir -p logs
!ls -l | grep "^d"

drwxrwxr-x 7 1009 1009   4096 Jul 31 18:38 AAE
drwxrwxr-x 2 1009 1009   4096 Jul  7 20:14 Container
drwxrwxr-x 5 1009 1009   4096 Jul  3 19:13 conv_nets
drwxrwxr-x 7 1009 1009   4096 Jun 27 10:05 gradients
drwxrwxr-x 3 1009 1009   4096 Jul 17 10:52 homework
drwxrwxr-x 2 1009 1009   4096 Jun 19 08:54 img
drwxrwxr-x 2 1009 1009  12288 Dec 18 14:22 logs
drwxrwxr-x 5 1009 1009   4096 Dec 14 09:03 regularization
drwxrwxr-x 6 1009 1009   4096 Jul 13 11:31 tensorflow_lstm


In [33]:
tensorboard_cb = keras.callbacks.TensorBoard(log_dir='logs',
                                             histogram_freq=1,
                                             write_graph=True,
                                             write_grads=True,
                                             write_images=True,
                                             update_freq='batch')



In [34]:
hidden_size = 100
layers = [tf.keras.layers.Dense(hidden_size, activation='relu') for _ in range(2)]
layers.append(keras.layers.Dense(10, activation='softmax'))  # <-- Обратите внимание! Можно передавать имена объектов

perceptron = tf.keras.Sequential(layers)
perceptron.compile('adam',
                   'sparse_categorical_crossentropy',
                   metrics=['sparse_categorical_accuracy']
                  )
history = perceptron.fit_generator(train_seq, 
                                   validation_data=test_seq,
                                   epochs=10, 
                                   callbacks=[tensorboard_cb]
                                  )

  ...
    to  
  ['...']
Train for 469 steps, validate for 79 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [73]:
!rm -rf logs/*

In [74]:
keras.backend.clear_session()
tensorboard_cb = keras.callbacks.TensorBoard(log_dir='logs',
                                             histogram_freq=1,
                                             write_graph=True,
                                             write_grads=True,
                                             write_images=True,
                                             update_freq='batch')

hidden_size = 100
layers = [
    keras.layers.Dense(100, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax')

]

perceptron = tf.keras.Sequential(layers)
perceptron.compile('adam',
                   'sparse_categorical_crossentropy',
                    metrics=['sparse_categorical_accuracy']
                  )
history = perceptron.fit_generator(train_seq, 
                                   validation_data=test_seq,
                                   epochs=10, 
                                   callbacks=[tensorboard_cb]
                                  )

  ...
    to  
  ['...']
Train for 469 steps, validate for 79 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Но не все так гладко: сложно получить доступ к промежуточным значениям

> https://github.com/tensorflow/tensorflow/issues/33478

## Низкоуровневый tensorboard

In [113]:
from typing import Callable


class Dense:
    def __init__(self, inp_shape, out_shape, activation: Callable, name):
        self.trainable = True
        self._inp_shape = inp_shape
        self._out_shape = out_shape
        self._activation = activation
        if 'sigmoid' in self._activation.__name__:
            self._w = tf.Variable(np.random.rand(inp_shape, out_shape) * np.sqrt(6 / (inp_shape + out_shape)))
        elif 'relu' in self._activation.__name__:
            self._w = tf.Variable(np.random.randn(inp_shape, out_shape) * np.sqrt(2 / (inp_shape)))
        else:
            # Just a Normal
            self._w = tf.Variable(np.random.randn(inp_shape, out_shape))
        self._b = tf.Variable(np.zeros((1, out_shape)))
        self.name = name
        
    def __call__(self, x, writer=None, step=None):
        val = x @ self._w + self._b
        a = self._activation(val)
        if writer is not None:
            with writer.as_default():
                tf.summary.histogram(self.name + '_kernel', self._w, step=step)
                tf.summary.histogram(self.name + '_bias', self._b, step=step)
                tf.summary.histogram(self.name + '_activation', a, step=step)
                tf.summary.histogram(self.name + '_z', val, step=step)
        return a
    
    def get_trainable(self):
        if self.trainable: 
            return [self._w, self._b]
        else:
            return []
        
    @property
    def inp_shape(self):
        return self._inp_shape
    
    @property
    def out_shape(self):
        return self._out_shape
    
    @property
    def w(self):
        return self._w
    
    @property
    def b(self):
        return self._b

In [130]:
class Sequential:
    def __init__(self, *args):
        self._layers = args
        self._trainable_variables = [i for s in [l.get_trainable() for l in self._layers] for i in s] 
        
    def _forward(self, x, writer=None, step=None):
        for layer in self._layers:
            x = layer(x, writer, step)
        return x
        
    def fit_generator(self, train_seq, eval_seq, epoch, loss, optimizer, writer=None):
        history = dict(train=list(), val=list())
        
        train_loss_results = list()
        val_loss_results = list()

        train_accuracy_results = list()
        val_accuracy_results = list()
        
        step = 0
        for e in range(epoch):
            p = tf.keras.metrics.Mean()
            epoch_loss_avg = tf.keras.metrics.Mean()
            epoch_loss_avg_val = tf.keras.metrics.Mean()

            epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
            epoch_accuracy_val = tf.keras.metrics.SparseCategoricalAccuracy()

            for x, y in train_seq:
                with tf.GradientTape() as tape:
                    prediction = self._forward(x, writer, step)
                    loss_value = loss(y, prediction)
                    # epoch_train_loss.append(loss_value.mean())
                gradients = tape.gradient(loss_value, self._trainable_variables)
                optimizer.apply_gradients(zip(gradients, self._trainable_variables))
                epoch_accuracy.update_state(y, prediction)
                epoch_loss_avg.update_state(loss_value)
                with writer.as_default():
                    tf.summary.scalar('train_accuracy', epoch_accuracy.result().numpy(), step=step)
                    tf.summary.scalar('train_loss', epoch_loss_avg.result().numpy(), step=step)

                step += 1
                
            train_accuracy_results.append(epoch_accuracy.result().numpy())
            train_loss_results.append(epoch_loss_avg.result().numpy())


            for x, y in eval_seq:
                prediction = self._forward(x)
                loss_value = loss(y, prediction)
                epoch_loss_avg_val.update_state(loss_value)
                epoch_accuracy_val.update_state(y, prediction)
            
            val_accuracy_results.append(epoch_accuracy_val.result().numpy())
            val_loss_results.append(epoch_loss_avg_val.result().numpy())

            # print(f"Epoch train loss: {epoch_train_loss[-1]:.2f},\nEpoch val loss: {epoch_val_loss[-1]:.2f}\n{'-'*20}")
            print("Epoch {}: Train loss: {:.3f} Train Accuracy: {:.3f}".format(e + 1,
                                                                               train_loss_results[-1],
                                                                               train_accuracy_results[-1]))
            print("Epoch {}: Val loss: {:.3f} Val Accuracy: {:.3f}".format(e + 1,
                                                                           val_loss_results[-1],
                                                                           val_accuracy_results[-1]))
            print('*' * 20)

        return None
            
    def predict_generator(self, seq):
        predictions = list()
        for x in seq:
            predictions.append(self._forward(x).numpy())
        return np.vstack(predictions)
    
    @property
    def trainable_variables(self):
        return self._trainable_variables

In [131]:
!rm -rf logs/*

In [132]:
writer = tf.summary.create_file_writer("logs/relu")

In [133]:
model = Sequential(Dense(784, 100, tf.nn.relu, 'dense'), 
                   Dense(100, 100, tf.nn.relu, 'dense1'), 
                   Dense(100, 10, tf.nn.softmax, 'dense2'))

hist = model.fit_generator(train_seq, test_seq, 10,
                            keras.losses.sparse_categorical_crossentropy, 
                            keras.optimizers.Adam(),
                           writer
                          )

Epoch 1: Train loss: 0.330 Train Accuracy: 0.903
Epoch 1: Val loss: 0.175 Val Accuracy: 0.946
********************
Epoch 2: Train loss: 0.134 Train Accuracy: 0.960
Epoch 2: Val loss: 0.149 Val Accuracy: 0.956
********************
Epoch 3: Train loss: 0.091 Train Accuracy: 0.972
Epoch 3: Val loss: 0.136 Val Accuracy: 0.961
********************
Epoch 4: Train loss: 0.068 Train Accuracy: 0.979
Epoch 4: Val loss: 0.126 Val Accuracy: 0.964
********************
Epoch 5: Train loss: 0.052 Train Accuracy: 0.984
Epoch 5: Val loss: 0.125 Val Accuracy: 0.965
********************
Epoch 6: Train loss: 0.039 Train Accuracy: 0.988
Epoch 6: Val loss: 0.123 Val Accuracy: 0.966
********************
Epoch 7: Train loss: 0.032 Train Accuracy: 0.990
Epoch 7: Val loss: 0.136 Val Accuracy: 0.966
********************
Epoch 8: Train loss: 0.028 Train Accuracy: 0.991
Epoch 8: Val loss: 0.139 Val Accuracy: 0.965
********************
Epoch 9: Train loss: 0.024 Train Accuracy: 0.992
Epoch 9: Val loss: 0.126 Val Ac

In [119]:
writer = tf.summary.create_file_writer("logs/sigmoid")

In [121]:
model = Sequential(Dense(784, 100, tf.nn.sigmoid, 'dense'), 
                   Dense(100, 100, tf.nn.sigmoid, 'dense1'), 
                   Dense(100, 10, tf.nn.softmax, 'dense2'))

hist = model.fit_generator(train_seq, test_seq, 10,
                            keras.losses.sparse_categorical_crossentropy, 
                            keras.optimizers.Adam(),
                           writer
                          )

Epoch 1: Train loss: 2.726 Train Accuracy: 0.671
Epoch 1: Val loss: 1.849 Val Accuracy: 0.836
********************
Epoch 2: Train loss: 1.839 Train Accuracy: 0.842
Epoch 2: Val loss: 1.790 Val Accuracy: 0.851
********************
Epoch 3: Train loss: 1.792 Train Accuracy: 0.855
Epoch 3: Val loss: 1.760 Val Accuracy: 0.859
********************
Epoch 4: Train loss: 0.970 Train Accuracy: 0.900
Epoch 4: Val loss: 0.156 Val Accuracy: 0.952
********************
Epoch 5: Train loss: 0.137 Train Accuracy: 0.960
Epoch 5: Val loss: 0.127 Val Accuracy: 0.963
********************
Epoch 6: Train loss: 0.111 Train Accuracy: 0.967
Epoch 6: Val loss: 0.112 Val Accuracy: 0.967
********************
Epoch 7: Train loss: 0.093 Train Accuracy: 0.972
Epoch 7: Val loss: 0.104 Val Accuracy: 0.970
********************
Epoch 8: Train loss: 0.080 Train Accuracy: 0.977
Epoch 8: Val loss: 0.099 Val Accuracy: 0.972
********************
Epoch 9: Train loss: 0.068 Train Accuracy: 0.980
Epoch 9: Val loss: 0.095 Val Ac

In [145]:
from typing import Callable


class BatchNorm:
    def __init__(self, name):
        self.trainable = True
        self._beta = tf.Variable(0, dtype='float64')
        self._gamma = tf.Variable(1,  dtype='float64')
        self.name = name
        
    def __call__(self, x, writer=None, step=None):
        mu = tf.reduce_mean(x, axis=0)
        sigma = tf.math.reduce_std(x, axis=0)
        normed = (x - mu) / sigma # !!
        out = normed * self._gamma + self._beta
        
        if writer is not None:
            with writer.as_default():
                tf.summary.histogram(self.name + '_beta', self._beta, step=step)
                tf.summary.histogram(self.name + '_gamma', self._gamma, step=step)
                tf.summary.histogram(self.name + '_normed', normed, step=step)
                tf.summary.histogram(self.name + '_out', out, step=step)

        return out
    
    def get_trainable(self):
        if self.trainable: 
            return [self._beta, self._gamma]
        else:
            return []


In [146]:
writer = tf.summary.create_file_writer("logs/batch_norm")

In [147]:
model = Sequential(Dense(784, 100, tf.nn.sigmoid, 'dense'), 
                   BatchNorm('batch_norm'), 
                   Dense(100, 100, tf.nn.sigmoid, 'dense1'), 
                   Dense(100, 10, tf.nn.softmax, 'dense2'))

hist = model.fit_generator(train_seq, test_seq, 10,
                            keras.losses.sparse_categorical_crossentropy, 
                            keras.optimizers.Adam(),
                           writer
                          )

Epoch 1: Train loss: 1.925 Train Accuracy: 0.742
Epoch 1: Val loss: 0.269 Val Accuracy: 0.926
********************
Epoch 2: Train loss: 0.218 Train Accuracy: 0.939
Epoch 2: Val loss: 0.183 Val Accuracy: 0.948
********************
Epoch 3: Train loss: 0.145 Train Accuracy: 0.958
Epoch 3: Val loss: 0.146 Val Accuracy: 0.958
********************
Epoch 4: Train loss: 0.106 Train Accuracy: 0.969
Epoch 4: Val loss: 0.128 Val Accuracy: 0.962
********************
Epoch 5: Train loss: 0.082 Train Accuracy: 0.976
Epoch 5: Val loss: 0.122 Val Accuracy: 0.964
********************
Epoch 6: Train loss: 0.064 Train Accuracy: 0.981
Epoch 6: Val loss: 0.119 Val Accuracy: 0.966
********************
Epoch 7: Train loss: 0.049 Train Accuracy: 0.986
Epoch 7: Val loss: 0.118 Val Accuracy: 0.967
********************
Epoch 8: Train loss: 0.037 Train Accuracy: 0.990
Epoch 8: Val loss: 0.118 Val Accuracy: 0.968
********************
Epoch 9: Train loss: 0.028 Train Accuracy: 0.993
Epoch 9: Val loss: 0.120 Val Ac