# Implement MNIST dataset in tensorflow

In [1]:
import tensorflow as tf
tf.enable_eager_execution()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
class NaiveDense:
    def __init__(self, input_size, output_size, activation):
        self.activation = activation

        w_shape = (input_size, output_size)
        w_initial_value = tf.random.uniform(w_shape, minval = 0, maxval = 1)
        self.W = tf.Variable(w_initial_value)

        b_shape = (output_size,)
        b_initial_value = tf.zeros(b_shape)
        self.b = tf.Variable(b_initial_value)

    def __call__(self, inputs):
        return self.activation(tf.matmul(inputs, self.W) + self.b)

    @property
    def weights(self):
        return [self.W, self.b]

In [3]:
class NaiveSequential:
    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        x = inputs
        for layer in self.layers:
            x = layer(x)
        return x

    @property
    def weights(self):
        weights = []
        for layer in self.layers:
            weights += layer.weights
        return weights

In [4]:
class BatchGenerator:
    def __init__(self, images, labels, batch_size = 128):
        self.index = 0
        self.images = images
        self.labels = labels
        self.batch_size = batch_size

    def next(self):
        images = self.images[self.index : self.index + self.batch_size]
        labels = self.labels[self.index : self.index + self.batch_size]
        self.index += self.batch_size
        return images, labels

In [5]:
def update_weights(gradients, weights):
    learning_rate = 1e-3
    for g, w in zip(gradients, weights):
        w.assign_sub(g * learning_rate)

from tensorflow.keras import optimizers
def update_weights_optimizer(gradients, weights):
    optimizers.SGD(learning_rate = 1e-3).apply_gradients(zip(gradients, weights))

def one_training_step(model, images_batch, labels_batch):
    with tf.GradientTape() as tape:
        predictions = model(images_batch)
        per_sample_losses = tf.keras.losses.sparse_categorical_crossentropy(labels_batch, predictions)
        average_loss = tf.reduce_mean(per_sample_losses)
    gradients = tape.gradient(average_loss, model.weights)
#     print(gradients)
    update_weights(gradients, model.weights)
    return average_loss

def fit(model, images, labels, epochs, batch_size = 128):
    for epoch_counter in range(epochs):
        print("epoch {}".format(epoch_counter))
        batchGenerator = BatchGenerator(images, labels, batch_size)
        for batch_counter in range(len(images) // batch_size):
            images_batch, labels_batch = batchGenerator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print("loss at batch {} = {}".format(batch_counter, loss))

In [6]:
from tensorflow.keras.datasets import mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype('float32') / 255

model = NaiveSequential([
    NaiveDense(input_size = 28*28, output_size = 512, activation = tf.nn.relu),
    NaiveDense(input_size = 512, output_size = 10, activation = tf.nn.softmax)
])
assert len(model.weights) == 4

fit(model, train_images, train_labels, 5)

# batchGenerator = BatchGenerator(train_images, train_labels, 128)
# images_batch, labels_batch = batchGenerator.next()
# one_training_step(model, images_batch, labels_batch)

epoch 0
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
loss at batch 0 = 14.420263290405273
loss at batch 100 = 14.22926139831543
loss at batch 200 = 14.103338241577148
loss at batch 300 = 14.984796524047852
loss at batch 400 = 14.22926139831543
epoch 1
loss at batch 0 = 14.48110580444336
loss at batch 100 = 14.22926139831543
loss at batch 200 = 14.103338241577148
loss at batch 300 = 14.984796524047852
loss at batch 400 = 14.22926139831543
epoch 2
loss at batch 0 = 14.48110580444336
loss at batch 100 = 14.22926139831543
loss at batch 200 = 14.103338241577148
loss at batch 300 = 14.984796524047852
loss at batch 400 = 14.22926139831543
epoch 3
loss at batch 0 = 14.48110580444336
loss at batch 100 = 14.22926139831543
loss at batch 200 = 14.103338241577148
loss at batch 300 = 14.984796524047852
loss at batch 400 = 14.22926139831543
epoch 4
loss at batch 0 = 14.48110580444336
loss at batch 100 = 14.22926139831543
loss at batch 200 = 14.10333824

### It seem not working, all gradients return 0!