In [None]:
# A simple Dense Class
import tensorflow as tf
import math

class NaiveDense:
  """A minimal fully connected layer computing ``activation(inputs @ W + b)``.

  Args:
    input_size: Number of rows of the kernel ``W`` (features per input row).
    output_size: Number of columns of ``W`` and length of the bias ``b``.
    activation: Callable applied to the affine output (e.g. ``tf.nn.relu``).

  Attributes:
    W: ``tf.Variable`` of shape ``(input_size, output_size)``.
    b: ``tf.Variable`` of shape ``(output_size,)``.
  """

  def __init__(self, input_size, output_size, activation):
    self.activation = activation

    # Kernel: random values drawn uniformly from [0, 0.1).
    w_shape = (input_size, output_size)
    w_initial_value = tf.random.uniform(w_shape, minval=0, maxval=1e-1)
    self.W = tf.Variable(w_initial_value)

    # Bias starts at zero: now that it takes part in the forward pass, a zero
    # start keeps the initial output equal to activation(inputs @ W).
    b_shape = (output_size,)
    b_initial_value = tf.zeros(b_shape)
    self.b = tf.Variable(b_initial_value)

  def __call__(self, inputs):
    """Run the forward pass.

    Args:
      inputs: Tensor accepted by ``tf.matmul`` as the left operand of ``W``
        (presumably shape ``(batch, input_size)`` -- confirm against callers).

    Returns:
      The result of ``activation`` applied to ``inputs @ W + b``.
    """
    # The bias has to be added here; if it is left out of the graph it is
    # never trained and the tape reports a None gradient for it.
    return self.activation(tf.matmul(inputs, self.W) + self.b)

  @property
  def weights(self):
    """The trainable variables of the layer, as ``[W, b]``."""
    return [self.W, self.b]
  

In [None]:
# A simple Sequential Class
class NaiveSequential:
  """Chains callables so that the output of one becomes the input of the next."""

  def __init__(self, layers):
    # Kept as given; iteration order is the order of application.
    self.layers = layers

  def __call__(self, inputs):
    """Feed ``inputs`` through every layer in order and return the final result."""
    result = inputs
    for stage in self.layers:
      result = stage(result)
    return result

  @property
  def weights(self):
    """A flat list with the ``weights`` of each layer, in layer order."""
    return [variable for stage in self.layers for variable in stage.weights]

In [None]:
# Using the NaiveDense and NaiveSequential classes, we can build a mock Keras model
import tensorflow as tf
# Two stacked dense layers: 28*28 inputs -> 512 units (ReLU) -> 10 units (softmax).
model = NaiveSequential([
    NaiveDense(input_size=n_inputs, output_size=n_units, activation=activation_fn)
    for n_inputs, n_units, activation_fn in (
        (28 * 28, 512, tf.nn.relu),
        (512, 10, tf.nn.softmax),
    )
])

# Each layer exposes two variables through `weights`, so two layers expose four.
assert len(model.weights) == 4


In [None]:
# A Batch Generator

class BatchGenerator:
  """Hands out consecutive slices of two parallel sequences, one batch per call."""

  def __init__(self, images, labels, batch_size = 128):
    self.images, self.labels = images, labels
    self.batch_size = batch_size
    self.index = 0  # start offset of the batch returned by the next call

  def next(self):
    """Return the next ``(images, labels)`` slice pair and advance the offset.

    The offset is advanced by ``batch_size`` on every call, including calls
    made after the data has been exhausted.
    """
    window = slice(self.index, self.index + self.batch_size)
    batch = (self.images[window], self.labels[window])
    # Advance only after both slices succeeded.
    self.index = window.stop
    return batch
    

In [None]:
# Running One Training Step

def one_training_step_model(model, images_batch, labels_batch):
  """Do one forward/backward pass on a single batch and update the model.

  Args:
    model: Callable exposing a ``weights`` list of variables.
    images_batch: Inputs passed to ``model``.
    labels_batch: Targets passed to sparse categorical cross-entropy.

  Returns:
    The mean of the per-sample losses for this batch.
  """
  # Record the forward pass so the loss can be differentiated afterwards.
  with tf.GradientTape() as grad_tape:
    batch_predictions = model(images_batch)
    mean_loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(
            labels_batch, batch_predictions))

  # One gradient per entry of model.weights, in the same order.
  grads = grad_tape.gradient(mean_loss, model.weights)
  update_weights(grads, model.weights)
  return mean_loss

# update_weights performs one plain gradient-descent step: every weight is
# moved against its gradient, scaled by learning_rate.

learning_rate = 6e-2

def update_weights(gradients, weights):
  """Apply ``w -= learning_rate * g`` to each (gradient, weight) pair.

  Args:
    gradients: Iterable of gradients, aligned with ``weights``. An entry may be
      ``None`` when a weight did not take part in the loss computation.
    weights: Iterable of variables supporting ``assign_sub``.
  """
  for g, w in zip(gradients, weights):
    if g is None:
      # No gradient for this weight, so there is nothing to apply.
      continue
    # Step along the gradient, not along the weight itself: subtracting
    # w * learning_rate would only shrink every weight towards zero.
    w.assign_sub(g * learning_rate)   # assign_sub() is the in-place -=

''' In practise we almost never implement a weight update step like this by hand
Instead, we would use an OPTIMIZER instance from Keras

from tensorflow.keras import optimizers
optimizer = optimizers.SGD(learning_rate=1e-3)

def update_weights(gradients, weights):
  optimizer.apply_gradients(zip(gradients, weights))
'''

' In practise we almost never implement a weight update step like this by hand\nInstead, we would use an OPTIMIZER instance from Keras\n\nfrom tensorflow.keras import optimizers\noptimizer = optimizers.SGD(learning_rate=1e-3)\n\ndef update_weights(gradients, weights):\n  optimizer.apply_gradients(zip(gradients, weights))\n'

In [None]:
# The full training loop

def fit(model, images, labels, epochs, batch_size=128):
  """Train ``model`` with mini-batch updates for a number of epochs.

  Args:
    model: Model passed to ``one_training_step_model`` for every batch.
    images: Training inputs; must support ``len()`` and slicing.
    labels: Training targets, aligned with ``images``.
    epochs: Number of full passes over the data.
    batch_size: Number of samples per batch.

  Prints the loss every 100 batches and the epoch index after each epoch.
  """
  # Ceiling division, so a trailing partial batch is trained on as well.
  num_batches = -(-len(images) // batch_size)
  for epoch_counter in range(epochs):
    # A fresh generator per epoch restarts from the first sample; batch_size
    # is forwarded so the generator and the batch count agree.
    batch_generator = BatchGenerator(images, labels, batch_size)
    for batch_counter in range(num_batches):
      images_batch, labels_batch = batch_generator.next()
      loss = one_training_step_model(model, images_batch, labels_batch)
      if batch_counter % 100 == 0:
        print('loss at batch %d: %.2f' % (batch_counter, loss))
    print('epoch number: %i' % epoch_counter)

In [None]:
# Load MNIST, preprocess the images, and train the model
from tensorflow.keras.datasets import mnist
(train_images, train_labels), (test_images, test_label) = mnist.load_data()

# Flatten every 28x28 image into a 784-long row, convert to float32 and
# divide by 255.
train_images = train_images.reshape(60000, 28*28).astype('float32') / 255
test_images = test_images.reshape(10000, 28*28).astype('float32') / 255

fit(model, train_images, train_labels, epochs=7, batch_size=128)

loss at batch 0: 4.56
loss at batch 100: 2.30
loss at batch 200: 2.30
loss at batch 300: 2.30
loss at batch 400: 2.30
epoch number: 0
loss at batch 0: 2.30
loss at batch 100: 2.30
loss at batch 200: 2.30
loss at batch 300: 2.30
loss at batch 400: 2.30
epoch number: 1
loss at batch 0: 2.30
loss at batch 100: 2.30
loss at batch 200: 2.30
loss at batch 300: 2.30
loss at batch 400: 2.30
epoch number: 2
loss at batch 0: 2.30
loss at batch 100: 2.30
loss at batch 200: 2.30
loss at batch 300: 2.30
loss at batch 400: 2.30
epoch number: 3
loss at batch 0: 2.30
loss at batch 100: 2.30
loss at batch 200: 2.30
loss at batch 300: 2.30
loss at batch 400: 2.30
epoch number: 4
loss at batch 0: 2.30
loss at batch 100: 2.30
loss at batch 200: 2.30
loss at batch 300: 2.30
loss at batch 400: 2.30
epoch number: 5
loss at batch 0: 2.30
loss at batch 100: 2.30
loss at batch 200: 2.30
loss at batch 300: 2.30
loss at batch 400: 2.30
epoch number: 6
