In [3]:
from tensorflow import keras
import tensorflow as tf
import datetime as dt
import numpy as np

In [4]:
# Load the MNIST dataset through Keras and unpack it into the train and test
# (inputs, labels) pairs used as module-level globals by the cells below.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

In [5]:
class Model(object):
    """Fully connected image classifier with TensorBoard logging helpers.

    The network is a stack of ``num_layers`` dense hidden layers with
    ``hidden_size`` units each, followed by a 10-unit dense output layer with
    no activation, so the model emits raw logits.

    Attributes:
        num_layers: Number of hidden dense layers.
        nn_model: The underlying ``tf.keras.Sequential`` network.
    """

    def __init__(self, activation, num_layers=6, hidden_size=10):
        """Build the hidden stack and the output layer.

        Args:
            activation: Activation handed to every hidden ``Dense`` layer.
            num_layers: How many hidden layers to create.
            hidden_size: Number of units in each hidden layer.
        """
        self.num_layers = num_layers
        self.nn_model = tf.keras.Sequential()

        # Hidden stack: `num_layers` dense layers named layer1 .. layerN.
        for i in range(num_layers):
            self.nn_model.add(
                tf.keras.layers.Dense(hidden_size, activation=activation, name=f"layer{i + 1}")
            )

        # Output layer: no activation, the loss below consumes logits directly.
        self.nn_model.add(tf.keras.layers.Dense(10, name="output_layer"))

    @tf.function
    def foward(self, input_images):
        """Run the forward pass and return the logits.

        The method name (sic) is kept as-is because callers rely on it.

        Args:
            input_images: Batch of images; cast to float32, flattened to
                rows of 28*28 values and scaled by 1/255 before the network.

        Returns:
            The output of ``nn_model`` for the preprocessed batch.
        """
        input_images = tf.cast(input_images, tf.float32)
        input_images = tf.reshape(input_images, [-1, 28 * 28])
        input_images = input_images / 255.0
        logits = self.nn_model(input_images)

        return logits

    @staticmethod
    def loss(logits, labels):
        """Return the batch mean of the sparse softmax cross-entropy.

        Args:
            logits: Unnormalised class scores as produced by ``foward``.
            labels: Integer class labels, one per row of ``logits``.
        """
        return tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        )

    def log_gradient(self, gradients, train_writer, step):
        """Write per-layer gradient and weight summaries for TensorBoard.

        Only kernel (weight matrix) variables are logged; biases are skipped.
        Layers are numbered from 0 by counting kernel variables in the order
        they appear in ``trainable_variables``, so each layer gets its own
        index regardless of how many other variables sit between kernels.
        For every layer three separately tagged summaries are written:

        * ``grad_abs_mean_<k>``: scalar, mean absolute gradient.
        * ``grad_hist_<k>``: histogram of the gradient values.
        * ``weight_hist_<k>``: histogram of the kernel values.

        Args:
            gradients: Gradients aligned one-to-one with
                ``self.nn_model.trainable_variables``.
            train_writer: Summary writer that receives the events.
            step: Training step recorded with each summary.
        """
        variables = self.nn_model.trainable_variables

        # Gradients must line up with the trainable variables (weights and biases).
        assert len(gradients) == len(variables)

        layer_idx = 0
        with train_writer.as_default():
            for gradient, variable in zip(gradients, variables):
                # Only the weight matrices are of interest here.
                if "kernel" not in variable.name:
                    continue

                # Distinct tags keep the three series from sharing one name.
                tf.summary.scalar(
                    f"grad_abs_mean_{layer_idx}", tf.reduce_mean(tf.abs(gradient)), step=step
                )
                tf.summary.histogram(f"grad_hist_{layer_idx}", gradient, step=step)
                tf.summary.histogram(f"weight_hist_{layer_idx}", variable, step=step)
                layer_idx += 1

    def plot_computational_graphs(self, train_writer, x_batch):
        """Trace one forward pass and export its graph to TensorBoard.

        Args:
            train_writer: Summary writer that receives the exported trace.
            x_batch: Sample batch fed through ``foward`` to produce the trace.
        """
        tf.summary.trace_on(graph=True)
        self.foward(x_batch)

        with train_writer.as_default():
            tf.summary.trace_export(name='graph', step=0)

                                

In [6]:
def get_batch(x, y, size):
    """Draw a random batch of aligned samples from ``x`` and ``y``.

    Indices are sampled uniformly with replacement from ``[0, len(y))``, so
    the same sample may appear more than once in a batch.

    Args:
        x: NumPy array of inputs indexed by sample along its first axis;
            any number of trailing axes is accepted.
        y: NumPy array of labels indexed by sample along its first axis.
        size: Number of samples to draw.

    Returns:
        Tuple ``(x_batch, y_batch)`` selected with the same indices.
    """
    idxs = np.random.randint(0, len(y), size)
    # Index only the leading axis so inputs of any rank are supported.
    return x[idxs], y[idxs]

def run_training(model: Model, sub_folder: str, iterations: int = 2500, batch_size: int = 32, log_freq: int = 200):
    """Train ``model`` on random mini-batches and log progress to TensorBoard.

    Batches are drawn from the module-level ``x_train`` / ``y_train`` arrays
    through ``get_batch``. Summaries are written under
    ``tf_vis/<sub_folder>_<timestamp>`` so that every scenario gets its own
    run directory, even when several runs start within the same minute.

    Args:
        model: The ``Model`` instance to train (updated in place).
        sub_folder: Label of this run; used as the prefix of the log directory.
        iterations: Number of optimisation steps.
        batch_size: Number of samples per step.
        log_freq: Print and log metrics every ``log_freq`` steps.
    """
    # Second-level timestamp keeps quick successive runs apart.
    timestamp = dt.datetime.now().strftime('%d%m%Y%H%M%S')
    train_writer = tf.summary.create_file_writer(f"tf_vis/{sub_folder}_{timestamp}")
    model.plot_computational_graphs(train_writer, x_train[:batch_size, :, :])

    optimizer = tf.keras.optimizers.Adam()

    for i in range(iterations):
        # Fetch a random batch of images and labels.
        image_batch, label_batch = get_batch(x_train, y_train, batch_size)

        # Convert inputs and labels to tensors.
        image_batch = tf.constant(image_batch)
        label_batch = tf.cast(tf.constant(label_batch), tf.int32)

        # Record the forward pass and the loss so they can be differentiated.
        with tf.GradientTape() as tape:
            logits = model.foward(image_batch)
            loss = model.loss(logits, label_batch)

        variables = model.nn_model.trainable_variables

        # Gradients of the loss with respect to every trainable variable.
        gradients = tape.gradient(loss, variables)

        # Apply the optimiser update.
        optimizer.apply_gradients(zip(gradients, variables))

        if i % log_freq == 0:
            predictions = tf.argmax(logits, axis=1)
            # Fraction of the current batch that was classified correctly.
            acc = np.mean(predictions.numpy() == label_batch.numpy())

            print(f"Iter: {i}, loss={loss:.3f}, accuracy={acc * 100:.3f}%")

            with train_writer.as_default():
                tf.summary.scalar('loss', loss, step=i)
                tf.summary.scalar('accuracy', acc, step=i)
            model.log_gradient(gradients, train_writer, i)
        


In [7]:
# Scenario labels and the activation function each one trains with; the two
# lists are matched by position.
scenarios = ['sigmoid', "relu", "leaky_relu"]
act_funcs = [tf.sigmoid, tf.nn.relu, tf.nn.leaky_relu]

assert len(scenarios) == len(act_funcs)

# Train one fresh network (6 hidden layers of 10 units) per activation.
for scenario_name, activation_fn in zip(scenarios, act_funcs):
    model = Model(activation_fn, 6, 10)
    run_training(model, scenario_name)

Iter: 0, loss=2.441, accuracy=18.750%
Iter: 200, loss=2.314, accuracy=6.250%
Iter: 400, loss=2.273, accuracy=15.625%
Iter: 600, loss=2.210, accuracy=9.375%
Iter: 800, loss=1.937, accuracy=18.750%
Iter: 1000, loss=1.808, accuracy=18.750%
Iter: 1200, loss=1.734, accuracy=46.875%
Iter: 1400, loss=1.817, accuracy=31.250%
Iter: 1600, loss=1.681, accuracy=31.250%
Iter: 1800, loss=1.764, accuracy=25.000%
Iter: 2000, loss=1.627, accuracy=34.375%
Iter: 2200, loss=1.489, accuracy=31.250%
Iter: 2400, loss=1.509, accuracy=34.375%
Iter: 0, loss=2.293, accuracy=15.625%
Iter: 200, loss=1.494, accuracy=50.000%
Iter: 400, loss=0.793, accuracy=75.000%
Iter: 600, loss=0.682, accuracy=81.250%
Iter: 800, loss=0.366, accuracy=90.625%
Iter: 1000, loss=0.597, accuracy=87.500%
Iter: 1200, loss=0.471, accuracy=84.375%
Iter: 1400, loss=0.770, accuracy=78.125%
Iter: 1600, loss=0.401, accuracy=93.750%
Iter: 1800, loss=0.352, accuracy=90.625%
Iter: 2000, loss=0.218, accuracy=96.875%
Iter: 2200, loss=0.457, accuracy

In [1]:
# Delete everything inside tf_vis/ (the TensorBoard log directory written by
# run_training); the directory itself is kept. Destructive and Unix-only.
# NOTE(review): this cell's execution count (1) is out of sequence with the
# cells above - confirm whether it is meant to run before training.
!rm -rf tf_vis/*