In [1]:
import tensorflow as tf
import datetime as dt
import numpy as np

from tensorflow.keras.datasets import mnist

In [16]:
# Log directory for TensorBoard; the name embeds the current time formatted
# as day, month, year, hour, minute.
out_file = "tf_vis/hist_" + dt.datetime.now().strftime('%d%m%Y%H%M')
train_summary_writer = tf.summary.create_file_writer(out_file)

In [18]:
@tf.function
def nn_model(x, labels, W1, b1, W2, b2):
    """Run a forward pass of the one-hidden-layer network and compute its loss.

    Args:
        x: Batch of inputs. Every axis after the first (batch) axis is
            flattened, and the values are cast to float32.
        labels: Targets handed to softmax cross-entropy together with the
            logits (the callers in this notebook pass one-hot vectors).
        W1, b1: Weights and bias of the hidden layer.
        W2, b2: Weights and bias of the output layer.

    Returns:
        Tuple ``(logits, hidden_logits, hidden_out, cross_entropy)``, where
        ``cross_entropy`` is the mean softmax cross-entropy over the batch.
    """
    # Use the dynamic batch size: x.shape[0] is None when the batch dimension
    # is unknown at trace time, which would make the reshape fail.
    x = tf.reshape(x, (tf.shape(x)[0], -1))

    # Group the ops below under a "Hidden" namespace in TensorBoard.
    with tf.name_scope("Hidden"):
        hidden_logits = tf.add(tf.matmul(tf.cast(x, tf.float32), W1), b1)
        hidden_out = tf.nn.sigmoid(hidden_logits)

    # Group the output-layer ops under an "Output" namespace.
    with tf.name_scope("Output"):
        logits = tf.add(tf.matmul(hidden_out, W2), b2)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    )

    return logits, hidden_logits, hidden_out, cross_entropy

In [19]:
def get_batch(x, y, size):
    """Draw a random mini-batch of aligned samples from ``x`` and ``y``.

    Indices are drawn uniformly with replacement, so the same sample may
    appear more than once in a batch.

    Args:
        x: Array of inputs indexed along its first axis (any rank).
        y: Array of labels; ``len(y)`` bounds the indices that are drawn.
        size: Number of samples to draw.

    Returns:
        Tuple ``(x[idxs], y[idxs])`` for the drawn indices.
    """
    idxs = np.random.randint(0, len(y), size)
    # Index only the leading axis so inputs of any rank are accepted, not
    # just 3-D image stacks.
    return x[idxs], y[idxs]

In [20]:
# Seed NumPy (mini-batch sampling) and TensorFlow (weight initialisation) so
# that re-running the notebook starts from the same random state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

(x_train, y_train), (x_test, y_test) = mnist.load_data()
epochs = 10

batch_size = 100

# Normalise the images so pixel values lie between 0 and 1.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Convert x_test to a tensor for use in the model; training data is converted
# on the fly. It is fixed evaluation data, so it is held in a constant tensor
# rather than in a mutable, trainable tf.Variable.
x_test = tf.constant(x_test)

# Weights connecting the input to the hidden layer.
W1 = tf.Variable(tf.random.normal([784, 300], stddev= 0.03), name='w1')
b1 = tf.Variable(tf.random.normal([300]), name='b1')

# Weights connecting the hidden layer to the output layer.
W2 = tf.Variable(tf.random.normal([300, 10], stddev= 0.03), name='w2')
b2 = tf.Variable(tf.random.normal([10]), name='b2')

optimizer = tf.keras.optimizers.Adam()

# Cria gráfico com as operações feitas pela rede

In [6]:
# Draw one random mini-batch to feed the traced call.
batch_x, batch_y = get_batch(x_train, y_train, batch_size)

# Convert the NumPy arrays to tensors.
batch_x = tf.constant(batch_x)
batch_y = tf.constant(batch_y)

# One-hot encode the labels (10 classes).
batch_y = tf.one_hot(batch_y, 10)

# Let TensorBoard start capturing graph and profiler information.
tf.summary.trace_on(graph=True, profiler=True)

# nn_model returns (logits, hidden_logits, hidden_out, cross_entropy); keep
# only the first element so that `logits` really holds the logits.
logits = nn_model(batch_x, batch_y, W1, b1, W2, b2)[0]
with train_summary_writer.as_default():
    tf.summary.trace_export(name="nn_model",step=0, profiler_outdir=out_file)
    # Write buffered events to disk now so the graph shows up in TensorBoard
    # while the kernel is still running.
    train_summary_writer.flush()

# NOTE(review): this cell only exports the graph trace; the message below is
# kept unchanged to match the recorded output.
print("\nTraining complete")
        


Training complete


![title](images/tb_puro.png)
![title](images/tb_namespace.png) 

# Cria gráfico de loss, logits e das imagens preditas

In [21]:
# Number of mini-batches per epoch. Batches are drawn at random, so on average
# each training sample is used once per epoch.
total_batch = len(y_train) // batch_size

for epoch in range(epochs):

    # Running mean of the loss over this epoch.
    avg_loss = 0

    for i in range(total_batch):

        # Draw a random mini-batch.
        batch_x, batch_y = get_batch(x_train, y_train, batch_size)

        # Convert the NumPy arrays to tensors.
        batch_x = tf.constant(batch_x)
        batch_y = tf.constant(batch_y)

        # One-hot encode the labels (10 classes).
        batch_y = tf.one_hot(batch_y, 10)

        # Operations on variables executed inside this context are recorded
        # so that gradients can be computed afterwards.
        with tf.GradientTape() as tape:
            logits, hidden_logits, hidden_out, loss = nn_model(batch_x, batch_y, W1, b1, W2, b2)

        # Derivatives of the loss with respect to the weights and biases.
        gradients = tape.gradient(loss, [W1, b1, W2, b2])

        # Pair each gradient with its variable and take one optimizer step.
        optimizer.apply_gradients(zip(gradients, [W1, b1, W2, b2]))

        # Accumulate the mean loss of the epoch.
        avg_loss += loss / total_batch

    # Evaluate on the full test set after every epoch.
    test_logits,_,_,_ = nn_model(x_test, tf.one_hot(y_test, 10), W1, b1, W2, b2)
    max_idxs = tf.argmax(test_logits, axis= 1)

    # Boolean mask of correctly classified test samples, computed once and
    # reused for the accuracy and for the image summaries.
    correct_mask = max_idxs.numpy() == y_test
    test_acc = np.mean(correct_mask)

    print(f"Epoch:{epoch + 1}, loss={avg_loss:.3f}, test set    accuracy={test_acc * 100:.3f}%")

    if epoch == 0:

        # Split the test images into those the model got right and wrong.
        correct_inputs = tf.boolean_mask(x_test, correct_mask)
        incorrect_inputs = tf.boolean_mask(x_test, np.logical_not(correct_mask))

        with train_summary_writer.as_default():
            # Reshape to 28x28 single-channel images and show at most 5
            # samples of each kind in TensorBoard.
            tf.summary.image("correct images", tf.reshape(correct_inputs, (-1, 28, 28, 1)), max_outputs=5, step=epoch)
            tf.summary.image("incorrect images", tf.reshape(incorrect_inputs, (-1, 28, 28, 1)), max_outputs=5, step=epoch)

    # Log the loss, the accuracy and the hidden-layer activations of the last
    # training batch of the epoch.
    with train_summary_writer.as_default():
        tf.summary.scalar("loss", avg_loss, step=epoch)
        tf.summary.scalar("accuracy", test_acc, step=epoch)
        tf.summary.histogram("Hidden_logits", hidden_logits, step=epoch)
        tf.summary.histogram("Hidden_out", hidden_out, step=epoch)

    # Write buffered events to disk after every epoch; otherwise the latest
    # summaries may stay in memory while the notebook kernel is alive.
    train_summary_writer.flush()

print("\nTraining complete")
        
            
        

Epoch:1, loss=0.551, test set    accuracy=92.320%
Epoch:2, loss=0.226, test set    accuracy=94.250%
Epoch:3, loss=0.173, test set    accuracy=95.240%
Epoch:4, loss=0.134, test set    accuracy=96.000%
Epoch:5, loss=0.113, test set    accuracy=96.630%
Epoch:6, loss=0.095, test set    accuracy=96.820%
Epoch:7, loss=0.075, test set    accuracy=97.100%
Epoch:8, loss=0.063, test set    accuracy=97.290%
Epoch:9, loss=0.056, test set    accuracy=97.580%
Epoch:10, loss=0.047, test set    accuracy=97.750%

Training complete


![title](images/acc.png)
![title](images/logits.png)
![title](images/imgs.png)