## VAE with Gaussian Distribuition Data

In [73]:
import timeit
start_time = timeit.default_timer()
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F # type: ignore
import numpy as np
from torchvision.utils import save_image
import matplotlib.pyplot as plt

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

debug = True


cuda


##### Dados
Criando dados de uma distribuição normal ```xTrain``` (utilizado no treinamento), ```xVal``` (utilizado na validação do modelo).


In [74]:
nVar = 1                                                # NOTE: The number of variables(features) in the input data. Tensor shape: (nSamples, nVar) Example: data.shape[1] = nVar

nTrain = 100000  
nVal = 20000
nTest = nVal

x_train = torch.normal(5, 2, size=(nTrain, nVar)).to(device) 
x_val = torch.normal(5, 2, size=(nVal, nVar)).to(device)
x_test = torch.normal(5, 2, size=(nTest, nVar)).to(device) 

x_train

tensor([[ 7.4872],
        [-0.1194],
        [ 2.8721],
        ...,
        [ 5.5996],
        [ 2.2071],
        [ 5.7944]], device='cuda:0')

##### Carregando os dados

DataLoaders funcionam iterando sobre o dataset.

```Batch_size``` é a quantidade de dados que será pego a cada iteração.

 ```shuffle=True``` é importante para pegar sempre um set de dados aleatório a cada iteração.

In [75]:
batch_size = 100

trainLoader = torch.utils.data.DataLoader(x_train, batch_size=batch_size, shuffle=True)
valLoader = torch.utils.data.DataLoader(x_val, batch_size=batch_size, shuffle=False)
testLoader = torch.utils.data.DataLoader(x_test, batch_size=batch_size, shuffle=False)



##### Definição da Arquitetura VAE

In [None]:
from typing import Union

class VAE(nn.Module):
    def __init__(self, nVar: int, h_dims: list, z_dim: int, activation: Union[nn.ReLU, nn.Sigmoid, nn.Tanh, nn.LeakyReLU, nn.ELU]):
        super().__init__()
        """Defines the architecture of the VAE
        Args:
            h_dims: list of hidden dimensions
            z_dim: dimension of the latent space
            nVar: dimension of the input data
        """

        hidden_Layers = []
        input_dim = nVar

        for h in h_dims:
            hidden_Layers.append(nn.Linear(input_dim, h))
            hidden_Layers.append(activation)
            input_dim = h
            
        self.encode_layers = nn.Sequential(*hidden_Layers)                  # NOTE: * is used to unpack the list of layers; it is equivalent to writing nn.Sequential(hidden_Layers[0], hidden_Layers[1], ...)
        self.decode_layers = nn.Sequential(*hidden_Layers[::-1])            # 

        self.fc_mu = nn.Linear(h_dims[-1], z_dim)                           #   
        self.fc_logvar = nn.Linear(h_dims[-1], z_dim)                       

        self.fc_output = nn.Linear(h_dims[0], nVar)                             #

    def forward(self, x):                                                   # NOTE: x: input data
        mu, log_var = self.encoder(x)
        z = self.sampling(mu, log_var)
        return self.decoder(z), mu, log_var

    def sampling(self, mu, log_var):
        epsilon = torch.randn_like(log_var)
        return mu + epsilon * log_var

    def encoder(self, x):
        h = self.encode_layers(x)
        return self.fc_mu(h), self.fc_logvar(h)
    
    def decoder(self, x):
        x = self.decode_layers(x)
        return self.fc_output(x)
    


Loss Function

Mean Squered error + Kullback

In [77]:
def loss_function(recon_x, x, mu, std):
    # Loss function
    ERR = nn.MSELoss(reduction='sum')(recon_x, x)
    KLD = -0.5 * torch.sum(1 + torch.log(std**2) - mu**2 - std**2)
    return ERR + KLD, -ERR, -KLD

Sumarizarando : : 

| Step | Action | PyTorch Function |
| :-: | :-: | -: |
| 1 | Forward pass | ```model()``` |
| 2 | Compute loss | ```loss_function()``` |
| 3 | Backpropagate gradients | ```loss.backward()``` |
| 4 | Update weight | ```optimizer.step()``` |
| 5 | Clear gradients | ```optimizer.zero_grad()``` |


In [78]:
model = VAE(nVar, [32, 16], 2, nn.Tanh()).to(device)

##### The optimizer : : ADAM

In [79]:
optimizer = torch.optim.Adam(
        model.parameters(),
        lr = 0.002
    )

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                                    optimizer, 'max',
                                    factor=0.5,
                                    patience=5, threshold=0.001, cooldown=0,
                                    min_lr=0.0001, verbose=True
                                )

In [80]:
count=0

err_l, kld_l, n_wu, terrl, tkldl, update = [], [], [], [], [], []
for epoch in range(1, 20):
    
    model.train()                   # NOTE: Mudei de lugar, antes estava dentro do loop for
    train_loss = 0

    for batch_idx, (data) in enumerate(trainLoader):
        bsize = data.shape[0]
        recon_batch, mu, std = model(data)
        loss, err, kld = loss_function(recon_batch, data, mu, std)
        loss.backward()
        train_loss += err.item() + kld.item()
        optimizer.step()
        optimizer.zero_grad()
        err_l.append(err.item()/bsize)
        kld_l.append(kld.item()/bsize)
        count+=1
        n_wu.append(count)

        if (count%100 == 0): # Validating every 100 weight updates
          for batch_idx, datav in enumerate(valLoader):
            model.eval()
            trecon, tmu, tstd = model(datav)
            tloss, terr, tkld = loss_function(trecon, datav, tmu, tstd)
            terrl.append(terr.item()/datav.shape[0])
            tkldl.append(tkld.item()/datav.shape[0])
            update.append(count)

    scheduler.step(train_loss / len(trainLoader.dataset))

RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x2 and 32x16)