In [165]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [166]:
# Default compute device for the notebook: the GPU when PyTorch reports CUDA
# as available, otherwise the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

class Decoder(nn.Module):
    """MLP decoder that maps a latent vector back to the input space.

    Two hidden ``Linear`` + ``LeakyReLU(0.2)`` layers are followed by a linear
    output layer and, optionally, a sigmoid.

    Args:
        latent_dim: Size of the latent vector fed to the decoder.
        hidden_dim: Width of both hidden layers.
        output_dim: Size of the reconstructed vector.
        apply_sigmoid: When True (the default, matching the original
            behaviour) the output is squashed with a sigmoid, so every
            reconstructed value lies in [0, 1]. Pass False to return the raw
            linear output when the targets are not scaled to that range.
    """

    def __init__(self, latent_dim, hidden_dim, output_dim, apply_sigmoid=True):
        super().__init__()
        self.FC_hidden = nn.Linear(latent_dim, hidden_dim)
        self.FC_hidden2 = nn.Linear(hidden_dim, hidden_dim)
        self.FC_output = nn.Linear(hidden_dim, output_dim)

        self.LeakyReLU = nn.LeakyReLU(0.2)

        # Plain bool attribute: it adds no parameters or sub-modules, so the
        # state_dict and the printed module tree are unchanged.
        self.apply_sigmoid = apply_sigmoid

    def forward(self, x):
        """Decode latent batch ``x`` into a reconstruction ``x_hat``."""
        h = self.LeakyReLU(self.FC_hidden(x))
        h = self.LeakyReLU(self.FC_hidden2(h))

        x_hat = self.FC_output(h)
        if self.apply_sigmoid:
            x_hat = torch.sigmoid(x_hat)
        return x_hat
    
class Encoder(nn.Module):
    """MLP encoder producing the parameters of a diagonal Gaussian over the latent space.

    The input passes through two ``Linear`` + ``LeakyReLU(0.2)`` layers; two
    separate linear heads then emit the mean and the log-variance.

    Args:
        input_dim: Size of each input vector.
        hidden_dim: Width of both hidden layers.
        latent_dim: Size of the latent vector (and of each returned tensor's last axis).
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()

        self.FC_input = nn.Linear(input_dim, hidden_dim)
        self.FC_input2 = nn.Linear(hidden_dim, hidden_dim)
        self.FC_mean = nn.Linear(hidden_dim, latent_dim)
        # Despite the attribute name, this head predicts the log-variance.
        self.FC_var = nn.Linear(hidden_dim, latent_dim)

        self.LeakyReLU = nn.LeakyReLU(0.2)
        # The training flag is not set by hand: nn.Module.__init__ already
        # initialises it to True and train()/eval() toggle it afterwards.

    def forward(self, x):
        """Return ``(mean, log_var)`` of the approximate posterior for batch ``x``."""
        hidden = self.LeakyReLU(self.FC_input(x))
        hidden = self.LeakyReLU(self.FC_input2(hidden))
        return self.FC_mean(hidden), self.FC_var(hidden)
    
class VAE(nn.Module):
    """Variational autoencoder that chains an encoder, a sampling step and a decoder.

    Args:
        Encoder: Module whose forward returns ``(mean, log_var)``.
        Decoder: Module that maps a latent sample to a reconstruction.
    """

    def __init__(self, Encoder, Decoder):
        super().__init__()
        self.Encoder = Encoder
        self.Decoder = Decoder

    def reparameterization(self, mean, var):
        """Sample ``z = mean + var * epsilon`` with ``epsilon ~ N(0, I)``.

        The parameter keeps its historical name ``var`` for backward
        compatibility, but it is the scale that multiplies the noise, i.e. the
        standard deviation; ``forward`` passes ``exp(0.5 * log_var)``.
        """
        # randn_like already creates the noise with var's device and dtype.
        # Forcing it onto a global device would break a model that lives on a
        # different device (e.g. a CPU model on a CUDA-capable machine).
        epsilon = torch.randn_like(var)
        z = mean + var * epsilon  # reparameterization trick
        return z

    def forward(self, x):
        """Encode ``x``, sample a latent vector and decode it.

        Returns:
            tuple: ``(x_hat, mean, log_var)``.
        """
        mean, log_var = self.Encoder(x)
        # exp(0.5 * log_var) converts the log-variance to a standard deviation.
        z = self.reparameterization(mean, torch.exp(0.5 * log_var))
        x_hat = self.Decoder(z)

        return x_hat, mean, log_var

def VAELoss(x, x_hat, mean, logvar, beta=1):
    """Compute the beta-weighted VAE objective for one batch.

    Args:
        x: Target batch.
        x_hat: Reconstruction of ``x``.
        mean: Posterior means produced by the encoder.
        logvar: Posterior log-variances produced by the encoder.
        beta: Weight applied to the KL term.

    Returns:
        Scalar tensor: summed squared reconstruction error plus ``beta`` times
        the KL divergence between N(mean, exp(logvar)) and N(0, I).
    """
    reconstruction_term = nn.functional.mse_loss(x_hat, x, reduction='sum')
    # Closed-form KL divergence for a diagonal Gaussian against a unit Gaussian.
    kl_elements = 1 + logvar - mean.pow(2) - logvar.exp()
    kl_term = -0.5 * torch.sum(kl_elements)
    return reconstruction_term + beta * kl_term

In [167]:
class DF_Dataset(Dataset):
    """Dataset that serves the rows of a DataFrame as float32 tensors.

    Each item is the pair ``(row_tensor, 0)``; the constant ``0`` fills the
    label slot so batches unpack as ``(inputs, labels)``.
    """

    def __init__(self, df):
        # Keep a reference to the source frame next to its tensor copy.
        self.df = df
        self.data = torch.tensor(df.values, dtype=torch.float32)

    def __len__(self):
        """Number of rows."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(row at idx, 0)``."""
        row = self.data[idx]
        return row, 0
    
def create_dataloaders(df, batchsize=32, test_size=0.2, random_state=None):
    """Split a DataFrame into train/test sets and wrap each in a DataLoader.

    Args:
        df: DataFrame to split; each part is wrapped in a ``DF_Dataset``.
        batchsize: Batch size used by both loaders.
        test_size: Forwarded to ``train_test_split`` as the held-out share.
        random_state: Forwarded to ``train_test_split``. The default ``None``
            keeps the original unseeded behaviour; pass an int to make the
            train/test partition reproducible across runs.

    Returns:
        tuple: ``(train_loader, test_loader)``. Only the training loader
        shuffles its batches.
    """
    # Split the DataFrame into training and testing sets
    df_train, df_test = train_test_split(
        df, test_size=test_size, random_state=random_state
    )

    # Create the training and testing datasets
    train_dataset = DF_Dataset(df_train)
    test_dataset = DF_Dataset(df_test)

    # Create the DataLoaders for training and testing sets
    train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

    return train_loader, test_loader

def loss_fn(x, x_hat):
    """Return the summed squared difference between ``x`` and ``x_hat``.

    The element-wise squared error is detached, copied to the CPU and summed
    with NumPy, so the result is a NumPy scalar rather than a tensor.
    """
    squared_error = (x - x_hat).pow(2)
    as_array = squared_error.detach().cpu().numpy()
    return np.sum(as_array)

In [168]:
# Read the embedding table from disk and build the train/test loaders with
# create_dataloaders' defaults (batch size 32, 20% of the rows held out).
# NOTE(review): the split is unseeded, so the partition differs between runs.
# NOTE(review): assumes every CSV column is numeric, since DF_Dataset casts the
# whole frame to float32 — confirm against the file.
df_data = pd.read_csv('bert_embeddings_small.csv')
train_loader, test_loader = create_dataloaders(df_data)

In [169]:
def test_reconstruction_loss(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    total_samples = 0
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    with torch.no_grad():  # No need to calculate gradients
        for data in test_loader:
            inputs = data[0].to(device)  # Assuming data is a tuple of (inputs, targets)
            reconstructions = model(inputs)[0]
            #loss = nn.functional.mse_loss(reconstructions, inputs, reduction='sum')
            loss = loss_fn(inputs, reconstructions)
            total_loss += loss
            total_samples += inputs.size(0)*inputs.size(1)
    
    average_loss = total_loss / total_samples
    return average_loss

In [175]:
def train(model, optimizer, epochs, batch_size=32, x_dim=512,
          train_data=None, eval_data=None):
    """Train ``model`` with the VAE objective and report test loss every epoch.

    Args:
        model: VAE-style module whose forward returns ``(x_hat, mean, log_var)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of passes over the training batches.
        batch_size: Unused; kept so existing calls keep working. The batch
            size is fixed by the loader.
        x_dim: Unused; kept so existing calls keep working.
        train_data: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``train_loader``.
        eval_data: Loader passed to ``test_reconstruction_loss`` after each
            epoch. Defaults to the notebook-level ``test_loader``.

    Returns:
        The summed training loss of the last epoch, or 0 when ``epochs`` is 0.
    """
    # Fall back to the notebook globals so the original call signature still
    # works, while letting callers pass their data explicitly.
    batches = train_loader if train_data is None else train_data
    held_out = test_loader if eval_data is None else eval_data

    model.train()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print("Using device:", device)
    model.to(device)

    # Bound before the loop so epochs=0 returns 0 instead of raising
    # UnboundLocalError at the return statement.
    overall_loss = 0
    for epoch in range(epochs):
        overall_loss = 0
        # The per-epoch evaluation below switches the model to eval mode, so
        # training mode has to be restored at the start of every epoch.
        model.train()
        for x, _ in batches:
            x = x.to(device)
            optimizer.zero_grad()
            x_hat, mean, log_var = model(x)
            loss = VAELoss(x, x_hat, mean, log_var, beta=1)

            overall_loss += loss.item()

            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch} \treconstruction loss: {test_reconstruction_loss(model, held_out)}')
    return overall_loss



In [176]:
# Encoder: 768-dimensional input -> 400-unit hidden layers -> 128-dimensional latent.
# Decoder mirrors it: 128 -> 400 -> 768. The assembled model is moved to DEVICE.
# NOTE(review): 768 presumably matches the column count of the embeddings CSV — confirm.
encoder = Encoder(768, 400, 128)
decoder = Decoder(128, 400, 768)
VAE_model = VAE(encoder, decoder).to(DEVICE)

In [177]:
# Train with Adam (lr=0.001) for 500 epochs; train() prints the test-set
# reconstruction loss after every epoch.
train(VAE_model, torch.optim.Adam(VAE_model.parameters(), lr=0.001), epochs=500)

Using device: cuda
Epoch 0 	reconstruction loss: 0.23496133524780188
Epoch 1 	reconstruction loss: 0.23111460968996542
Epoch 2 	reconstruction loss: 0.2290461599698065
Epoch 3 	reconstruction loss: 0.22799904741474555
Epoch 4 	reconstruction loss: 0.22823806807663824
Epoch 5 	reconstruction loss: 0.22722276918776
Epoch 6 	reconstruction loss: 0.22705141865893197
Epoch 7 	reconstruction loss: 0.2265317569394131
Epoch 8 	reconstruction loss: 0.22679019945334783
Epoch 9 	reconstruction loss: 0.2264028129269081
Epoch 10 	reconstruction loss: 0.22524623202896277
Epoch 11 	reconstruction loss: 0.22570242898210763
Epoch 12 	reconstruction loss: 0.22631180732257555
Epoch 13 	reconstruction loss: 0.2259653739660966
Epoch 14 	reconstruction loss: 0.22559331763924206
Epoch 15 	reconstruction loss: 0.22572820213241082
Epoch 16 	reconstruction loss: 0.2253709528057106
Epoch 17 	reconstruction loss: 0.22553442409302724
Epoch 18 	reconstruction loss: 0.2250382321016431
Epoch 19 	reconstruction loss: 

KeyboardInterrupt: 

In [None]:
# Show the module tree to check layer names and sizes.
print(VAE_model)

VAE(
  (Encoder): Encoder(
    (FC_input): Linear(in_features=768, out_features=400, bias=True)
    (FC_input2): Linear(in_features=400, out_features=400, bias=True)
    (FC_mean): Linear(in_features=400, out_features=128, bias=True)
    (FC_var): Linear(in_features=400, out_features=128, bias=True)
    (LeakyReLU): LeakyReLU(negative_slope=0.2)
  )
  (Decoder): Decoder(
    (FC_hidden): Linear(in_features=128, out_features=400, bias=True)
    (FC_hidden2): Linear(in_features=400, out_features=400, bias=True)
    (FC_output): Linear(in_features=400, out_features=768, bias=True)
    (LeakyReLU): LeakyReLU(negative_slope=0.2)
  )
)


In [None]:
def print_tensor_devices(model):
    """Print one line per named parameter of ``model`` stating its device."""
    for param_name, tensor in model.named_parameters():
        location = tensor.device
        print(f"{param_name} is on {location}")
# Report the device of every parameter of VAE_model.
print_tensor_devices(VAE_model)

Encoder.FC_input.weight is on cuda:0
Encoder.FC_input.bias is on cuda:0
Encoder.FC_input2.weight is on cuda:0
Encoder.FC_input2.bias is on cuda:0
Encoder.FC_mean.weight is on cuda:0
Encoder.FC_mean.bias is on cuda:0
Encoder.FC_var.weight is on cuda:0
Encoder.FC_var.bias is on cuda:0
Decoder.FC_hidden.weight is on cuda:0
Decoder.FC_hidden.bias is on cuda:0
Decoder.FC_hidden2.weight is on cuda:0
Decoder.FC_hidden2.bias is on cuda:0
Decoder.FC_output.weight is on cuda:0
Decoder.FC_output.bias is on cuda:0
