In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
class VAE(nn.Module):
    def __init__(self, input_dim=784, hidden_dim=400, latent_dim=200):
        super(VAE, self).__init__()
        # encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, latent_dim),
            nn.LeakyReLU(0.2)
            )
        
        # latent mean and variance 
        self.mean_layer = nn.Linear(latent_dim, 2)
        self.logvar_layer = nn.Linear(latent_dim, 2)
        
        # decoder
        self.decoder = nn.Sequential(
            nn.Linear(2, latent_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(latent_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
            )
     
    def encode(self, x):
        x = self.encoder(x)
        mean, logvar = self.mean_layer(x), self.logvar_layer(x)
        return mean, logvar

    def reparameterization(self, mean, var):
        epsilon = torch.randn_like(var)
        z = mean + var*epsilon
        return z

    def decode(self, x):
        return self.decoder(x)

    def forward(self, x):
        mean, logvar = self.encode(x)
        z = self.reparameterization(mean, logvar)
        x_hat = self.decode(z)
        return x_hat, mean, logvar

def VAELoss(x, x_hat, mean, logvar, beta=1):
    MSE = nn.functional.mse_loss(x_hat, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp())
    return MSE + beta * KLD

In [3]:
class DF_Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        self.data = torch.tensor(df.values, dtype=torch.float32)
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        return (self.data[idx], 0)
    
def create_dataloaders(df, batchsize=32, test_size=0.2):
    # Split the DataFrame into training and testing sets
    df_train, df_test = train_test_split(df, test_size=test_size)
    
    # Create the training and testing datasets
    train_dataset = DF_Dataset(df_train)
    test_dataset = DF_Dataset(df_test)
    
    # Create the DataLoaders for training and testing sets
    train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)
    
    return train_loader, test_loader

def loss(x, x_hat):
    mse_loss = np.mean((x - x_hat)**2)
    return mse_loss

In [4]:
df_data = pd.read_csv('bert_embeddings_small.csv')
train_loader, test_loader = create_dataloaders(df_data)

In [56]:
def test_reconstruction_loss(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    total_samples = 0
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    with torch.no_grad():  # No need to calculate gradients
        for data in test_loader:
            inputs = data[0].to(device)  # Assuming data is a tuple of (inputs, targets)
            reconstructions = model(inputs)[0]
            '''print(inputs.shape)
            for i in range(inputs.shape[0]):
                for j in range(inputs.shape[1]):
                    if inputs[i][j] != 0:
                        print(f'{inputs[i][j]} -> {reconstructions[i][j]}')
                        print(reconstructions[i][j])'''
            
            loss = nn.functional.mse_loss(reconstructions, inputs, reduction='sum')
            total_loss += loss.item()
            total_samples += inputs.size(0)
    
    average_loss = total_loss / total_samples
    return average_loss

In [57]:
def train(model, optimizer, epochs, batch_size=32, x_dim=512):
    model.train()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print("Using device:", device)
    model.to(device)
    for epoch in (range(epochs)):
        overall_loss = 0
        model.train()
        for batch_idx, (x, _) in (enumerate(train_loader)):
            x = x.to(device)
            optimizer.zero_grad()
            x_hat, mean, log_var = model(x)
            loss = VAELoss(x, x_hat, mean, log_var, beta=1)
            
            overall_loss += loss.item()
            
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch} \treconstruction loss: {test_reconstruction_loss(model, test_loader)}')
    return overall_loss



In [59]:
VAE_model = VAE(input_dim=768, hidden_dim=400, latent_dim=256).to('cuda')
#print(f'Epoch -1 \treconstruction loss: {test_reconstruction_loss(VAE_model, test_loader)}')
train(VAE_model, torch.optim.Adam(VAE_model.parameters(), lr=0.005), epochs=2)

Using device: cuda
Epoch 0 	reconstruction loss: 194.71999242770232
Epoch 1 	reconstruction loss: 194.46224409834724


28309927.732910156

In [None]:
def print_tensor_devices(model):
    for name, param in model.named_parameters():
        print(f"{name} is on {param.device}")
print_tensor_devices(VAE_model)

encoder.0.weight is on cuda:0
encoder.0.bias is on cuda:0
encoder.2.weight is on cuda:0
encoder.2.bias is on cuda:0
mean_layer.weight is on cuda:0
mean_layer.bias is on cuda:0
logvar_layer.weight is on cuda:0
logvar_layer.bias is on cuda:0
decoder.0.weight is on cuda:0
decoder.0.bias is on cuda:0
decoder.2.weight is on cuda:0
decoder.2.bias is on cuda:0
decoder.4.weight is on cuda:0
decoder.4.bias is on cuda:0
