In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd

# The 'seaborn-notebook' style sheet was renamed 'seaborn-v0_8-notebook' in
# matplotlib 3.6 and the old name was removed in later releases. Use whichever
# name this installation provides; if neither exists the default style is kept.
for _style_name in ('seaborn-v0_8-notebook', 'seaborn-notebook'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

In [None]:
# Path of the CSV that is read with pd.read_csv further down, both for the
# exploratory DataFrame and inside the Lincs dataset. It is a relative path,
# so it is resolved against the kernel's current working directory.
DATA_FILE = '../data/level5_1000.csv'

In [None]:
# Load the whole CSV into memory for interactive inspection.
# NOTE: the Lincs dataset defined later reads the same file again on its own,
# so this frame is only used for exploration in the cells shown here.
df = pd.read_csv(DATA_FILE)

In [None]:
# Preview the first rows as a quick sanity check that the file loaded as expected.
df.head()

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [None]:
class Lincs(Dataset):
    """Dataset that serves each data column of a CSV table as one sample.

    The table is read with ``pd.read_csv``. Column 0 is never served
    (presumably an identifier/label column -- confirm against the CSV);
    sample ``i`` is column ``i + 1``, returned as a 1-D ``float32`` tensor
    whose length equals the number of rows in the table.
    """

    def __init__(self, path=None):
        """Read the table.

        Args:
            path: Anything ``pd.read_csv`` accepts. When omitted, the
                notebook-level ``DATA_FILE`` is used, which keeps the
                original zero-argument ``Lincs()`` call working.
        """
        super().__init__()
        # Resolve the default lazily so DATA_FILE is looked up at call time.
        self.df = pd.read_csv(DATA_FILE if path is None else path)

    def shape(self):
        """Return ``(n_rows, n_columns)`` of the underlying table, column 0 included."""
        return self.df.shape

    def __len__(self):
        """Number of samples: every column except column 0."""
        return self.df.shape[1] - 1

    def __getitem__(self, idx):
        """Return sample ``idx`` as a float32 tensor.

        Negative indices count from the end, as for Python sequences.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        # Without normalisation, idx == -1 would map to column 0, which this
        # dataset otherwise never serves.
        if idx < 0:
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f'sample index out of range for dataset of size {n_samples}')
        return torch.as_tensor(self.df.iloc[:, idx + 1].values, dtype=torch.float32)

# Build the dataset once and wrap it in a loader that yields batches of 32
# samples in dataset order (no shuffling is requested).
lincs = Lincs()
dloader = DataLoader(dataset=lincs, batch_size=32)

In [None]:
class VAE(nn.Module):
    """Fully connected variational autoencoder.

    Encoder: ``input_shape -> dim_1 -> dim_1`` with ReLU activations, followed
    by two linear heads of size ``dim_2`` (mean and log-variance).
    Decoder: ``dim_2 -> dim_1 -> dim_1`` with ReLU activations, followed by a
    linear layer back to ``input_shape``.
    """

    def __init__(self, input_shape, dim_1, dim_2):
        super().__init__()
        # Encoder trunk and its two heads.
        self.fc1 = nn.Linear(in_features=input_shape, out_features=dim_1)
        self.fc2 = nn.Linear(in_features=dim_1, out_features=dim_1)
        self.enc_mu = nn.Linear(in_features=dim_1, out_features=dim_2)
        self.enc_logvar = nn.Linear(in_features=dim_1, out_features=dim_2)

        # Decoder.
        self.fc_out1 = nn.Linear(in_features=dim_2, out_features=dim_1)
        self.fc_out2 = nn.Linear(in_features=dim_1, out_features=dim_1)
        self.out = nn.Linear(in_features=dim_1, out_features=input_shape)

    def encode(self, x):
        """Map ``x`` to the pair ``(mean, log-variance)`` of the latent Gaussian."""
        features = self.fc1(x).relu()
        features = self.fc2(features).relu()
        mean = self.enc_mu(features)
        log_variance = self.enc_logvar(features)
        return mean, log_variance

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(logvar / 2)``."""
        sigma = (0.5 * logvar).exp()
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def decode(self, z):
        """Map a latent code back to input space (linear output, no activation)."""
        features = self.fc_out1(z).relu()
        features = self.fc_out2(features).relu()
        return self.out(features)

    def forward(self, t):
        """Return ``(reconstruction, mu, logvar)`` for the input ``t``."""
        mu, logvar = self.encode(t)
        latent = self.reparameterize(mu, logvar)
        reconstruction = self.decode(latent)
        return reconstruction, mu, logvar

In [None]:
def loss_function(recon_x, x, mu, logvar):
    """VAE loss: summed squared reconstruction error plus KL divergence.

    Args:
        recon_x: Reconstruction produced by the decoder, same shape as ``x``.
        x: Original input batch.
        mu: Latent means; the last dimension is the latent dimension.
        logvar: Latent log-variances, same shape as ``mu``.

    Returns:
        A scalar tensor: the squared error summed over every element of the
        batch, plus the closed-form KL(N(mu, exp(logvar)) || N(0, I)) summed
        over the latent dimension and over the batch.
    """
    # Squared reconstruction error, summed over the whole batch (scalar).
    recon_err = F.mse_loss(recon_x, x, reduction='sum')
    # Per-sample KL divergence to the standard normal prior.
    kld = 0.5 * (mu.pow(2) + logvar.exp() - logvar - 1).sum(dim=-1)
    # Reduce the KL term to a scalar *before* adding. Adding the scalar error to
    # the per-sample KL vector and summing afterwards would count the
    # reconstruction error once per sample, i.e. batch_size times.
    return recon_err + kld.sum()

def train(epoch):
    """Run one optimisation pass over ``dloader`` and return the mean batch loss.

    Uses the notebook-level ``model``, ``dloader``, ``device``, ``optimizer``
    and ``loss_function``. ``epoch`` is accepted for the caller's convenience
    but is not used here.

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    batch_losses = []
    for x in dloader:
        x = x.to(device)
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(x)
        loss = loss_function(recon_batch, x, mu, logvar)
        loss.backward()
        batch_losses.append(loss.item())
        optimizer.step()
    return sum(batch_losses) / len(dloader)

def run(epochs=1000):
    """Train for ``epochs`` epochs and plot the loss curve.

    Calls ``train(epoch)`` once per epoch, prints the latest value every 100
    epochs, and finally draws the recorded values on the current matplotlib
    axes.

    Args:
        epochs: Number of epochs to run (default 1000).
    """
    train_losses = []
    # Honour the requested epoch count; this used to be hard-coded to 1000.
    for epoch in range(epochs):
        train_losses.append(train(epoch))
        if epoch % 100 == 0:
            print(f'=======> Epoch: {epoch} Average loss: {train_losses[-1]}')
    plt.plot(np.arange(len(train_losses)), train_losses)
    plt.xlabel('epoch')
    plt.ylabel('average loss')

In [None]:
# Sweep over (hidden width, latent size) pairs. `model`, `device` and
# `optimizer` are deliberately module-level names because train() reads them.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for dims in [(64, 7), (64, 32), (256, 128), (256, 32), (128, 32), (128, 64)]:
    dim_1, dim_2 = dims

    # Re-seed before every configuration so each model starts from the same
    # random state.
    seed = 2
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Build the model from the swept sizes. These were previously hard-coded
    # to 64 and 7, so every iteration trained the same architecture.
    # The input size is the number of table rows, i.e. the length of one sample.
    model = VAE(lincs.shape()[0], dim_1=dim_1, dim_2=dim_2)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    print(dims)
    run(epochs=500)