In [1]:
import json
import sys
import torch
import numpy as np
import matplotlib.pyplot as plt
from os import path
from tqdm import tqdm
from IPython.display import clear_output
from sklearn.decomposition import IncrementalPCA
sys.path.append('../code')
from data_loader import get_dataset
from models.regressor_model import Regressor

In [2]:
# Load the JSON description that get_dataset consumes.
with open("../data/data.json", "r") as json_file:
    data_dict = json.load(json_file)

# get_dataset hands back three loaders, in this order:
# unlabeled, training, validation.
(
    unlabeled_dataloader,
    training_dataloader,
    validation_dataloader,
) = get_dataset(data_dict=data_dict, batch_size=128)

Dataset loaded
Dataset loaded
Dataset loaded


In [3]:
# Size of the PCA embedding; reused below as the first entry of the
# Regressor size list.
n_components = 1000
ipca = IncrementalPCA(n_components=n_components)

# Fit the PCA batch by batch; the second item of each batch is ignored here.
for unlabeled_batch, _ in tqdm(unlabeled_dataloader):
    ipca.partial_fit(unlabeled_batch)

100%|██████████| 10/10 [05:40<00:00, 34.03s/it]


In [4]:
# Toggle to run the model and loss on the GPU.
use_cuda = False

# Number of leading target columns that are regressed
# (the training loop slices y[:, :out_features]).
out_features = 156958

model = Regressor([n_components, 4096, out_features])
criterion = torch.nn.MSELoss()

# Move the modules before building the optimizer so that it is created from
# the parameters that are actually trained.
if use_cuda:
    model = model.cuda()
    criterion = criterion.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

def get_numpy(x):
    """Return the value of tensor ``x`` as a NumPy array.

    The tensor is detached from the autograd graph and brought to host
    memory if it lives on another device, so the call works for CPU and
    CUDA tensors alike, whether or not they require gradients.

    Args:
        x: a ``torch.Tensor``.

    Returns:
        numpy.ndarray holding the values of ``x``.
    """
    # detach() replaces the legacy ``.data`` attribute; cpu() is a no-op for
    # tensors already on the CPU, so no global device flag has to be checked.
    return x.detach().cpu().numpy()

In [5]:
epochs = 5
log_every = 1

# Per-epoch mean losses and the epoch index each of them belongs to.
val_losses, val_iter = [], []
train_losses, train_iter = [], []


def _prepare_batch(X, y):
    """Project X with the fitted PCA and return (encoded inputs, targets) on the active device."""
    X_encoded = ipca.transform(X)
    X_encoded = torch.tensor(X_encoded, dtype=torch.float32, device='cuda' if use_cuda else 'cpu')
    if use_cuda:
        y = y.cuda()
    # Only the first out_features target columns are regressed.
    return X_encoded, y[:, :out_features]


for i in range(epochs):
    # Validation runs before this epoch's training step, so the first
    # recorded validation loss belongs to the model as it entered the loop.
    model.eval()
    tot_val_loss = 0
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time on the very wide output layer.
    with torch.no_grad():
        for X, y in validation_dataloader:
            X_encoded, y_target = _prepare_batch(X, y)

            # Call the module itself rather than .forward() so hooks run.
            y_pred = model(X_encoded)

            val_loss = criterion(y_pred, y_target)
            tot_val_loss += get_numpy(val_loss)

    val_iter.append(i)
    val_losses.append(tot_val_loss/len(validation_dataloader))

    model.train()
    tot_train_loss = 0
    for X, y in training_dataloader:
        X_encoded, y_target = _prepare_batch(X, y)

        y_pred = model(X_encoded)

        train_loss = criterion(y_pred, y_target)
        tot_train_loss += get_numpy(train_loss)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    train_iter.append(i)
    train_losses.append(tot_train_loss/len(training_dataloader))

    if i % log_every == 0:
        print(f"Training loss: {train_losses[-1]}\nValidation loss: {val_losses[-1]}")
        fig = plt.figure(figsize=(12,4))
        plt.subplot(1, 2, 1)
        plt.plot(train_iter, train_losses, label='train_loss')
        plt.plot(val_iter, val_losses, label='valid_loss')
        plt.xlabel('epoch')
        plt.ylabel('MSE loss')
        plt.legend()
        plt.show()
        # wait=True defers the clear until the next output arrives, so the
        # latest plot stays visible until it is replaced.
        clear_output(wait=True)

KeyboardInterrupt: 