In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import DataLoader
from options import args
from models.base import BaseModel
from trainers.base import AbstractTrainer
from models import model_factory
from dataloaders import dataloader_factory
from trainers import trainer_factory
from utils import *
import tqdm

In [2]:
args.dataloader_code = 'ae'
args.dataset_code = 'ml-1m'
train_loader, valid_loader, test_loader = dataloader_factory(args)

Already preprocessed. Skip preprocessing


In [3]:
x = train_loader.dataset.data
valid = valid_loader.dataset.input_data
test = test_loader.dataset.input_data

In [16]:
class Autoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, latent_dim):
        super(Autoencoder, self).__init__()
        self.hidden_dim = None
        self.latent_dim = None
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, latent_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, input_dim),
            nn.Sigmoid()
        )
        self.encoder.apply(self.weight_init)
        self.decoder.apply(self.weight_init)
        
    def weight_init(self, m):
        if isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight)
            m.bias.data.normal_(0.0, 0.001)
    
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [17]:
num_samples = x.shape[0]
input_dim = x.shape[1]
hidden_dim = 0
latent_dim = args.ae_latent_dim
learning_rate = args.lr
num_epochs = 100

model = Autoencoder(input_dim,hidden_dim,latent_dim)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [20]:
#Training
from tqdm import tqdm
train_loader = tqdm(train_loader)
for epoch in range(num_epochs):
    for batch_index, batch in enumerate(train_loader):
        # batch_size = batch[0].size(0)
        batch = [x.to(args.device) for x in batch]
        input_x = torch.stack(batch)
        decoded = model(input_x)
        loss = criterion(decoded, input_x)
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

100%|██████████| 95/95 [00:05<00:00, 16.03it/s]


Epoch [1/100], Loss: 0.0896
Epoch [2/100], Loss: 0.0784
Epoch [3/100], Loss: 0.0546
Epoch [4/100], Loss: 0.0661
Epoch [5/100], Loss: 0.0511
Epoch [6/100], Loss: 0.0566
Epoch [7/100], Loss: 0.0543
Epoch [8/100], Loss: 0.0477
Epoch [9/100], Loss: 0.0421
Epoch [10/100], Loss: 0.0342
Epoch [11/100], Loss: 0.0345
Epoch [12/100], Loss: 0.0324


KeyboardInterrupt: 

In [None]:
encoder = model.encoder
encoded_train_data = encoder(x).detach().numpy()

In [53]:
decoded  = model(x)
import numpy as np 
from sklearn.metrics import roc_auc_score
roc_auc_score(np.array(x),np.where(decoded.detach().numpy() > 0.5, 1., 0.))

0.998733741935491

In [60]:
encoded = model.encoder(x)
u,s,v = torch.svd(encoded)
u.shape,s.shape,v.shape

(torch.Size([6034, 256]), torch.Size([256]), torch.Size([256, 256]))

In [89]:
torch.save(v,'v.pt')