In [1]:
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T
from tqdm.notebook import tqdm_notebook

from lib.dataset import FolderDataset
from lib.models import ConvEncoder, ConvDecoder
from lib.utils import train_step, val_step, create_embedding


# Preprocessing pipeline applied to every image, in order:
#   1. ToTensor - convert the loaded image into a tensor.
#   2. Resize   - bring every image to a fixed 512x512 spatial size.
preprocessing_steps = [
    T.ToTensor(),
    T.Resize([512, 512]),
]
transforms = T.Compose(preprocessing_steps)

# Dataset over the image folder; the pipeline above is handed to it as its transform.
full_dataset = FolderDataset('data/img_data', transforms)



In [None]:
# Number of samples exposed by the folder dataset (shown as the cell output).
dataset_size = len(full_dataset)
dataset_size

13514

In [3]:
# Split configuration. Only the validation size is fixed; the training size is
# derived from the dataset so the cell keeps working when images are added or
# removed (random_split raises if the lengths do not sum to len(dataset)).
VAL_SIZE = 1000
BATCH_SIZE = 24
SPLIT_SEED = 42

if len(full_dataset) <= VAL_SIZE:
    raise ValueError(
        f"Dataset has {len(full_dataset)} samples; need more than VAL_SIZE={VAL_SIZE}."
    )
train_size = len(full_dataset) - VAL_SIZE

# A dedicated, seeded generator makes the train/validation membership
# reproducible across kernel restarts without touching the global RNG state.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = torch.utils.data.random_split(
    full_dataset, [train_size, VAL_SIZE], generator=split_generator
)

# Training batches are reshuffled every epoch; validation and the full-dataset
# loader keep the dataset order (the latter is used to build the embeddings).
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE)
full_loader = torch.utils.data.DataLoader(full_dataset, batch_size=BATCH_SIZE)





In [4]:
# Reconstruction objective: mean squared error between the decoder output and its target.
loss_fn = nn.MSELoss()

# The two halves of the autoencoder (project-defined modules).
encoder = ConvEncoder()
decoder = ConvDecoder()

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

encoder.to(device)
decoder.to(device)

# A single optimizer updates encoder and decoder weights jointly.
autoencoder_params = list(encoder.parameters()) + list(decoder.parameters())
optimizer = optim.Adam(autoencoder_params, lr=1e-3)

In [5]:
!export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

In [6]:
EPOCHS = 30

# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first save attempt.
CHECKPOINT_DIR = Path("checkpoints")
CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)

# Despite its name, this tracks the lowest validation loss seen so far; it
# starts at +inf so the first finite validation loss always triggers a save.
max_loss = float('inf')

for epoch in tqdm_notebook(range(EPOCHS)):
    # One pass over the training loader, then one pass over the validation loader.
    train_loss = train_step(encoder, decoder, train_loader, loss_fn, optimizer, device=device)
    print(f"Epochs = {epoch}, Training Loss : {train_loss}")

    val_loss = val_step(encoder, decoder, val_loader, loss_fn, device=device)
    print(f"Epochs = {epoch}, Validation Loss : {val_loss}")

    # Checkpoint only when validation improves, so the files on disk always
    # hold the best-performing weights rather than the most recent ones.
    if val_loss < max_loss:
        max_loss = val_loss
        print("Validation Loss decreased, saving new best model")
        torch.save(encoder.state_dict(), CHECKPOINT_DIR / "encoder_model.pt")
        torch.save(decoder.state_dict(), CHECKPOINT_DIR / "decoder_model.pt")

# Shape handed to create_embedding.
# NOTE(review): presumably (1, channels, height, width) of the encoder output
# for a 512x512 input - confirm against ConvEncoder.
EMBEDDING_SHAPE = (1, 256, 16, 16)

# After the training loop the in-memory encoder holds the *last* epoch's
# weights, which are not necessarily the best ones. Reload the best checkpoint
# written during training so the stored embeddings are consistent with the
# stored encoder_model.pt.
best_encoder_state = torch.load("checkpoints/encoder_model.pt", map_location=device)
encoder.load_state_dict(best_encoder_state)

# Encode the whole dataset with the (best) encoder.
embedding = create_embedding(encoder, full_loader, EMBEDDING_SHAPE, device)

# Move to host memory and drop autograd tracking before converting to NumPy.
numpy_embedding = embedding.cpu().detach().numpy()
num_images = numpy_embedding.shape[0]

# Flatten everything except the leading axis into one vector per row, then persist.
flattened_embedding = numpy_embedding.reshape((num_images, -1))
np.save("checkpoints/data_embedding.npy", flattened_embedding)

  0%|          | 0/30 [00:00<?, ?it/s]

0it [00:00, ?it/s]



Epochs = 0, Training Loss : 0.04869119077920914


0it [00:00, ?it/s]

Epochs = 0, Validation Loss : 0.06624417752027512
Validation Loss decreased, saving new best model


0it [00:00, ?it/s]

Exception ignored in: <function tqdm.__del__ at 0x7f5b30aeb160>
Traceback (most recent call last):
  File "/home/vaskers5/anaconda3/envs/image_similarity/lib/python3.9/site-packages/tqdm/std.py", line 1161, in __del__
    def __del__(self):
KeyboardInterrupt: 


KeyboardInterrupt: 