In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler, random_split
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from torch.nn import SyncBatchNorm
from torchvision.models import resnet50, resnet152
from torchvision.io import read_image
from torchvision.transforms import ToPILImage
from PIL import Image

import warnings
warnings.filterwarnings("ignore")

target_shape = (200, 200)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def getImagePaths(path):
    image_names = []
    for dirname, _, filenames in os.walk(path):
        for filename in filenames:
            fullpath = os.path.join(dirname, filename)
            image_names.append(fullpath)
    return image_names

left_dir_path = "ImgData/train/left"
right_dir_path = "ImgData/train/right"

left_images_path = getImagePaths(left_dir_path)
right_images_path = getImagePaths(right_dir_path)

print(f"Number of left images: {len(left_images_path)}\n")
print(f"Number of right images: {len(right_images_path)}\n")

Number of left images: 2000

Number of right images: 2000



In [3]:
def getShape(images_paths):
    shape = cv2.imread(images_paths[0]).shape
    for image_path in images_paths:
        image_shape=cv2.imread(image_path).shape
        if (image_shape!=shape):
            return "Different image shape"
        else:
            return "Same image shape " + str(shape)

In [4]:
anchor_images = left_images_path
positive_images = right_images_path

train_imgs = anchor_images + positive_images

In [5]:
def preprocess_image(image_tensor):
    """
    Preprocess the input image tensor.
    """
    
    # Define the transformations: resize
    transform = transforms.Compose([
    transforms.Resize(target_shape, antialias=True),  # Explicitly set antialias to True
    ])
    
    # Apply the transformations
    # print(image_tensor)
    image = transform(image_tensor)
    return image


def preprocess_doublets(anchor, test):
    return (
        preprocess_image(anchor),
        preprocess_image(test),
    )


In [6]:
class TLLDataset(Dataset):
    def __init__(self, train_imgs_paths):
        # Load images and convert to tensors
        train_imgs = [read_image(path) for path in train_imgs_paths]
        # print(len(train_imgs_paths))
        # positive_images = [read_image(find_co_path(path,ilookup,os.path.dirname(positive_image_paths[0]))) for path in anchor_image_paths]        
        self.train_imgs = torch.stack(train_imgs)
        
    def __len__(self):
        return len(self.train_imgs)
    
    def __getitem__(self, idx):
        img = self.train_imgs[idx]
        img = preprocess_image(img)
        return img

# Create an instance of TLLDataset
tll_dataset = TLLDataset(train_imgs)

# Determine the indices for training and validation
image_count = len(tll_dataset)
indices = list(range(image_count))
train_indices = indices[:round(image_count * 0.8)]
val_indices = indices[round(image_count * 0.8):]

# Create SubsetRandomSamplers
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# Create DataLoaders for training and validation
train_loader = DataLoader(tll_dataset, batch_size=4, sampler=train_sampler)
val_loader = DataLoader(tll_dataset, batch_size=4, sampler=val_sampler)

In [7]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

class Reshape(nn.Module):
    def __init__(self, *args):
        super().__init__()
        self.shape = args

    def forward(self, x):
        return x.view(self.shape)
    
class VAE(nn.Module):

    def __init__(self, input_channels = 3, hidden_channels =32, latent_dim=64, device='cuda'):
        super(VAE, self).__init__()
        self.device = device
        self.npc = 10
        self.n_cluster = 4

        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(input_channels, hidden_channels, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(hidden_channels, hidden_channels*2, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(hidden_channels*2, latent_dim, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Flatten()
        )

        # Latent mean and variance
        self.mean_layer = nn.Linear(40000, 200)
        self.logvar_layer = nn.Linear(40000, 200)

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(200, 40000),
            Reshape(-1, 64, 25, 25),
            nn.ConvTranspose2d(latent_dim, hidden_channels*2, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(hidden_channels*2, hidden_channels, kernel_size=3, stride=2, padding=0),
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(hidden_channels, input_channels, kernel_size=3, stride=2, padding=0),
            nn.ConstantPad2d((0, 1, 0, 1), value=0),  # Use ConstantPad2d for padding
            nn.Sigmoid()
        )

        # classifier

        self.classifer = nn.Sequential(
            nn.Linear(400, self.n_cluster),
            nn.Softmax()
        )

    def cluster(self, x):
        if self.n_cluster > 1:
            pca = PCA(n_components=50)
            pcs = pca.fit_transform(x)
            var = (pca.explained_variance_ratio_).cumsum()
            npc_raw = (np.where(var > 0.7))[0].min()

            if npc_raw > self.npc:
                npc_raw = self.npc
            pcs = pcs[:, :npc_raw]
            kmeans = KMeans().fit(StandardScaler.fit_transform(pcs))
            self.dummy_label = kmeans.labels_


    def encode(self, x):
        x = self.encoder(x)
        # print(x.shape)
        mean, logvar = self.mean_layer(x), self.logvar_layer(x)
        # print(mean.shape)
        return mean, logvar

    def reparameterization(self, mean, var):
        epsilon = torch.randn_like(var).to(self.device)
        z = mean + var*epsilon
        return z

    def decode(self, x):
        # print(x.shape)
        return self.decoder(x)

    def forward(self, x):
        self.cluster()
        mean, log_var = self.encode(x)
        z = self.reparameterization(mean, torch.exp(0.5 * log_var)) 
        predicted_labels = self.classifer(z)
        x_hat = self.decode(z)  
        return x_hat, mean, log_var, predicted_labels, self.dummy_label
    
    def get_latent_space(self, x):
        mean, log_var = self.encode(x)
        # print('mean: ', mean)
        # print('log var: ', log_var)
        z = self.reparameterization(mean, torch.exp(0.5 * log_var))
        # print('z: ', z)
        return z 

In [32]:
from torch.optim import Adam

model = VAE().to('cuda')
optimizer = Adam(model.parameters(), lr=1e-3)

def loss_function_decoder(x, x_hat, mean, log_var):
    reproduction_loss = nn.functional.binary_cross_entropy_with_logits(x_hat, x, reduction='sum')
    KLD = - 0.5 * torch.sum(1+ log_var - mean.pow(2) - log_var.exp())
    return reproduction_loss + KLD

def loss_function_clf(predicted_label, true_label):
    return nn.functional.cross_entropy(predicted_label, true_label)

def loss_function(x, x_hat, mean, log_var, predicted_label, true_label):
    return loss_function_decoder(x, x_hat, mean, log_var) + loss_function_clf(predicted_label, true_label)
    

In [9]:
def train(model, optimizer, epochs, device):
    model.train()
    for epoch in range(epochs):
        overall_loss = 0
        for batch_idx, x in enumerate(train_loader):
            
            x = x.to(device, dtype=torch.float)
            optimizer.zero_grad()

            x_hat, mean, log_var, p_label, true_label = model(x)
            loss = loss_function(x, x_hat, mean, log_var,p_label,true_label)

            loss.backward()

            overall_loss += loss.item()

            optimizer.step()

        print("\tEpoch", epoch + 1, "\tAverage Loss: ", overall_loss/(batch_idx*x.shape[0]))

    return overall_loss

train(model, optimizer, epochs=25, device='cuda')

	Epoch 1
	Epoch 2
	Epoch 3
	Epoch 4
	Epoch 5
	Epoch 6
	Epoch 7
	Epoch 8
	Epoch 9
	Epoch 10
	Epoch 11
	Epoch 12
	Epoch 13
	Epoch 14
	Epoch 15
	Epoch 16
	Epoch 17
	Epoch 18
	Epoch 19
	Epoch 20
	Epoch 21
	Epoch 22
	Epoch 23
	Epoch 24
	Epoch 25


0

In [10]:
torch.save(model.state_dict(),'vae.pt') 

In [44]:
## classifier
class PairwiseDistanceLayer(nn.Module):
    def __init__(self):
        super(PairwiseDistanceLayer, self).__init__()

    def forward(self, anchor, test):
        # print(anchor)
        # print(test)
        
        distances = F.cosine_similarity(anchor, test,2)
        # print(distances)
        return distances


class outputlayer(nn.Module):
    def __init__(self, vae_model):
        super(outputlayer, self).__init__()
        self.vae_model = vae_model
        self.distance_layer = PairwiseDistanceLayer()
        
    def forward(self, anchor, tests):
        # print(anchor.shape)
        # print('before: ', self.vae_model.get_latent_space(anchor).shape)
        anchor_embedding = self.vae_model.get_latent_space(anchor).unsqueeze(1).repeat(1,20,1)
        # print('a emb: ',anchor_embedding)
        # print('after: ',anchor_embedding.shape)

        tests_embedding = []
        # print('test shape', tests.shape)
        for i in range(tests.shape[1]):
            current_test = tests[:, i, :, :]
            tests_embedding.append(self.vae_model.get_latent_space(current_test).unsqueeze(1))
        
        tests_embedding = torch.cat(tests_embedding, dim=1)
        # print('test: ', tests_embedding)
        
        # expecting (32,20,256)
        
        # print('distance: ', self.distance_layer(anchor_embedding, tests_embedding).shape)

        distances = self.distance_layer(anchor_embedding, tests_embedding)
        # print('a emb: ',anchor_embedding)
        # print('t emb: ',tests_embedding)
        # print('distances: ', distances)

        output = nn.Softmax(dim=1)(distances)
        print(output)
        return output

In [45]:
class testDataset(Dataset):
    def __init__(self,test_anchor_dir, test_dir_paths, test_df_filename):
        self.test_candidates_df = pd.read_csv(test_df_filename)
        
        anchor_names = self.test_candidates_df['left'].values

        flatten_test_names = self.test_candidates_df.drop('left', axis=1).to_numpy().flatten()

        anchor_paths = [os.path.join(test_anchor_dir, filename+'.jpg') for filename in anchor_names]
        
        test_anchor_images = [read_image(path) for path in anchor_paths]

        # test_images_paths = [find_test_co_paths(path,test_dir_paths) for path in anchor_paths]

        test_paths = [os.path.join(test_dir_paths, filename+'.jpg') for filename in flatten_test_names]
        
        test_images = [read_image(path) for path in test_paths]

        self.test_anchor_images = torch.stack(test_anchor_images)
        self.test_images = torch.stack(test_images)

        # print(self.test_anchor_images.shape)
        # print(self.test_images.shape)
        

    def __len__(self):
        return len(self.test_candidates_df.index)
    def __getitem__(self, idx):

        anchor = self.test_anchor_images[idx]
        test = self.test_images[idx*20: (idx+1)*20]

        anchor, test = preprocess_doublets(anchor, test)

        # print(self.test_candidates_df.iloc[idx])
        return anchor, test


In [46]:
test_left_dir_path = "ImgData/test/left"
test_right_dir_path = "ImgData/test/right"

# test_left_images_path = getImagePaths(test_left_dir_path)
# test_right_images_path = getImagePaths(test_right_dir_path)

In [47]:
testset = testDataset(test_left_dir_path, test_right_dir_path,"test_candidates.csv")

In [48]:
test_loader = DataLoader(testset, batch_size=4,shuffle=False)

In [49]:
result = pd.read_csv('test_candidates.csv')

testNet = outputlayer(vae_model=model)

constant_batch_size = 4
with torch.no_grad():
    for i, (anchor, tests) in enumerate(test_loader):
        
        anchor = anchor.to('cuda', dtype=torch.float)
        tests = tests.to('cuda', dtype=torch.float)
    
        sims = testNet(anchor, tests).tolist()
        # print(sims)
        batch_size = len(sims)
        
        for j in range(batch_size):
            result.loc[j+constant_batch_size*i, result.columns != 'left'] = sims[j]

        

tensor([[0.0355, 0.0911, 0.0345, 0.0641, 0.0263, 0.0459, 0.0724, 0.0374, 0.0501,
         0.0646, 0.0225, 0.0509, 0.0469, 0.0621, 0.0501, 0.0898, 0.0220, 0.0292,
         0.0498, 0.0547],
        [0.0404, 0.0547, 0.0577, 0.0481, 0.0409, 0.0440, 0.0528, 0.0585, 0.0426,
         0.0547, 0.0574, 0.0650, 0.0515, 0.0283, 0.0444, 0.0464, 0.0288, 0.0542,
         0.0560, 0.0737],
        [0.0519, 0.0717, 0.0434, 0.0475, 0.0695, 0.0520, 0.0413, 0.0360, 0.0404,
         0.0641, 0.0470, 0.0327, 0.0472, 0.0498, 0.0379, 0.0589, 0.0714, 0.0464,
         0.0351, 0.0560],
        [0.0540, 0.0246, 0.0987, 0.0580, 0.0540, 0.0499, 0.0384, 0.0408, 0.0557,
         0.0420, 0.0425, 0.0338, 0.0681, 0.0429, 0.0366, 0.0354, 0.0481, 0.0469,
         0.0752, 0.0544]], device='cuda:0')
tensor([[0.0710, 0.0222, 0.0377, 0.0423, 0.0797, 0.0442, 0.0430, 0.0712, 0.0306,
         0.0798, 0.0325, 0.0482, 0.0330, 0.0512, 0.0503, 0.0535, 0.0575, 0.0698,
         0.0344, 0.0476],
        [0.0508, 0.0600, 0.0445, 0.0430, 0

In [51]:
result.to_csv("solutionVae.csv",index=False)