In [13]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

from torch.nn import SyncBatchNorm
from torchvision.models import resnet50, resnet152
from torchvision.io import read_image

import warnings
warnings.filterwarnings("ignore")

target_shape = (200, 200)

In [14]:
def get_image_paths(path):
    image_names = []
    for dirname, _, filenames in os.walk(path):
        for filename in filenames:
            fullpath = os.path.join(dirname, filename)
            image_names.append(fullpath)
    return image_names

left_dir_path = "ImgData/train/left"
right_dir_path = "ImgData/train/right"

left_images_path = get_image_paths(left_dir_path)
right_images_path = get_image_paths(right_dir_path)

print(f"Number of left images: {len(left_images_path)}\n")
print(f"Number of right images: {len(right_images_path)}\n")

Number of left images: 2000

Number of right images: 2000



In [15]:
anchor_images = left_images_path
positive_images = right_images_path

In [16]:
def preprocess_image(image_tensor):

    transform = transforms.Compose([
    transforms.Resize(target_shape, antialias=True),  
    ])
    
    image = transform(image_tensor)
    return image

def preprocess_triplets(anchor, positive, negative):
    """
    Given the filenames corresponding to the three images, load and
    preprocess them.
    """
    return (
        preprocess_image(anchor),
        preprocess_image(positive),
        preprocess_image(negative),
    )


def preprocess_doublets(anchor, test):
    return (
        preprocess_image(anchor),
        preprocess_image(test),
    )


In [17]:
def find_co_path(path, lookup, target_path):
    file_name = os.path.basename(path)
    src_img_name=os.path.splitext(file_name)[0]
    co_img_name= lookup.loc[lookup['left']==src_img_name]['right']
    co_img_name=co_img_name.values[0]
    return os.path.join(target_path,co_img_name+'.jpg')
    
ilookup = pd.read_csv("train.csv")

In [18]:
find_co_path('ImgData/train/left/aaz.jpg', ilookup, right_dir_path)

'ImgData/train/right\\mqw.jpg'

In [19]:
class TLLDataset(Dataset):
    def __init__(self, anchor_image_paths, positive_image_paths, ilookup_path):
        ilookup = pd.read_csv(ilookup_path)
        anchor_images = [read_image(path) for path in anchor_image_paths]
        positive_images = [read_image(find_co_path(path,ilookup,os.path.dirname(positive_image_paths[0]))) for path in anchor_image_paths]

        negative_images = anchor_images + positive_images
        random.shuffle(negative_images)
        
        self.a_images = torch.stack(anchor_images)
        self.p_images = torch.stack(positive_images)
        self.n_images = torch.stack(negative_images)
        
    def __len__(self):
        return len(self.a_images)
    
    def __getitem__(self, idx):
        anchor = self.a_images[idx]
        positive = self.p_images[idx]
        negative = self.n_images[idx]
        
        anchor, positive, negative = preprocess_triplets(anchor, positive, negative)
        
        return anchor, positive, negative

ilookup_path = 'train.csv'
tll_dataset = TLLDataset(anchor_images,positive_images,ilookup_path)

image_num = len(tll_dataset)
img_indices = list(range(image_num))
train_indices = img_indices[:round(image_num * 0.8)]
val_indices = img_indices[round(image_num * 0.8):]

train_loader = DataLoader(tll_dataset, batch_size=32)
val_loader = DataLoader(tll_dataset, batch_size=32)

In [20]:
class EmbeddingNet(nn.Module):
    def __init__(self):
        super(EmbeddingNet, self).__init__()
        resnet = resnet152(pretrained=True)
        for name, param in resnet.named_parameters():
            if "layer4" not in name:
                param.requires_grad = False
        self.features = nn.Sequential(*list(resnet.children())[:-2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.dense1 = nn.Sequential(nn.Linear(2048, 1024), nn.ReLU(), SyncBatchNorm(1024))
        self.dense2 = nn.Sequential(nn.Linear(1024, 512), nn.ReLU(), SyncBatchNorm(512))
        self.output = nn.Linear(512, 256)
        
    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.output(x)
        return x

embedding_net = EmbeddingNet()

In [21]:
class DistanceLayer(nn.Module):
    def __init__(self):
        super(DistanceLayer, self).__init__()

    def forward(self, anchor, positive, negative):
        ap_distance = F.pairwise_distance(anchor, positive, 2)
        an_distance = F.pairwise_distance(anchor, negative, 2)
        return ap_distance, an_distance

class SiameseNetwork(nn.Module):
    def __init__(self, embedding_net):
        super(SiameseNetwork, self).__init__()
        self.embedding_net = embedding_net
        self.distance_layer = DistanceLayer()
        
    def forward(self, anchor, positive, negative):
        anchor_embedding = self.embedding_net(anchor)
        positive_embedding = self.embedding_net(positive)
        negative_embedding = self.embedding_net(negative)
        ap_distance, an_distance = self.distance_layer(anchor_embedding, positive_embedding, negative_embedding)
        return ap_distance, an_distance

siamese_network = SiameseNetwork(embedding_net)

In [22]:
class TripletMarginLoss(nn.Module):
    def __init__(self, margin):
        super(TripletMarginLoss, self).__init__()
        self.margin = margin

    def forward(self, ap_distance, an_distance):
        return F.relu(ap_distance - an_distance + self.margin)

In [23]:

device = 'cuda'
margin = 0.5

siamese_network = siamese_network
criterion = TripletMarginLoss(margin)
optimizer = torch.optim.Adam(siamese_network.parameters(), lr=0.001)

loss_tracker = []
val_loss_tracker = []


def train(train_loader, device):
    epoch_loss = 0.0 
    for anchor,positive,negative in train_loader:
        anchor = anchor.to(device, dtype=torch.float)
        positive = positive.to(device, dtype=torch.float)
        negative = negative.to(device, dtype=torch.float)
        
        siamese_network.train() 
        optimizer.zero_grad()
        ap_distance, an_distance = siamese_network(anchor, positive, negative)
        loss = criterion(ap_distance, an_distance)

        if loss.numel() > 1:
            loss = loss.mean() 

        loss.backward()
        optimizer.step()
        loss_tracker.append(loss.item())

        epoch_loss += loss
        
    torch.cuda.empty_cache()
    return epoch_loss / len(train_loader)




# val 
def validate(val_loader,device):
    epoch_loss = 0.0
    for anchor,positive,negative in val_loader:
        anchor = anchor.to(device, dtype=torch.float)
        positive = positive.to(device, dtype=torch.float)
        negative = negative.to(device, dtype=torch.float)
        
        siamese_network.eval()
        with torch.no_grad():
            ap_distance, an_distance = siamese_network(anchor, positive, negative)
            loss = criterion(ap_distance, an_distance)

            if loss.numel() > 1:
                loss = loss.mean()  

        val_loss_tracker.append(loss.item())
        epoch_loss += loss
        
    torch.cuda.empty_cache()
    return epoch_loss / len(val_loader)


In [24]:
device = torch.device("cuda" if torch.cuda.is_available() else "mps")
siamese_network = siamese_network.to(device)

num_epochs = 20

for epoch in range(num_epochs):
    train_loss = train(train_loader, device)
    val_loss = validate(val_loader, device)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")
    loss_tracker = []
    val_loss_tracker = []


RuntimeError: Default process group has not been initialized, please make sure to call init_process_group.

In [None]:
torch.save(siamese_network, 'test_embed_152_pool.pth')

# test codes

In [None]:
## classifier
class PairwiseDistanceLayer(nn.Module):
    def __init__(self):
        super(PairwiseDistanceLayer, self).__init__()

    def forward(self, anchor, test):
        distances = F.cosine_similarity(anchor, test,2)
        return distances


class outputlayer(nn.Module):
    def __init__(self, embedding_net):
        super(outputlayer, self).__init__()
        self.embedding_net = embedding_net
        self.distance_layer = PairwiseDistanceLayer()
        
    def forward(self, anchor, tests):        
        anchor_embedding = self.embedding_net(anchor).unsqueeze(1).repeat(1,20,1)
        tests_embedding = []

        for i in range(tests.shape[1]):
            current_test = tests[:, i, :, :, :]
            tests_embedding.append(self.embedding_net(current_test).unsqueeze(1))
        
        tests_embedding = torch.cat(tests_embedding, dim=1)
        distances = self.distance_layer(anchor_embedding, tests_embedding)
        output = nn.Softmax(dim=1)(distances)
        return output

In [None]:
lookup_table = pd.read_csv('test_candidates.csv')

In [None]:
lookup_table.drop('left', axis=1).to_numpy().flatten()[20]

'jzn'

In [None]:
class testDataset(Dataset):
    def __init__(self,test_anchor_dir, test_dir_paths, test_df_filename):
        self.test_candidates_df = pd.read_csv(test_df_filename)
        
        anchor_names = self.test_candidates_df['left'].values

        flatten_test_names = self.test_candidates_df.drop('left', axis=1).to_numpy().flatten()

        anchor_paths = [os.path.join(test_anchor_dir, filename+'.jpg') for filename in anchor_names]
        
        test_anchor_images = [read_image(path) for path in anchor_paths]

        # test_images_paths = [find_test_co_paths(path,test_dir_paths) for path in anchor_paths]

        test_paths = [os.path.join(test_dir_paths, filename+'.jpg') for filename in flatten_test_names]
        
        test_images = [read_image(path) for path in test_paths]

        self.test_anchor_images = torch.stack(test_anchor_images)
        self.test_images = torch.stack(test_images)


    def __len__(self):
        return len(self.test_candidates_df.index)
    def __getitem__(self, idx):

        anchor = self.test_anchor_images[idx]
        test = self.test_images[idx*20: (idx+1)*20]

        anchor, test = preprocess_doublets(anchor, test)

        return anchor, test


In [None]:
test_left_dir_path = "ImgData/test/left"
test_right_dir_path = "ImgData/test/right"


In [None]:
testset = testDataset(test_left_dir_path, test_right_dir_path,"test_candidates.csv")

In [None]:
test_loader = DataLoader(testset, batch_size=32,shuffle=False)

In [None]:
testNet = outputlayer(siamese_network.embedding_net).to('mps')

result = pd.read_csv('test_candidates.csv')
constant_batch_size = 32
with torch.no_grad():
    for i, (anchor, tests) in enumerate(test_loader):
        
        anchor = anchor.to('mps', dtype=torch.float)
        tests = tests.to('mps', dtype=torch.float)
    
        sims = testNet(anchor, tests).tolist()
        batch_size = len(sims)
        
        for j in range(batch_size):
            result.loc[j+constant_batch_size*i, result.columns != 'left'] = sims[j]

        

In [None]:
result.to_csv("solutioncorrect152pretrainedpool.csv",index=False)