#### Roadmap
- The dataset we will be working with is the LFW dataset.
- We will initially try to break it into training and test sets and see the performance of the model
- We will then try to use external datasets to train the models and then test it on the LFW dataset
    - Try to just use the external dataset as training and then LFW dataset as the test. This should lead to different data distribution problems. Understand how to solve these
    - Then maybe use the large external dataset to train embeddings. Then fine-tune on the LFW dataset. This should help us understand how to fine-tune properly
- Then we will look into seeing if we can use the trained models on new faces using webcam. Try to see if we can implement some sort of webcam based lock

In [1]:
import os
from PIL import Image
import numpy as np
from collections import defaultdict, Counter

#### LFW Dataset

In [2]:
from torch.utils.data import Dataset, DataLoader

In [3]:
class PairsDataRead:
    """Load the LFW pair-definition files into cleaned Python lists.

    Three tab-separated files are read from ``data_dir``:

    * ``pairs.txt``         -> ``self.pairs_data``
    * ``pairsDevTest.txt``  -> ``self.pairs_data_dev``
    * ``pairsDevTrain.txt`` -> ``self.pairs_data_train``

    Each cleaned entry is a list of either
    ``[name, idx_1, idx_2]`` (three fields) or
    ``[name_1, idx_1, name_2, idx_2]`` (four fields), with the indices
    converted to ``int``.

    Args:
        data_dir: Directory containing the three pair files. Defaults to
            ``'./Data'``, the location the notebook originally hard-coded.
    """

    def __init__(self, data_dir='./Data'):
        self.pairs_data = self._read_pairs_file(os.path.join(data_dir, 'pairs.txt'))
        self.pairs_data_dev = self._read_pairs_file(os.path.join(data_dir, 'pairsDevTest.txt'))
        self.pairs_data_train = self._read_pairs_file(os.path.join(data_dir, 'pairsDevTrain.txt'))

    def _read_pairs_file(self, file_path):
        """Read one pair file and return its cleaned entries."""
        with open(file_path, 'r') as f:
            return self.clean_pairs_data(f.readlines())

    def clean_pairs_data(self, pairs_data):
        """Parse raw tab-separated lines into typed pair entries.

        Args:
            pairs_data: Iterable of text lines.

        Returns:
            A list of lists. Lines that do not split into exactly three or
            four tab-separated fields (e.g. count/header lines, blank lines)
            are skipped.

        Raises:
            ValueError: If an index field of a 3- or 4-field line is not an
                integer.
        """
        pairs_list_clean = []
        for line in pairs_data:
            fields = line.strip().split('\t')
            if len(fields) == 3:
                name, idx_1, idx_2 = fields
                pairs_list_clean.append([name, int(idx_1), int(idx_2)])
            elif len(fields) == 4:
                name_1, idx_1, name_2, idx_2 = fields
                pairs_list_clean.append([name_1, int(idx_1), name_2, int(idx_2)])
        return pairs_list_clean

In [4]:
class ImageDataRead:
    """Recursively load every ``.jpg`` / ``.png`` image below a directory.

    Args:
        data_dir: Root directory that is walked with ``os.walk``.
    """

    def __init__(self, data_dir):
        self.data_dir = data_dir

    def get_image_data(self):
        """Read all images under ``self.data_dir`` into one array.

        Files that cannot be read are reported on stdout and skipped
        (best-effort loading).

        Returns:
            A tuple ``(images_data_array, images_path_data)`` where
            ``images_data_array`` is the ``np.stack`` (axis 0) of every image
            converted to an ``int16`` array, and ``images_path_data`` is the
            list of file paths in the same order.

        Raises:
            ValueError: If no image could be loaded, or (from ``np.stack``)
                if the loaded images do not all share one shape.
        """
        images_data = []
        images_path_data = []
        for root, _dirs, files in os.walk(self.data_dir):
            for file in files:
                if not file.endswith(('.jpg', '.png')):
                    continue
                file_path = os.path.join(root, file)
                try:
                    with Image.open(file_path) as image:
                        image.load()
                        image_data = np.asarray(image, dtype="int16")
                # `Exception` (not a bare except) so Ctrl-C / SystemExit still
                # abort a long directory scan instead of being swallowed.
                except Exception as err:
                    print(f"Unknown error has occured while reading file {file_path}: {err!r}")
                    continue
                images_data.append(image_data)
                images_path_data.append(file_path)
        if not images_data:
            raise ValueError(f"No readable .jpg/.png images found under {self.data_dir!r}")
        images_data_array = np.stack(images_data, axis=0)
        return images_data_array, images_path_data

In [5]:
def get_image_file_n(name, idx):
    """Build the image file name ``<name>_<idx>.jpg``.

    ``idx`` is converted to text and left-padded with zeros to at least four
    characters (longer values are left untouched).
    """
    padded_idx = str(idx).rjust(4, '0')
    return f"{name}_{padded_idx}.jpg"

def get_image(name, idx):
    """Return the ``image_index_dict`` entry for person ``name``, image ``idx``.

    NOTE(review): ``image_index_dict`` is read as a module-level global, so
    it must already exist (keyed by image file name) when this is called.
    """
    return image_index_dict[get_image_file_n(name, idx)]

In [6]:
class LFWDataset:
    """In-memory dataset of LFW *training* image pairs.

    All images under ``image_data_dir`` are loaded up front, then the pairs
    listed in ``PairsDataRead().pairs_data_train`` are resolved to image
    arrays.

    Each item is ``(image_1, image_2, label)`` where ``label`` is ``1`` for a
    three-field pair entry (both images share one name) and ``0`` for a
    four-field entry (two different names).

    Args:
        image_data_dir: Root directory of the image files.
    """

    def __init__(self, image_data_dir):
        self.pdr = PairsDataRead()
        self.idr = ImageDataRead(image_data_dir)
        images_data_array, images_path_data = self.idr.get_image_data()
        self.image_1_data, self.image_2_data, self.labels = self._process_image_data(
            images_data_array, images_path_data
        )

    def _process_image_data(self, images_data_array, images_path_data):
        """Resolve the training pairs to two aligned image arrays plus labels.

        Args:
            images_data_array: Array whose first axis indexes the loaded images.
            images_path_data: File path of each image, in the same order.

        Returns:
            ``(image_1_data, image_2_data, labels)``; row ``i`` of each
            belongs to the ``i``-th training pair.

        Raises:
            KeyError: If a pair refers to an image file that was not loaded.
        """
        # Map bare file name -> row in images_data_array. os.path.basename
        # honours the platform separator instead of assuming a backslash.
        image_index_dict = {
            os.path.basename(image_path): idx
            for idx, image_path in enumerate(images_path_data)
        }

        def lookup(name, idx):
            file_name = get_image_file_n(name, idx)
            try:
                return image_index_dict[file_name]
            except KeyError:
                raise KeyError(
                    f"Image {file_name!r} referenced by the pairs file was not loaded"
                ) from None

        image_1_index = []
        image_2_index = []
        matching_label = []
        # Use the instance's own pair reader (self.pdr), not a global `pdr`.
        for pair in self.pdr.pairs_data_train:
            if len(pair) == 3:
                name_1, idx_1, idx_2 = pair
                name_2 = name_1
                label = 1
            else:
                name_1, idx_1, name_2, idx_2 = pair
                label = 0
            image_1_index.append(lookup(name_1, idx_1))
            image_2_index.append(lookup(name_2, idx_2))
            matching_label.append(label)

        # Explicit integer index arrays keep the empty-pair-list case valid.
        image_1_data = images_data_array[np.asarray(image_1_index, dtype=np.intp)]
        image_2_data = images_data_array[np.asarray(image_2_index, dtype=np.intp)]
        labels = np.array(matching_label)
        return image_1_data, image_2_data, labels

    def __len__(self):
        """Number of pairs."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image_1, image_2, label)`` for pair ``idx``."""
        return self.image_1_data[idx], self.image_2_data[idx], self.labels[idx]

In [7]:
def get_image_file_n(name, idx):
    """Return ``<name>_<idx>.jpg`` with ``idx`` zero-padded on the left to four characters."""
    return "{}_{:0>4}.jpg".format(name, str(idx))

def get_image(name, idx):
    """Fetch the ``image_index_dict`` value for the file name built from ``name`` and ``idx``.

    NOTE(review): reads the global ``image_index_dict``, which has to be
    defined at module level before this function is called.
    """
    lookup_key = get_image_file_n(name, idx)
    return image_index_dict[lookup_key]

In [8]:
# Scratch check of how a backslash before `{` behaves in an f-string: `\{` is
# not a recognised escape sequence, so the backslash is kept literally (the
# recorded result is '1\\2'). NOTE(review): unrecognised escapes trigger a
# warning on recent Python versions; prefer `\\` or os.path.join in real code.
f"{1}\{2}"

'1\\2'

In [9]:
# Random dummy batch with shape (10, 250, 250, 3), used to try out axis reordering.
img_1 = np.random.rand(10,250,250,3) 

In [10]:
# Move the last axis to position 1; the recorded result is (10, 3, 250, 250).
img_1.transpose(0,3,1,2).shape

(10, 3, 250, 250)

In [128]:
class LFWDatasetSlow:
    """LFW pair dataset that reads the two images from disk on every access.

    Args:
        image_data_dir: Root directory; images are expected at
            ``<image_data_dir>/<name>/<name>_<idx padded to 4>.jpg``.
        train: If true, serve ``pairs_data_train``; otherwise
            ``pairs_data_dev``.
        cache: Stored on the instance; currently not used by any method.
        transform: Optional callable applied to each loaded image array.
        label_transform: Optional callable applied to the integer label.

    Each item is ``(img_1, img_2, label)`` with ``label == 1`` for a
    three-field pair entry (same name) and ``0`` for a four-field entry.
    """

    def __init__(self, image_data_dir, train = True, cache = True, transform = None, label_transform = None):
        self.pairs_data = PairsDataRead()
        self.image_data_dir = image_data_dir
        self.train = train
        self.cache = cache
        self.transform = transform
        self.label_transform = label_transform

    def _active_pairs(self):
        """Return the pair list selected by ``self.train``."""
        if self.train:
            return self.pairs_data.pairs_data_train
        return self.pairs_data.pairs_data_dev

    def __len__(self):
        """Number of pairs in the selected split."""
        return len(self._active_pairs())

    def _image_path(self, name, idx):
        """Build the on-disk path for image ``idx`` of person ``name``."""
        file_name = f"{name}_{str(idx).rjust(4, '0')}.jpg"
        # os.path.join uses the platform separator; the previous hand-written
        # backslashes only resolved on Windows.
        return os.path.join(self.image_data_dir, name, file_name)

    def _get_image_from_name_idx(self, name, idx):
        """Load one image from disk and return it as a NumPy array."""
        # The context manager closes the file handle once the pixels have
        # been copied into the array.
        with Image.open(self._image_path(name, idx)) as img:
            return np.array(img)

    def _get_image_label_pair(self, pair):
        """Turn one pair entry into ``(img_1, img_2, label)``.

        Args:
            pair: ``[name, idx_1, idx_2]`` or ``[name_1, idx_1, name_2, idx_2]``.
        """
        if len(pair) == 3:
            img_1_name, img_1_idx, img_2_idx = pair
            img_2_name = img_1_name
            label = 1
        else:
            img_1_name, img_1_idx, img_2_name, img_2_idx = pair
            label = 0
        img_1 = self._get_image_from_name_idx(img_1_name, img_1_idx)
        img_2 = self._get_image_from_name_idx(img_2_name, img_2_idx)
        if self.transform is not None:
            img_1 = self.transform(img_1)
            img_2 = self.transform(img_2)
        if self.label_transform is not None:
            label = self.label_transform(label)
        return img_1, img_2, label

    def __getitem__(self, idx):
        """Return the ``idx``-th pair of the selected split."""
        return self._get_image_label_pair(self._active_pairs()[idx])

        



In [86]:
import torchvision

In [91]:
from torchvision.transforms import transforms

In [138]:
# Preprocessing pipeline passed as `transform` to the LFW datasets: convert the
# image to a tensor, then normalise every channel.
custom_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # per channel: (x - 0.5) / 0.5
])

In [152]:
# Build the image root with os.path.join instead of a ".\Data\..." literal:
# that literal relied on unrecognised escape sequences (`\D`, `\l`) and on the
# Windows path separator. On Windows the joined path is the same string.
lfw_image_dir = os.path.join(".", "Data", "lfw_funneled")

# Training and dev splits share the same image preprocessing; labels are
# wrapped with torch.tensor so each item carries a tensor label.
train_lfw_data = LFWDatasetSlow(lfw_image_dir, train = True, transform = custom_transform, label_transform = torch.tensor)
dev_lfw_data = LFWDatasetSlow(lfw_image_dir, train = False, transform = custom_transform, label_transform = torch.tensor)

In [153]:
# Batch both splits in groups of 64, reshuffled on every pass.
# NOTE(review): shuffling the dev split is not needed for evaluation and makes
# per-sample outputs harder to line up with the pair list — confirm intent.
train_dataloader = DataLoader(train_lfw_data, batch_size = 64, shuffle = True)
dev_dataloader = DataLoader(dev_lfw_data, batch_size = 64, shuffle = True)

##### We now have our initial dataset
- Build a resnet or some other standard vision model and train it on the data
- Build a test set similarly
- Test the performance on the test set
- Try to see how to use pretrained image based models here
  - Here one problem is how to deal with different resolutions of your images and the images on which the pretrained models were trained.
  - We can then just use cosine similarity and check performance.
  - We can also try to finetune the pre trained model properly and check performance

In [14]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.nn.functional as F

#### A simple siamese style classifier

In [190]:
class LeNet(nn.Module):
    """Small convolutional encoder that maps an image to a 128-d embedding.

    Four 5x5 convolutions (padding 2, so spatial size is preserved), each
    followed by a 2x2 max-pool, then two linear layers. The first linear
    layer expects 900 features, i.e. 4 channels x 15 x 15, which is what a
    250x250 input yields after four poolings (250 -> 125 -> 62 -> 31 -> 15).

    Args:
        in_channels: Number of channels of the input images.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels
        self.model_chunk = nn.Sequential(
            # Honour `in_channels` (previously hard-coded to 3).
            nn.Conv2d(in_channels, 16, 5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 8, 5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(8, 4, 5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # No activation between this convolution and its pooling, as in
            # the original layer stack.
            nn.Conv2d(4, 4, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(900, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
        )

    def forward(self, img):
        """Return the embedding produced by ``self.model_chunk`` for ``img``."""
        return self.model_chunk(img)

In [191]:
import torch.nn.functional as F

In [197]:
class SiameseNetwork(nn.Module):
    """Pair classifier: one shared ``LeNet`` encoder plus a three-layer head.

    Both images go through the same encoder. The two embeddings, their sum
    and their difference are concatenated (4 x 128 = 512 features) and fed to
    ``fc1 -> ReLU -> fc2 -> fc3 -> sigmoid``, giving one value in (0, 1) per
    pair.

    Args:
        in_channels: Forwarded to ``LeNet``.
        m: Stored as ``self.m``; only the commented-out contrastive loss
            below reads it.
    """

    def __init__(self, in_channels, m = 0.5):
        super().__init__()
        self.in_channels = in_channels
        self.m = m
        self.lenet_block = LeNet(in_channels)
        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 1)

    def forward(self, img1, img2):
        """Return the sigmoid score for each ``(img1, img2)`` pair."""
        left_emb = self.lenet_block(img1)
        right_emb = self.lenet_block(img2)
        pair_features = torch.hstack(
            [left_emb, right_emb, left_emb + right_emb, left_emb - right_emb]
        )
        hidden = F.relu(self.fc1(pair_features))
        # NOTE(review): there is no non-linearity between fc2 and fc3.
        hidden = self.fc2(hidden)
        score = self.fc3(hidden)
        return F.sigmoid(score)

    # Earlier cosine-similarity / contrastive-loss variant, kept disabled for
    # reference.

    # def cosine_similarity(self, img1_emb, img2_emb):
    #     emb1_mod = torch.sqrt((img1_emb * img1_emb).sum(dim = 1))
    #     emb2_mod = torch.sqrt((img2_emb * img2_emb).sum(dim = 1))
    #     emb_dot = (img1_emb * img2_emb).sum(dim = 1)
    #     cosine_similarity = emb_dot/(emb1_mod * emb2_mod)
    #     return cosine_similarity

    # def cosine_similarity_adj(self, img1_emb, img2_emb):
    #     cosine_similarity = self.cosine_similarity(img1_emb, img2_emb)
    #     cosine_similarity = (cosine_similarity + 1)/2
    #     return cosine_similarity
    
    # def loss(self, img1_emb, img2_emb, label):
    #     cosine_similarity = self.cosine_similarity(img1_emb, img2_emb)
    #     cosine_distance = torch.pow((1 - cosine_similarity)/2, 2)
    #     contrasive_loss_1 = label*torch.pow((1 - cosine_distance), 2)
    #     contrasive_loss_2 = (1-label)*torch.pow(torch.clip(self.m - cosine_distance, min = 0), 2)
    #     contrasive_loss = contrasive_loss_1 + contrasive_loss_2
    #     contrasive_loss = torch.mean(contrasive_loss)
    #     return contrasive_loss

In [201]:
# Use the GPU when torch reports CUDA as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [222]:
# Train the sigmoid-output SiameseNetwork with binary cross-entropy and report
# per-minibatch loss/accuracy plus per-epoch averages.
model = SiameseNetwork(3, 0.1)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.002)
n_epochs = 10
model.to(device)
for epoch in range(n_epochs):
    # Re-enter training mode at the start of every epoch; the metric code
    # below no longer flips the model into eval mode mid-epoch.
    model.train()
    running_loss = 0.0
    n_batches = 0
    accurate_predictions_tot = 0
    total_predictions_tot = 0
    for idx, (image1, image2, labels) in enumerate(train_dataloader):
        image1 = image1.to(device)
        image2 = image2.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        y_preds = model(image1, image2)
        # squeeze(1) drops only the trailing score axis, so a final batch of
        # size 1 keeps shape (1,) and still matches `labels`.
        loss = criterion(y_preds.squeeze(1), labels.to(torch.float32))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        n_batches += 1

        # Metrics reuse the predictions from the training forward pass; no
        # gradients are needed for the bookkeeping.
        with torch.no_grad():
            y_preds_class = (y_preds >= 0.5).int()
            accurate_predictions = (y_preds_class == labels.reshape(-1, 1)).sum().item()
            total_predictions = labels.shape[0]
            accurate_predictions_tot += accurate_predictions
            total_predictions_tot += total_predictions
            print(f"minibatch - {idx}, loss - {loss.item()}, accurate predictions - {accurate_predictions}, total predictions - {total_predictions}")

    # Average over the number of batches (idx + 1), not the last index, and
    # guard against an empty dataloader.
    epoch_loss = running_loss / max(n_batches, 1)
    epoch_accuracy = accurate_predictions_tot / max(total_predictions_tot, 1)
    print(f"epoch - {epoch}, loss - {epoch_loss}, accuracy - {epoch_accuracy}")

minibatch - 0, loss - 0.6866062879562378, accurate predictions - 37, total predictions - 64
minibatch - 1, loss - 0.7219350337982178, accurate predictions - 30, total predictions - 64
minibatch - 2, loss - 0.6995278000831604, accurate predictions - 29, total predictions - 64
minibatch - 3, loss - 0.6861046552658081, accurate predictions - 38, total predictions - 64
minibatch - 4, loss - 0.7254888415336609, accurate predictions - 31, total predictions - 64
minibatch - 5, loss - 0.6959218978881836, accurate predictions - 33, total predictions - 64
minibatch - 6, loss - 0.6942237615585327, accurate predictions - 31, total predictions - 64
minibatch - 7, loss - 0.6868767142295837, accurate predictions - 38, total predictions - 64
minibatch - 8, loss - 0.7019878029823303, accurate predictions - 31, total predictions - 64
minibatch - 9, loss - 0.6797695159912109, accurate predictions - 38, total predictions - 64
minibatch - 10, loss - 0.712787389755249, accurate predictions - 28, total predi

In [221]:
running_loss

tensor(2.3818e+10, device='cuda:0')

In [76]:
labels.shape

torch.Size([24])

In [77]:
image1.shape

torch.Size([24, 3, 250, 250])

In [78]:
y_preds.shape

torch.Size([24, 1])

In [79]:
y_preds_class.shape

torch.Size([24, 1])

In [205]:
# Contrastive-loss training run.
# NOTE(review): this cell unpacks two embeddings from `model(...)` and calls
# `model.loss(...)`. The SiameseNetwork class shown in this notebook returns a
# single sigmoid score and has `loss` only as commented-out code, so this cell
# presumably ran against an earlier version of the class — confirm before
# re-running. The recorded output below shows the loss turning into nan from
# epoch 1 onwards.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SiameseNetwork(3, 0.1)
optimizer = optim.Adam(model.parameters(), lr = 0.001)
n_epochs = 50
model.to(device)
model.train()
for epoch in range(n_epochs):
    running_loss = 0
    # correctly_classified = 0
    # total_classified = 0
    for idx, (image1, image2, labels) in enumerate(train_dataloader):
        image1 = image1.to(device)
        image2 = image2.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        img1_emb, img2_emb = model(image1, image2)
        loss = model.loss(img1_emb, img2_emb, labels)
        loss.backward()
        optimizer.step()
        # Incremental mean of the minibatch losses seen so far in this epoch:
        # new_mean = (1 - 1/k) * old_mean + (1/k) * loss, with k = idx + 1.
        running_loss = (1 - 1/(idx + 1)) * running_loss + (1/(idx + 1))*loss.detach()
        # model.eval()
        # with torch.no_grad():
        #     y_pred = model.cosine_similarity(img1_emb, img2_emb)
        #     y_pred_labels = (y_pred >= 0.8).float()
        #     correctly_classified += (y_pred_labels == labels).sum()
        #     total_classified += labels.shape[0] 
    print(f"epoch - {epoch}, loss - {running_loss}")

epoch - 0, loss - 0.43223831057548523
epoch - 1, loss - nan
epoch - 2, loss - nan


KeyboardInterrupt: 

In [122]:
torch.set_printoptions(sci_mode=False)

In [123]:
y_pred

tensor([     0.2087,      0.0405,      0.0579,      0.0562,      0.0156,
             0.1592,      0.1275,      0.1880,     -0.2900,      0.0466,
             0.0644,     -0.1400,      0.0448,      0.1228,     -0.1514,
             0.3569,      0.0361,     -0.0018,      0.5856,      0.0746,
             0.2159,     -0.0920,     -0.0996,      0.2665,      0.1758,
            -0.0734,      0.2963,      0.1449,     -0.0832,     -0.2128,
             0.1015,     -0.0795,      0.0229,     -0.2411,     -0.0988,
             0.0453,     -0.0463,      0.0370,     -0.0728,      0.0015,
            -0.1661,     -0.0042,     -0.0573,      0.0214,     -0.1041,
             0.0677,      0.3715,     -0.0792,     -0.1005,      0.0515,
             0.1049,      0.1055,      0.0482,     -0.0653,     -0.1535,
             0.1385,      0.0001,      0.0468,      0.1537,     -0.0759,
            -0.1166,      0.0403,      0.0225,      0.0866], device='cuda:0')

In [124]:
# Un-shuffled loader over the training pairs, so collected predictions stay in
# dataset order.
train_inference_dataloader = DataLoader(train_lfw_data, batch_size = 64, shuffle = False)
# dev_dataloader = DataLoader(dev_lfw_data, batch_size = 64, shuffle = False)

In [141]:
# Collect a similarity score and the true label for every training pair.
# NOTE(review): like the contrastive training cell, this expects `model(...)`
# to return two embeddings and `model.cosine_similarity_adj` to exist; both
# are only present as commented-out code in the SiameseNetwork class above.
y_preds = []
all_labels = []
model.eval()
with torch.no_grad():
    for img1, img2, labels in train_inference_dataloader:
        img1 = img1.to(device)
        img2 = img2.to(device)
        img1_emb, img2_emb = model(img1, img2)
        # Rescaled cosine similarity, (cos + 1) / 2 per the commented-out helper.
        y_pred = model.cosine_similarity_adj(img1_emb, img2_emb)
        y_preds.append(y_pred.detach())
        all_labels.append(labels)

In [142]:
# Join the per-batch tensors into one tensor of scores and one of labels.
y_pred_proba = torch.hstack(y_preds)
true_labels = torch.hstack(all_labels)

In [143]:
# Move both tensors to the CPU and convert them to NumPy arrays.
# Note that `y_preds` is rebound here from the list of batches to an array.
y_preds = y_pred_proba.to('cpu').numpy()
true_labels = true_labels.to('cpu').numpy()

In [148]:
import pickle

In [161]:
y_preds.shape

(2200,)

In [166]:
# Write scores and labels to files in the working directory; a non-empty
# `sep` makes tofile emit space-separated text.
y_preds.tofile('./y_preds', sep = " ")
true_labels.tofile("./true_labels", sep = " ")

In [163]:
# Read the space-separated text files back into arrays.
# NOTE(review): no dtype is passed, so the reloaded labels presumably come
# back as floats rather than integers — confirm. This also rebinds
# `true_labels` to the reloaded array.
y_preds_test = np.fromfile('./y_preds', sep = " ")
true_labels = np.fromfile('./true_labels', sep = " ")

In [147]:
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# Overlay the score histograms of matching (label 1) and non-matching
# (label 0) pairs. Needs `plt` from the matplotlib import cell, which failed
# with ModuleNotFoundError in the recorded run.
plt.hist(y_preds_test[true_labels == 1], bins = 45)
plt.hist(y_preds_test[true_labels == 0], bins = 45)
plt.show()

In [171]:
def accuracy(y_preds, threshold, true_labels):
    """Return the fraction of predictions that match the true labels.

    A score strictly greater than ``threshold`` is predicted as class 1,
    anything else as class 0.

    Args:
        y_preds: Array-like of scores.
        threshold: Decision threshold applied to ``y_preds``.
        true_labels: Array-like of 0/1 labels with the same shape as
            ``y_preds``.

    Returns:
        The accuracy as a ``float`` in ``[0, 1]``.

    Raises:
        ValueError: If the inputs are empty or their shapes differ (which
            would otherwise broadcast silently).
    """
    y_preds = np.asarray(y_preds)
    true_labels = np.asarray(true_labels)
    if y_preds.shape != true_labels.shape:
        raise ValueError(
            f"shape mismatch: y_preds {y_preds.shape} vs true_labels {true_labels.shape}"
        )
    if true_labels.size == 0:
        raise ValueError("accuracy is undefined for empty inputs")
    y_pred_labels = (y_preds > threshold).astype(int)
    # Vectorised mean instead of Python-level sum() over array elements.
    return float(np.mean(y_pred_labels == true_labels))

In [172]:
accuracy(y_preds_test, 0.1, true_labels)

0.5

In [173]:
accuracy(y_preds_test, 0.2, true_labels)

0.495

In [174]:
accuracy(y_preds_test, 0.3, true_labels)

0.4827272727272727

In [175]:
accuracy(y_preds_test, 0.5, true_labels)

0.45

In [176]:
accuracy(y_preds_test, 0.9, true_labels)

0.5036363636363637

#### Optimizer pytorch guide

In [None]:
# Illustrative optimizer constructions (each line rebinds `optimizer`).
# `var1` / `var2` are placeholders that are not defined in this notebook.
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)
optimizer = optim.Adam([var1, var2], lr = 0.0001)

In [None]:
# Per-parameter-group options: the second group overrides `lr`, while the
# first group uses the keyword defaults given after the list.
# `model.base` / `model.classifier` are placeholder attribute names; the
# models defined in this notebook do not have them.
optim.SGD([
                {'params': model.base.parameters()},
                {'params': model.classifier.parameters(), 'lr': 1e-3}
            ], lr=1e-2, momentum=0.9)

- The first input to an optimizer is an iterable consisting of all the parameters that we want the optimizer to handle. The parameters are all of type `torch.nn.Parameter` (a `Tensor` subclass; older PyTorch versions called these `Variable`s).
- We can also divide the parameters into different groups where each group can have its own optimizer configuration like learning rate, momentum etc. We can also send in a master list of parameters which will act as the default for groups without an override.
- There are learning rate schedulers as well. These are usually applied similar to how an optimizer is applied, using the step function.
- These are usually applied after each epoch (why? Is it because applying after each mini batch breaks some sort of estimation?) 

In [1]:
from torch.optim.lr_scheduler import ExponentialLR        # Multiplies the learning rate by a fixed factor (gamma) on every scheduler.step(), typically called once per epoch
from torch.optim.lr_scheduler import ReduceLROnPlateau    # Conditional scheduler: lowers the learning rate once the monitored metric has stopped improving for a number of epochs (patience)

