In [3]:
# import dependencies
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

from pathlib import Path

In [22]:
# Initialize dataloader
## Check for positive and negative pairs
class MyDataset(Dataset):
    def __init__(self):
        dir1 = '../data/Videos/RECOLA/group-1/P25'
        dir2 = '../data/Videos/RECOLA/group-1/P26'
        
        vid1 = list(Path(dir1).glob('*.pt'))
        vid2 = list(Path(dir2).glob('*.pt'))
        
        self.pairPos = []
        self.pairNeg = []
        
        for i in vid1:
            for j in vid2:
                if i.name == j.name:
                    self.pairPos.append((str(i), str(j)))
                else:
                    self.pairNeg.append((str(i), str(j)))

        self.vidPairs = self.pairPos + self.pairNeg
    
    def __len__(self):
        return len(self.vidPairs)
    
    def __getitem__(self, idx):
        pair = self.vidPairs[idx]
        fname1, fname2 = pair
        
        center = transforms.CenterCrop(224)
        
        arr1 = torch.load(fname1, map_location=torch.device('cpu'))
        arr1 = center(arr1)
        
        arr2 = torch.load(fname2, map_location=torch.device('cpu'))
        arr2 = center(arr2)
        
        if pair in self.pairPos:
            label = 1
        else:
            label = 0
        return arr1, arr2, label

In [23]:
dataset = MyDataset()

In [24]:
# Implement dataloader
loader = DataLoader(dataset = dataset,
                         batch_size = 4,
                         shuffle = True,
                         num_workers = 0)

In [25]:
# Print data items
next(iter(loader))

[tensor([[[ 0.0253,  0.0906,  0.3925,  ...,  0.1650,  0.1324,  0.0832],
          [ 0.0569,  0.0837,  0.2777,  ...,  0.0596,  0.1448, -0.0066],
          [ 0.0938,  0.0254,  0.0280,  ..., -0.0726,  0.1543, -0.1284],
          ...,
          [-0.0199,  0.0437,  0.2725,  ...,  0.2110, -0.3041, -0.0148],
          [-0.0103,  0.0571,  0.2696,  ...,  0.2009, -0.2907, -0.0164],
          [-0.0298,  0.0522,  0.2969,  ...,  0.2202, -0.2885,  0.0117]],
 
         [[ 0.0253,  0.0906,  0.3925,  ...,  0.1650,  0.1324,  0.0832],
          [ 0.0569,  0.0837,  0.2777,  ...,  0.0596,  0.1448, -0.0066],
          [ 0.0938,  0.0254,  0.0280,  ..., -0.0726,  0.1543, -0.1284],
          ...,
          [-0.0199,  0.0437,  0.2725,  ...,  0.2110, -0.3041, -0.0148],
          [-0.0103,  0.0571,  0.2696,  ...,  0.2009, -0.2907, -0.0164],
          [-0.0298,  0.0522,  0.2969,  ...,  0.2202, -0.2885,  0.0117]],
 
         [[-0.0108,  0.0914,  0.5108,  ...,  0.2827,  0.0883,  0.1526],
          [ 0.0840,  0.1099,

In [26]:
fname1 = '../data/Videos/UDIVA/talk/009106/FC1/5.pt'
arr1 = torch.load(fname1)
print(arr1)

print(arr1.size())

fname2 = '../data/Videos/UDIVA/talk/009106/FC1/8.pt'
arr2 = torch.load(fname2)
print(arr2)

print(arr1.size())

tensor([[ 0.2508,  0.1030,  0.4502,  ..., -0.0722,  0.0798, -0.0950],
        [ 0.1505,  0.0704,  0.5167,  ..., -0.0390, -0.0546, -0.1448],
        [ 0.1776, -0.1022,  0.4482,  ...,  0.0339,  0.0261, -0.1700],
        ...,
        [ 0.4826,  0.4615,  0.7990,  ..., -0.0745,  0.0077, -0.0740],
        [ 0.4449,  0.4815,  0.7754,  ..., -0.0778,  0.0033, -0.0728],
        [ 0.4452,  0.4813,  0.8211,  ..., -0.0809, -0.0170, -0.0765]],
       device='cuda:0')
torch.Size([1568, 1024])
tensor([[ 0.2929,  0.1070,  0.5795,  ..., -0.1024, -0.0116, -0.0616],
        [ 0.2662, -0.0315,  0.4907,  ..., -0.0344, -0.0830, -0.1438],
        [ 0.2325, -0.0773,  0.4541,  ...,  0.0141,  0.2357, -0.1681],
        ...,
        [ 0.4502,  0.4946,  0.7975,  ..., -0.0816, -0.0059, -0.0818],
        [ 0.4588,  0.4692,  0.7458,  ..., -0.0676, -0.0190, -0.0707],
        [ 0.4488,  0.4945,  0.7716,  ..., -0.0751, -0.0045, -0.0791]],
       device='cuda:0')
torch.Size([1568, 1024])


In [27]:
from dtaidistance import dtw_ndim

# # Convert CPU tensors to NumPy arrays
series1 = arr1.detach().cpu().numpy()
series2 = arr2.detach().cpu().numpy()

# Compute DTW distance
d = dtw_ndim.distance(series1, series2)
d

212.34076157994

In [16]:
from tqdm import tqdm
import glob
from dtaidistance import dtw_ndim
# List of file names

file_names = glob.glob('../data/Videos/UDIVA/talk/010011/FC1/*.pt')
filter_fn = list(filter(lambda x: 'marlin' not in x, file_names))

# Empty list to store distances
distances = []

# Iterate over the file names
for fname in filter_fn:
    # Load the .pt file
    file_path = f'{fname}'
    arr = torch.load(file_path)
    series = arr.detach().cpu().numpy()

    # Calculate the distance between the series
    distances.append(series)

In [18]:
distances

[array([[ 0.57913065,  0.42112774,  0.9249832 , ..., -0.00344198,
         -0.02629023, -0.04958002],
        [ 0.23804273, -0.00851348,  0.81470656, ...,  0.19028363,
          0.10175048, -0.08456265],
        [ 0.31174573, -0.20416155,  1.1563263 , ...,  0.24901818,
          0.67815006, -0.1586102 ],
        ...,
        [ 0.55642587,  0.5334528 ,  0.98610795, ..., -0.07385125,
          0.02331319, -0.07473528],
        [ 0.55729693,  0.52521056,  0.978177  , ..., -0.07933114,
          0.02064587, -0.08112666],
        [ 0.5546756 ,  0.5043052 ,  0.94122237, ..., -0.08538174,
         -0.01672468, -0.07822555]], dtype=float32),
 array([[ 0.41640398,  0.28057367,  0.8588766 , ..., -0.01438825,
          0.15737976, -0.07450838],
        [ 0.4339363 ,  0.01365504,  0.85064405, ...,  0.11762119,
          0.36278173, -0.11144395],
        [ 0.3312143 , -0.12153621,  1.2768335 , ...,  0.25238097,
          0.68261   , -0.14934085],
        ...,
        [ 0.5638241 ,  0.53480345,  0.9

In [19]:
matrix = [[0] * len(distances) for _ in range(len(distances))]

In [20]:
for i in tqdm(range(len(distances))):
    for j in range(i + 1, len(distances)):
        series1 = distances[i]
        series2 = distances[j]
        d = dtw_ndim.distance(series1, series2)
        matrix[i][j] = d
        matrix[j][i] = d

100%|█████████████████████████████████████████████████████████████████████████████████████| 4/4 [03:06<00:00, 46.75s/it]


In [21]:
matrix

[[0, 191.16724672751928, 267.5609420862668, 555.4664568572321],
 [191.16724672751928, 0, 363.15279173158393, 484.75444712672044],
 [267.5609420862668, 363.15279173158393, 0, 634.8540448006826],
 [555.4664568572321, 484.75444712672044, 634.8540448006826, 0]]

In [24]:
from sklearn.preprocessing import normalize
normalized_matrix = normalize(matrix, axis=1, norm='l1')

In [25]:
normalized_matrix

array([[0.        , 0.18849167, 0.26381617, 0.54769216],
       [0.18397839, 0.        , 0.3494964 , 0.46652521],
       [0.21141573, 0.28694851, 0.        , 0.50163575],
       [0.33160693, 0.28939269, 0.37900038, 0.        ]])

In [23]:
for i in tqdm(range(len(matrix))):
    for j in range(len(matrix[i])):
        print(f"Distance between {filter_fn[i]} and {filter_fn[j]}: {matrix[i][j]}")

100%|███████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 4029.11it/s]

Distance between ../data/Videos/UDIVA/talk/010011/FC1/17.pt and ../data/Videos/UDIVA/talk/010011/FC1/17.pt: 0
Distance between ../data/Videos/UDIVA/talk/010011/FC1/17.pt and ../data/Videos/UDIVA/talk/010011/FC1/16.pt: 191.16724672751928
Distance between ../data/Videos/UDIVA/talk/010011/FC1/17.pt and ../data/Videos/UDIVA/talk/010011/FC1/18.pt: 267.5609420862668
Distance between ../data/Videos/UDIVA/talk/010011/FC1/17.pt and ../data/Videos/UDIVA/talk/010011/FC1/6.pt: 555.4664568572321
Distance between ../data/Videos/UDIVA/talk/010011/FC1/16.pt and ../data/Videos/UDIVA/talk/010011/FC1/17.pt: 191.16724672751928
Distance between ../data/Videos/UDIVA/talk/010011/FC1/16.pt and ../data/Videos/UDIVA/talk/010011/FC1/16.pt: 0
Distance between ../data/Videos/UDIVA/talk/010011/FC1/16.pt and ../data/Videos/UDIVA/talk/010011/FC1/18.pt: 363.15279173158393
Distance between ../data/Videos/UDIVA/talk/010011/FC1/16.pt and ../data/Videos/UDIVA/talk/010011/FC1/6.pt: 484.75444712672044
Distance between ../da




In [14]:
import glob
files = glob.glob('../data/Videos/UDIVA/**/**/FC1/*.pt')

In [15]:
list(filter(lambda x: 'marlin' not in x, files))

['../data/Videos/UDIVA/talk/169171/FC1/8.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/14.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/13.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/7.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/17.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/16.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/9.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/10.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/18.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/11.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/15.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/12.pt',
 '../data/Videos/UDIVA/talk/169171/FC1/6.pt',
 '../data/Videos/UDIVA/talk/172185/FC1/8.pt',
 '../data/Videos/UDIVA/talk/172185/FC1/14.pt',
 '../data/Videos/UDIVA/talk/172185/FC1/19.pt',
 '../data/Videos/UDIVA/talk/172185/FC1/13.pt',
 '../data/Videos/UDIVA/talk/172185/FC1/7.pt',
 '../data/Videos/UDIVA/talk/172185/FC1/17.pt',
 '../data/Videos/UDIVA/talk/172185/FC1/16.pt',
 '../data/Videos/UDIVA/talk/172185/FC1/20.pt',
 '../data/Videos/UD

In [66]:
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        
        # Implement Siamese network with Convolutional Neural Network and Fully Connected Network
        self.cnn1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2)
        )
        
        self.fc = nn.Sequential(
            nn.Linear(128 * 14 * 14, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 128)
        )

    # Forward pass through the model for a single input
    def forward_once(self, x):
        x = self.cnn1(x)
        x = x.view(x.size()[0], -1)
        x = self.fc(x)
        return x
    
    # Defines the complete forward pass through the model for a pair of inputs
    def forward(self, input1, input2):
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        return output1, output2

In [67]:
# Used for learning similarity or dissimilarity between inputs
class ContrastiveLoss(torch.nn.Module):
    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        
          # Calculate the euclidean distance and calculate the contrastive loss
          euclidean_distance = F.pairwise_distance(output1, output2, keepdim = True)

          loss_contrastive = torch.mean((1-label) * torch.pow(euclidean_distance, 2) +
                                    (label) * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))


          return loss_contrastive

In [68]:
net = SiameseNetwork()
criterion = ContrastiveLoss()
optimizer = optim.Adam(net.parameters(), lr = 0.0005)

In [69]:
# Training loop to train the Siamese network using the contrastive loss
counter = []
loss_history = [] 
iteration_number= 0

# Iterate throught the epochs
for epoch in range(10):

    # Iterate over batches
    for i, (img0, img1, label) in enumerate(loader, 0):

        # Send the images and labels to CUDA
#         img0, img1, label = img0.cuda(), img1.cuda(), label.cuda()

        # Zero the gradients
        optimizer.zero_grad()

        # Pass in the two images into the network and obtain two outputs
        output1, output2 = net(img0, img1)

        # Pass the outputs of the networks and label into the loss function
        loss_contrastive = criterion(output1, output2, label)

        # Calculate the backpropagation
        loss_contrastive.backward()

        # Optimize
        optimizer.step()

        # Every 10 batches print out the loss
        if i % 10 == 0 :
            print(f"Epoch number {epoch}\n Current loss {loss_contrastive.item()}\n")
            iteration_number += 10

            counter.append(iteration_number)
            loss_history.append(loss_contrastive.item())

show_plot(counter, loss_history)

RuntimeError: Given groups=1, weight of size [16, 3, 3, 3], expected input[1, 4, 224, 224] to have 3 channels, but got 4 channels instead