In [30]:
# import dependencies
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

from pathlib import Path

In [59]:
# Initialize dataloader
## Check for positive and negative pairs
class MyDataset(Dataset):
    def __init__(self):
        dir1 = '../data/Videos/RECOLA/group-1/P25'
        dir2 = '../data/Videos/RECOLA/group-1/P26'
        
        vid1 = list(Path(dir1).glob('*.pt'))
        vid2 = list(Path(dir2).glob('*.pt'))
        
        self.pairPos = []
        self.pairNeg = []
        
        for i in vid1:
            for j in vid2:
                if i.name == j.name:
                    self.pairPos.append((str(i), str(j)))
                else:
                    self.pairNeg.append((str(i), str(j)))

        self.vidPairs = self.pairPos + self.pairNeg
    
    def __len__(self):
        return len(self.vidPairs)
    
    def __getitem__(self, idx):
        pair = self.vidPairs[idx]
        fname1, fname2 = pair
        
        center = transforms.CenterCrop(224)
        
        arr1 = torch.load(fname1, map_location=torch.device('cpu'))
        arr1 = center(arr1)
        
        arr2 = torch.load(fname2, map_location=torch.device('cpu'))
        arr2 = center(arr2)
        
        if pair in self.pairPos:
            label = 1
        else:
            label = 0
        return arr1, arr2, label

In [60]:
dataset = MyDataset()

[('../data/Videos/RECOLA/group-1/P25/8.pt', '../data/Videos/RECOLA/group-1/P26/8.pt'), ('../data/Videos/RECOLA/group-1/P25/4.pt', '../data/Videos/RECOLA/group-1/P26/4.pt'), ('../data/Videos/RECOLA/group-1/P25/1.pt', '../data/Videos/RECOLA/group-1/P26/1.pt'), ('../data/Videos/RECOLA/group-1/P25/7.pt', '../data/Videos/RECOLA/group-1/P26/7.pt'), ('../data/Videos/RECOLA/group-1/P25/5.pt', '../data/Videos/RECOLA/group-1/P26/5.pt'), ('../data/Videos/RECOLA/group-1/P25/9.pt', '../data/Videos/RECOLA/group-1/P26/9.pt'), ('../data/Videos/RECOLA/group-1/P25/10.pt', '../data/Videos/RECOLA/group-1/P26/10.pt'), ('../data/Videos/RECOLA/group-1/P25/3.pt', '../data/Videos/RECOLA/group-1/P26/3.pt'), ('../data/Videos/RECOLA/group-1/P25/2.pt', '../data/Videos/RECOLA/group-1/P26/2.pt'), ('../data/Videos/RECOLA/group-1/P25/6.pt', '../data/Videos/RECOLA/group-1/P26/6.pt')]


In [54]:
# Implement dataloader
loader = DataLoader(dataset = dataset,
                         batch_size = 4,
                         shuffle = True,
                         num_workers = 0)

In [51]:
# Print data items
next(iter(loader))

[tensor([[[ 0.0059,  0.1047,  0.4675,  ...,  0.2070,  0.1780,  0.0918],
          [ 0.0612,  0.1055,  0.2587,  ...,  0.0476,  0.1953,  0.0511],
          [ 0.0956,  0.0597,  0.0654,  ..., -0.0451,  0.1472, -0.1271],
          ...,
          [-0.0249,  0.0334,  0.2441,  ...,  0.1902, -0.3290, -0.0207],
          [ 0.0095,  0.0378,  0.2503,  ...,  0.1832, -0.3068, -0.0439],
          [-0.0232,  0.0424,  0.2667,  ...,  0.1918, -0.3030, -0.0131]],
 
         [[-0.0158,  0.1008,  0.4353,  ...,  0.2245,  0.1015,  0.0984],
          [ 0.0302,  0.1069,  0.3989,  ...,  0.0641,  0.1908,  0.0315],
          [ 0.1200,  0.0826,  0.0654,  ..., -0.0347,  0.2109, -0.0822],
          ...,
          [-0.0416,  0.0188,  0.2450,  ...,  0.2099, -0.3396, -0.0330],
          [ 0.0064,  0.0510,  0.2399,  ...,  0.2111, -0.2824, -0.0496],
          [-0.0237,  0.0501,  0.2523,  ...,  0.2035, -0.3073, -0.0236]],
 
         [[-0.0464,  0.0436,  0.4316,  ...,  0.2038,  0.0932,  0.1581],
          [-0.0352,  0.1016,

In [38]:
from dtaidistance import dtw_ndim

In [39]:
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        
        # Implement Siamese network with Convolutional Neural Network and Fully Connected Network
        self.cnn1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2)
        )
        
        self.fc = nn.Sequential(
            nn.Linear(128 * 14 * 14, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 128)
        )

    # Forward pass through the model for a single input
    def forward_once(self, x):
        x = self.cnn1(x)
        x = x.view(x.size()[0], -1)
        x = self.fc(x)
        return x
    
    # Defines the complete forward pass through the model for a pair of inputs
    def forward(self, input1, input2):
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        return output1, output2

In [40]:
# Used for learning similarity or dissimilarity between inputs
class ContrastiveLoss(torch.nn.Module):
    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        
          # Calculate the euclidean distance and calculate the contrastive loss
          euclidean_distance = F.pairwise_distance(output1, output2, keepdim = True)

          loss_contrastive = torch.mean((1-label) * torch.pow(euclidean_distance, 2) +
                                    (label) * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))


          return loss_contrastive

In [41]:
net = SiameseNetwork()
criterion = ContrastiveLoss()
optimizer = optim.Adam(net.parameters(), lr = 0.0005)

In [26]:
# Training loop to train the Siamese network using the contrastive loss
counter = []
loss_history = [] 
iteration_number= 0

# Iterate throught the epochs
for epoch in range(10):

    # Iterate over batches
    for i, (img0, img1, label) in enumerate(loader, 0):

        # Send the images and labels to CUDA
#         img0, img1, label = img0.cuda(), img1.cuda(), label.cuda()

        # Zero the gradients
        optimizer.zero_grad()

        # Pass in the two images into the network and obtain two outputs
        output1, output2 = net(img0, img1)

        # Pass the outputs of the networks and label into the loss function
        loss_contrastive = criterion(output1, output2, label)

        # Calculate the backpropagation
        loss_contrastive.backward()

        # Optimize
        optimizer.step()

        # Every 10 batches print out the loss
        if i % 10 == 0 :
            print(f"Epoch number {epoch}\n Current loss {loss_contrastive.item()}\n")
            iteration_number += 10

            counter.append(iteration_number)
            loss_history.append(loss_contrastive.item())

show_plot(counter, loss_history)

RuntimeError: Given groups=1, weight of size [16, 3, 3, 3], expected input[1, 4, 224, 224] to have 3 channels, but got 4 channels instead