In [17]:
import os
import cv2
import numpy as np
import torch
import pandas as pd

class PerspectiveDataset(torch.utils.data.Dataset):
    """Pairs each image with its row of coordinate labels.

    Parameters
    ----------
    img_paths : str
        Text file containing one image path per line.
    label_file : str
        Header-less CSV; the row at position ``k`` holds the labels for the
        image path found on line ``k`` of ``img_paths``.

    Each item is ``(img_tensor, label_tensor)``:

    * ``img_tensor``  - float32, shape ``(channels, 512, 512)``, shifted by the
      image minimum and divided by the image maximum.
    * ``label_tensor`` - float32; values at even positions are treated as x
      coordinates and values at odd positions as y coordinates. They are
      rescaled to the resized image and then shifted/divided like the image.
    """

    def __init__(self, img_paths, label_file):
        # (width, height) passed to cv2.resize for every image
        self.img_dim = (512, 512)
        self.data = []
        df = pd.read_csv(label_file, header=None)
        with open(img_paths, 'r') as file:
            for index, line in enumerate(file):
                # Strip only the line terminator. Slicing off the last
                # character would eat a real character when the final line
                # has no trailing newline and would leave '\r' on CRLF files.
                image = line.rstrip('\r\n')
                if not image:
                    # blank line: nothing to load (row alignment is unchanged
                    # because `index` still counts every line)
                    continue
                self.data.append([image, df.iloc[index].tolist()])

    def __len__(self):
        """Number of (image path, labels) pairs that were loaded."""
        return len(self.data)

    @staticmethod
    def _shift_and_scale(values):
        """Return ``(values - values.min()) / values.max()`` without dividing by zero."""
        peak = values.max()
        if peak == 0:
            # all-black image / all-zero labels: avoid 0/0 -> NaN
            peak = torch.ones_like(peak)
        return (values - values.min()) / peak

    def __getitem__(self, idx):
        """Load, resize and normalise sample ``idx``.

        Raises
        ------
        OSError
            If ``cv2.imread`` cannot read the image file.
        """
        img_path, target = self.data[idx]

        # forming image tensors
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise OSError("cv2.imread could not read image: {!r}".format(img_path))
        height, width = img.shape[:2]
        img = cv2.resize(img, self.img_dim)
        # HWC -> CHW, and float so the arithmetic below is not done in uint8
        img_tensor = torch.from_numpy(img).permute(2, 0, 1).float()

        # normalizing image tensors to be between 0 and 1
        img_tensor = self._shift_and_scale(img_tensor)

        # forming label tensors; force float so integer CSV columns are not
        # truncated by the in-place rescaling below
        label_tensor = torch.tensor(target, dtype=torch.float32)

        # adjusting label coords according to default image dimension
        # (even positions scale with the width ratio, odd with the height ratio)
        label_tensor[0::2] = label_tensor[0::2] * self.img_dim[0] / width
        label_tensor[1::2] = label_tensor[1::2] * self.img_dim[1] / height

        # normalizing label tensors to be between 0 and 1
        # NOTE(review): this uses each sample's own min/max, so the absolute
        # position and scale of the coordinates cannot be recovered from the
        # target - confirm this is intended rather than dividing by img_dim.
        label_tensor = self._shift_and_scale(label_tensor)

        return img_tensor, label_tensor

if __name__ == "__main__":
    # Training split: dataset built from the path list + label CSV, served in
    # shuffled mini-batches of 8.
    train_set = PerspectiveDataset(img_paths='train_img_paths.txt',
                                   label_file='train.csv')
    trainloader = torch.utils.data.DataLoader(dataset=train_set,
                                              batch_size=8,
                                              shuffle=True)

    # Test split: same construction, same batching.
    test_set = PerspectiveDataset(img_paths='test_img_paths.txt',
                                  label_file='test.csv')
    testloader = torch.utils.data.DataLoader(dataset=test_set,
                                             batch_size=8,
                                             shuffle=True)

In [16]:
# Length of the DataLoader, i.e. how many batches one pass over the training
# set yields (not the number of individual samples).
len(trainloader)

694

In [18]:
import torch.nn as nn
import torch.nn.functional as F

class PDnet(nn.Module):
    """Convolutional regressor mapping a 3x512x512 image to 8 output values.

    Ten 3x3/2x2 convolutions (each followed by batch norm and ReLU) are
    interleaved with four 2x2 poolings, then three fully connected layers
    reduce the flattened 1024x17x17 feature map to 8 numbers.

    The shape comments below assume a 512x512 input; ``fc1`` is sized for
    exactly that resolution.
    """

    def __init__(self):
        super().__init__()

        # convolutions,batch norms and pools
        self.conv1 = nn.Conv2d(3, 32, 3, padding = 1)
        # 32, 512, 512
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, 3, padding = 1)
        # 32, 512, 512
        self.bn2 = nn.BatchNorm2d(32)
        # insert pooling layer 32, 256, 256
        self.conv3 = nn.Conv2d(32, 64, 3, padding = 1)
        # 64, 256, 256
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, 3, padding = 1)
        # 64, 256, 256
        self.bn4 = nn.BatchNorm2d(64)
        # insert pooling layer 64, 128, 128
        self.conv5 = nn.Conv2d(64, 128, 3, padding = 1)
        # 128, 128, 128
        self.bn5 = nn.BatchNorm2d(128)
        self.conv6 = nn.Conv2d(128, 256, 3, padding = 1)
        # 256, 128, 128
        self.bn6 = nn.BatchNorm2d(256)
        self.conv7 = nn.Conv2d(256, 256, 3, padding = 1)
        # 256, 128, 128
        self.bn7 = nn.BatchNorm2d(256)
        # insert pooling layer 256, 64, 64
        self.conv8 = nn.Conv2d(256, 512, 3, padding = 1)
        # 512, 64, 64
        self.bn8 = nn.BatchNorm2d(512)
        self.conv9 = nn.Conv2d(512, 512, 3, padding = 1)
        # 512, 64, 64
        self.bn9 = nn.BatchNorm2d(512)
        # insert pooling layer 512, 32, 32
        self.conv10 = nn.Conv2d(512, 1024, 2, stride = 2, padding = 1)
        # 1024, 17, 17   (floor((32 + 2*1 - 2) / 2) + 1 = 17)
        self.bn10 = nn.BatchNorm2d(1024)


        # max pooling layer
        self.mpool = nn.MaxPool2d(2,2)
        # average pooling layer
        self.apool = nn.AvgPool2d(2,2)

        # Fully connected layers
        self.fc1 = nn.Linear(1024*17*17, 1024)
        self.fc2 = nn.Linear(1024, 128)
        self.fc3 = nn.Linear(128, 8)

        # dropout layer
        self.dropout = nn.Dropout(p = 0.3)

    def forward(self, x):
        """Run the network on a batch ``x`` of shape ``(N, 3, 512, 512)``.

        Returns a tensor of shape ``(N, 8)``. An input whose spatial size does
        not produce a 1024x17x17 feature map fails at ``fc1`` with a shape
        error.
        """

        # do the forward pass through the network
        # (average pooling after blocks 1-3, max pooling after block 4)
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.apool(F.relu(self.bn2(self.conv2(x))))
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.apool(F.relu(self.bn4(self.conv4(x))))
        x = F.relu(self.bn5(self.conv5(x)))
        x = F.relu(self.bn6(self.conv6(x)))
        x = self.apool(F.relu(self.bn7(self.conv7(x))))
        x = F.relu(self.bn8(self.conv8(x)))
        x = self.mpool(F.relu(self.bn9(self.conv9(x))))
        x = F.relu(self.bn10(self.conv10(x)))

        # Flatten per sample. Keeping the batch dimension explicit (instead of
        # view(-1, 1024*17*17)) means a wrongly sized input raises at fc1
        # rather than being silently reshaped into extra "samples".
        x = x.view(x.size(0), -1)

        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))

        # NOTE(review): ReLU on the regression head clamps outputs to >= 0 and
        # passes no gradient for negative pre-activations - confirm intended.
        output = F.relu(self.fc3(x))

        return output

In [19]:
import json
import torch.optim as optim

use_cuda = torch.cuda.is_available()

# create the network
network = PDnet()

# create the loss function and the optimizer
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(network.parameters(), lr=0.003)

# find the gpus
device = torch.device("cuda" if use_cuda else "cpu")

# putting the neural network on the gpus
# Use however many GPUs are actually visible: a hard-coded [0, 1, 2, 3] fails
# on machines with 2 or 3 devices even though the guard lets them through.
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    network = nn.DataParallel(network, device_ids = list(range(gpu_count)))

network = network.to(device)

epochs = 100

# per-epoch history; `losses` maps epoch number -> [train_loss, test_loss]
train_losses, test_losses = [], []
losses = {}

In [20]:
# Number of CUDA devices PyTorch can see in this kernel.
torch.cuda.device_count()

4

In [None]:
# Checkpoints are written to ckpt/ every epoch; make sure it exists up front
# so the first torch.save does not fail after a full epoch of training.
os.makedirs('ckpt', exist_ok=True)

for e in range(epochs):
    # ---- training pass -------------------------------------------------
    # enable dropout and batch-norm batch statistics
    network.train()
    tot_train_loss = 0.0
    for images, labels in trainloader:

        optimizer.zero_grad()

        # move the images and the labels to the gpus
        images = images.to(device).float()
        labels = labels.to(device).float()

        # forward prop (call the module, not .forward, so hooks run)
        output = network(images)

        # find the loss
        loss = criterion(output, labels)
        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a true mean even when the last batch is smaller
        tot_train_loss += loss.item() * images.size(0)

        # back propagation
        loss.backward()

        # change the weights according to the loss function
        optimizer.step()

    # ---- validation pass -----------------------------------------------
    # disable dropout and use batch-norm running statistics
    network.eval()
    tot_test_loss = 0.0

    # Turn off gradients for validation, saves memory and computations
    with torch.no_grad():
        for images, labels in testloader:

            images = images.to(device).float()
            labels = labels.to(device).float()

            output = network(images)

            loss = criterion(output, labels)
            tot_test_loss += loss.item() * images.size(0)

    # Get mean loss to enable comparison between train and test sets
    train_loss = tot_train_loss / len(trainloader.dataset)
    test_loss = tot_test_loss / len(testloader.dataset)

    # At completion of epoch
    train_losses.append(train_loss)
    test_losses.append(test_loss)

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(train_loss),
          "Test Loss: {:.3f}.. ".format(test_loss))

    # rewrite the full history every epoch so an interrupted run keeps it
    losses[e+1] = [train_loss, test_loss]
    with open('losses.json', 'w') as fp:
        json.dump(losses, fp)

    # NOTE(review): when `network` is wrapped in nn.DataParallel the saved
    # keys carry a "module." prefix - confirm the loading code expects that.
    ckpt_name = 'ckpt/checkpoint_'+str(e+1)+'.pth'
    torch.save(network.state_dict(), ckpt_name)

In [33]:
import torch, gc
# Manual memory cleanup: collect unreachable Python objects first, then ask
# PyTorch to release the cached GPU memory it is no longer using.
gc.collect()
torch.cuda.empty_cache()