In [20]:
import os
import numpy as np
import pickle as pkl
import torch
from torchvision.utils import save_image
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
import cv2
from torchvision.transforms import ToTensor, Normalize
import pickle
import pandas as pd
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
batch_size = 32


cuda


In [21]:
#data loading
class LazyDataset(Dataset):
    def __init__(self, path, train = True, transform = None):
        self.transform = transform
        path = path + ("train/" if train else "test/")

        self.pathX = path + "X/"
        self.pathY = path + "Y/"

        self.data = os.listdir(self.pathX)

    def __getitem__(self, idx):
        f = self.data[idx]

        img0 = cv2.imread(self.pathX + f + "/rgb/0.png")
        img1 = cv2.imread(self.pathX + f + "/rgb/1.png")
        img2 = cv2.imread(self.pathX + f + "/rgb/2.png")
        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
            img2 = self.transform(img2)
            
        depth = np.load(self.pathX + f + "/depth.npy")
        field_id = pkl.load(open(self.pathX + f + "/field_id.pkl","rb"))
        
        Y = np.load(self.pathY + f + ".npy")
        Y = torch.from_numpy(Y)
        depth = torch.from_numpy(depth)

        return (img0, img1, img2, depth, field_id), Y

    def __len__(self):
        return len(self.data)


In [22]:
train_dataset = LazyDataset('./lazydata/', transform = transforms.Compose([transforms.ToTensor()]))
test_dataset = LazyDataset('./lazydata/', train = False, transform = transforms.Compose([transforms.ToTensor()]))
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = batch_size * 2)


In [23]:
# (img0, img1, img2, depth, field_id), Y = train_dataset[0]
# img0.shape, img1.shape, img2.shape
# type(depth)
# len(train_dataset)
# npdata = train_dataset.numpy()

In [24]:
# resnet = models.resnet18(pretrained = True)
# couldn't get resnet to work with lazy loading

In [25]:
class CNN(nn.Module):
    #using the VGG16 architecture since VGG16 takes correct input tensor size we are using
    def __init__(self):
        super(CNN, self).__init__()
        # https://medium.com/@mygreatlearning/everything-you-need-to-know-about-vgg16-7315defb5918
        
        self.conv1 = nn.Conv2d(in_channels=12, out_channels=6, kernel_size=3, padding=1)
       
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=6, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=3, padding=1)
       
        self.conv4 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, padding=1)
        
        self.conv6 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=3, padding=1)
        self.conv7 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=3, padding=1)
        self.conv8 = nn.Conv2d(in_channels=24, out_channels=48, kernel_size=3, padding=1)
        
        self.conv9 = nn.Conv2d(in_channels=48, out_channels=48, kernel_size=3, padding=1)
        self.conv10 = nn.Conv2d(in_channels=48, out_channels=48, kernel_size=3, padding=1)
        self.conv11 = nn.Conv2d(in_channels=48, out_channels=96, kernel_size=3, padding=1)
        
        self.conv12 = nn.Conv2d(in_channels=96, out_channels=96, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(in_channels=96, out_channels=96, kernel_size=3, padding=1)

        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc14 = nn.Linear(4704, 4096)
        # https://neurohive.io/en/popular-networks/vgg16/ 7 x 7 max pooling 
        # 96 x 7 x 7 
        self.fc15 = nn.Linear(4096, 4096)
        #vgg16 architecture standard is fully connected with 4096 nodes
        self.fc16 = nn.Linear(4096, 12)
        #and we have 4 x 3 for our input channels 

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.maxpool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.maxpool(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv7(x))
        x = self.maxpool(x)
        x = F.relu(self.conv8(x))
        x = F.relu(self.conv9(x))
        x = F.relu(self.conv10(x))
        x = self.maxpool(x)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = self.maxpool(x)
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc14(x))
        x = F.dropout(x, 0.5) 
        x = F.relu(self.fc15(x))
        x = F.dropout(x, 0.5)
        x = self.fc16(x)
        return x

In [31]:
def train(epoch, dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    num = len(dataloader)
    model.train()

    for batch, (X, y) in enumerate(dataloader):
        #print(X[0])
        # make sure all your variables have the same device as the model
        x0,x1,x2,depths, field_id = X
   
        x0 = torch.concat((x0, depths[:,0:1]), 1) 
        x1 = torch.concat((x1, depths[:,1:2]), 1) 
        x2 = torch.concat((x2, depths[:,2:3]), 1) 
        # print(x0.shape)
        # print(x1.shape)
        # print(x2.shape)
        # print(depths.shape)

        x_concat = torch.concat((x0,x1,x2),1)
        x = x_concat.to(device)
        y = y.to(device)
        #compute prediction error
        pred = model(x).to(device)
        loss = loss_fn(pred, y)

        #Back propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 1 == 0:
            # loss, current = loss.item(), batch * len(X)
            loss, current = loss.item(), batch 
            print(
                'Test Epoch: {} [{}/{}] \nLoss: {:.4f}'.format(
                epoch, current, size,
                100. * batch / num, loss))
            

In [32]:
#recitation
def test(dataloader, model, loss_fn): 
    size = len(dataloader.dataset) 
    num_batches = len(dataloader)
    model.eval()
    
    test_loss, correct = 0, 0 
    with torch.no_grad(): #doing this stops torch from building computation graphs and managing/calculating gradients (hence you can get away with more batch size in val/test dataloaders)
        for X, y in dataloader:
            y = y.to(device)

            pred = model(X[0])
            test_loss += loss_fn(pred, y).item() # no need to run backward/optimizer since gradients are not to be computed
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

In [33]:
torch.cuda.empty_cache()

In [34]:
model = CNN().to(device)
lr = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
#optimizer = torch.optim.SGD(model.parameters(), lr = lr)
loss_fn = nn.L1Loss()
#loss_fn = nn.MSELoss() 

In [38]:
for epoch in range(0,100):
    train(epoch, train_loader, model, loss_fn, optimizer)

Test Epoch: 0 [0/3396] 
Loss: 0.0000
Test Epoch: 0 [1/3396] 
Loss: 0.9346
Test Epoch: 0 [2/3396] 
Loss: 1.8692
Test Epoch: 0 [3/3396] 
Loss: 2.8037
Test Epoch: 0 [4/3396] 
Loss: 3.7383
Test Epoch: 0 [5/3396] 
Loss: 4.6729
Test Epoch: 0 [6/3396] 
Loss: 5.6075
Test Epoch: 0 [7/3396] 
Loss: 6.5421
Test Epoch: 0 [8/3396] 
Loss: 7.4766
Test Epoch: 0 [9/3396] 
Loss: 8.4112
Test Epoch: 0 [10/3396] 
Loss: 9.3458
Test Epoch: 0 [11/3396] 
Loss: 10.2804
Test Epoch: 0 [12/3396] 
Loss: 11.2150
Test Epoch: 0 [13/3396] 
Loss: 12.1495
Test Epoch: 0 [14/3396] 
Loss: 13.0841
Test Epoch: 0 [15/3396] 
Loss: 14.0187
Test Epoch: 0 [16/3396] 
Loss: 14.9533
Test Epoch: 0 [17/3396] 
Loss: 15.8879
Test Epoch: 0 [18/3396] 
Loss: 16.8224
Test Epoch: 0 [19/3396] 
Loss: 17.7570
Test Epoch: 0 [20/3396] 
Loss: 18.6916
Test Epoch: 0 [21/3396] 
Loss: 19.6262
Test Epoch: 0 [22/3396] 
Loss: 20.5607
Test Epoch: 0 [23/3396] 
Loss: 21.4953
Test Epoch: 0 [24/3396] 
Loss: 22.4299
Test Epoch: 0 [25/3396] 
Loss: 23.3645
Test Ep

In [36]:
torch.cuda.empty_cache()

In [37]:
rootdir = 'C:/Users/Osc/Desktop/pytorch_project'
#rootdir = ' '
outfile = rootdir + 'submission.csv'

output_file = open(outfile, 'w')

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']
preds = []

test_data = torch.load(rootdir + '/test/test/testX.pt')
rgb, depths, field_id = test_data
rgb0 = rgb[:,0]
rgb1 = rgb[:,1]
rgb2 = rgb[:,2]
print(depths.shape)
print(rgb1.shape)
rgb_data_0 = torch.concat((rgb0, depths[:,0:1]), 1) 
rgb_data_1 = torch.concat((rgb1, depths[:,1:2]), 1) 
rgb_data_2 = torch.concat((rgb2, depths[:,2:3]), 1)

rgb_data_concat = torch.concat((rgb_data_0,rgb_data_1,rgb_data_2), 1)
rgb_data = rgb_data_concat.to(device)

file_ids = test_data[-1]
# rgb_data = test_data[0:4]
model.eval()

for i, data in enumerate(rgb_data):
    # Please remember to modify this loop, input and output based on your model/architecture
    #print(data.shape)
    # output = model(data[:1, :, :, :].to('cuda'))
    output = model(rgb_data[i:i+1])
    preds.append(output[0].cpu().detach().numpy())

df = pd.concat([pd.DataFrame(file_ids), pd.DataFrame.from_records(preds)], axis = 1, names = titles)
df.columns = titles
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

torch.Size([849, 3, 224, 224])
torch.Size([849, 3, 224, 224])
Written to csv file C:/Users/Osc/Desktop/pytorch_projectsubmission.csv
