## 1. Libraries and Setup

In [2]:
from google.colab import drive
import json
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
!cp /content/gdrive/MyDrive/converted_224x224.tar.gz /content
%cd /content
!tar -xvzf converted_224x224.tar.gz

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
converted_224x224/frame_174866.jpg
converted_224x224/frame_106209.jpg
converted_224x224/frame_183385.jpg
converted_224x224/frame_056394.jpg
converted_224x224/frame_065265.jpg
converted_224x224/frame_188455.jpg
converted_224x224/frame_162446.jpg
converted_224x224/frame_032771.jpg
converted_224x224/frame_188246.jpg
converted_224x224/frame_043820.jpg
converted_224x224/frame_035343.jpg
converted_224x224/frame_003077.jpg
converted_224x224/frame_141712.jpg
converted_224x224/frame_000073.jpg
converted_224x224/frame_159991.jpg
converted_224x224/frame_184300.jpg
converted_224x224/frame_129955.jpg
converted_224x224/frame_189617.jpg
converted_224x224/frame_033131.jpg
converted_224x224/frame_083162.jpg
converted_224x224/frame_012496.jpg
converted_224x224/frame_012634.jpg
converted_224x224/frame_004062.jpg
converted_224x224/frame_036170.jpg
converted_224x224/frame_147494.jpg
converted_224x224/frame_163792.jpg
converted_224x224/frame_0

In [None]:
!ls converted_224x224/

In [4]:
import os
import sys
import time

import numpy as np
import pandas as pd

import pdb
import gc
from tqdm.notebook import trange, tqdm
from PIL import Image

import torch
import torchvision 
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

import warnings
warnings.filterwarnings('ignore')


In [5]:
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")

num_workers = 8 if cuda else 0

print("Cuda = ", str(cuda), " with num_workers = ", str(num_workers),  " system version = ", sys.version)

Cuda =  True  with num_workers =  8  system version =  3.7.12 (default, Sep 10 2021, 00:21:48) 
[GCC 7.5.0]


## 2. Data Loading

### 2.1 Load Data

In [6]:
np.random.seed(0)
df = pd.read_csv("/content/gdrive/MyDrive/3+1.csv")
df = df.iloc[:, 1:]

train_end = int(len(df)*0.7)
val_end = int(len(df)*0.85)
train_data = df[:train_end]
val_data = df[train_end:val_end]
test_data = df[val_end:]

In [7]:
train_data = train_data.reset_index().drop('index',1)
val_data = val_data.reset_index().drop('index',1)
test_data = test_data.reset_index().drop('index',1)

### 2.2 Custom Dataset Class

In [14]:
# cur_dir = "data/qscale31_unique/"
cur_dir = "converted_224x224/"

In [15]:
# Define dataset class
class MyDataSet(Dataset):

    # load the dataset
    def __init__(self, data, **kwargs):
        self.X = data["frames"]
        self.Y = data["label"]

    # get number of items/rows in dataset
    def __len__(self):
        return len(self.Y)
    
    def __getitem__(self, index):
        x, y = self.X[index], self.Y[index]
        for c in ["(",")",",","'"]:
            x = x.replace(c, "")
        x = x.split(" ")
        images = []
        for img_file in x:
            img = Image.open(cur_dir + img_file)
            img = torchvision.transforms.ToTensor()(img)
            images.extend(img)
        x = torch.stack(images)
        return x, y
    

In [16]:
train_set = MyDataSet(train_data)
train_set[0][0].shape

torch.Size([12, 224, 224])

### 2.3 Dataloader

In [17]:
batch_size = 128

# training data
train_set = MyDataSet(train_data)
train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size, num_workers=8)

# validation data
val_set = MyDataSet(val_data)
val_loader = DataLoader(val_set, shuffle=False, batch_size=batch_size, num_workers=8)

# test data
test_set = MyDataSet(test_data)
test_loader = DataLoader(test_set, shuffle=False, batch_size=batch_size, num_workers=8)

## 3. Model

In [18]:
# This is the simplest possible residual block, with only one CNN layer.
# Looking at the paper, you can extend this block to have more layers, bottleneck, grouped convs (from shufflenet), etc.
# Or even look at more recent papers like resnext, regnet, resnest, senet, etc.
class ResidualBlock(nn.Module):
    def __init__(self, in_channel, out_channel,stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False, dilation = 1)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu1 = nn.ReLU(inplace = True)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding= 1, bias=False, dilation = 1)
        self.bn2 =  nn.BatchNorm2d(out_channel)
        if stride == 1:
            self.shortcut = nn.Identity()
        else:
            self.shortcut = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride)
        self.relu2 = nn.ReLU(inplace = True)
    
    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)

        out = self.conv2(out)
        out = self.bn2(out)

        shortcut = self.shortcut(x) 
        out = self.relu2(out + shortcut)
        
        return out

In [20]:
# This has hard-coded hidden feature sizes.
# You can extend this to take in a list of hidden sizes as argument if you want.
class ClassificationNetwork(nn.Module):
    def __init__(self, in_features, num_classes,feat_dim = 512):
        super().__init__()
        
        self.layers = nn.Sequential(
            nn.Conv2d(in_features, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1),
            ResidualBlock(64,64),
            ResidualBlock(64,64),
            ResidualBlock(64,64),
            ResidualBlock(64,128,stride = 2),
            ResidualBlock(128,128),
            ResidualBlock(128,128),
            ResidualBlock(128,128),
            ResidualBlock(128,256,stride = 2),
            ResidualBlock(256,256),
            ResidualBlock(256,256),
            ResidualBlock(256,256),
            ResidualBlock(256,256),
            ResidualBlock(256,256),
            ResidualBlock(256,512,stride = 2),
            ResidualBlock(512,512),
            ResidualBlock(512,512),
            nn.AdaptiveAvgPool2d((1, 1)), # For each channel, collapses (averages) the entire feature map (height & width) to 1x1
            nn.Flatten(1), # the above ends up with batch_size x 512 x 1 x 1, flatten to batch_size x 512
        )
        self.linear_emb = nn.Linear(512, feat_dim)
        self.relu = nn.ReLU(inplace=True)
        self.linear_output_1 = nn.Linear(512,512)
        self.relu_output = nn.ReLU(inplace=True)
        self.dropout_output = nn.Dropout(p=0.4)
        self.linear_output_2 = nn.Linear(512,num_classes)
        # self.fc = nn.Linear(512,num_classes)
        
        for m in self.modules():
          if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
          elif isinstance(m, (nn.BatchNorm2d)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0) 

        for m in self.modules():
          if isinstance(m, ResidualBlock):
            nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]  

    def forward(self, x, return_embedding=False):
        embedding = self.layers(x)
        embedding_out = self.linear_emb(embedding)
        embedding_out = self.relu(embedding_out)

        output = self.linear_output_1(embedding)
        output = self.relu_output(output)
        output = self.dropout_output(output)
        output = self.linear_output_2(output)
        # output = self.fc(embedding)

        if return_embedding:
            return embedding_out,output
        else:
            return output 

In [21]:
numEpochs = 500
in_features = 12 # TODO: change RGB channels according to 4 * 3

learningRate = 0.1
weightDecay = 1e-4

num_classes = 24 # TODO: change the number of classes according to 4!

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

network = ClassificationNetwork(in_features, num_classes)
#network.load_state_dict(torch.load("model_checkpoints/resnet34/lr_0.1-2/model_2.pt"))
network = network.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(network.parameters(), lr=learningRate, weight_decay=weightDecay, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.7, verbose=True)

In [None]:
# Train!
max_val_acc = 0
for epoch in range(numEpochs):
    # Train
    network.train()
    avg_loss = 0.0
    avg_train_acc = 0.0
    with tqdm(train_loader) as t:
      for batch_num, (x, y) in enumerate(train_loader):
        t.set_description("Training")
        optimizer.zero_grad()
        x, y = x.to(device), y.to(device)
        outputs = network(x)
        num_train_correct = (torch.argmax(outputs, axis=1) == y).sum().item()
        num_labels = len(y)
        avg_train_acc += (num_train_correct/num_labels)

        loss = criterion(outputs, y.long())
        loss.backward()
        optimizer.step()
        avg_loss += loss.item()

        if batch_num % 50 == 49:
            print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}\tTraining Accuracy : {:.4f}'.format(epoch, batch_num+1, avg_loss/50, avg_train_acc/50))
            avg_loss = 0.0
            avg_train_acc = 0.0

        torch.cuda.empty_cache()
        del x
        del y
        del loss
    
    # Validate
    network.eval()
    avg_val_loss = 0.0
    num_correct = 0
    with tqdm(val_loader) as t:
      for batch_num, (x, y) in enumerate(val_loader):
        t.set_description("Evaluating")
        x, y = x.to(device), y.to(device)
        outputs = network(x)
        num_correct += (torch.argmax(outputs, axis=1) == y).sum().item()
        loss = criterion(outputs, y.long())
        avg_val_loss += loss.item()

    avg_val_loss = avg_val_loss / len(val_loader)
    val_acc = num_correct / len(val_set)
    checkpoint_name = "/content/gdrive/MyDrive/DL/12Channels/model_" + str(epoch) + ".pt"
    torch.save(network.state_dict(), checkpoint_name)
    if val_acc > max_val_acc:
        max_val_acc = val_acc
        torch.save(network.state_dict(), "/content/gdrive/MyDrive/DL/12Channels/best_model.pt")
    scheduler.step(avg_val_loss)
    print('Epoch: {}, Validation Loss: {:.3f}, Validation Accuracy: {:.3f}'.format(epoch, avg_val_loss, val_acc))

In [33]:
# load model

print("Loading model")
model_save_name = 'model_45.pt'
path = F"/content/gdrive/MyDrive/DL/12Channels/{model_save_name}"
model_dict = torch.load(path)
print(model_dict)
network.load_state_dict(model_dict)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
          [ 3.5942e-02, -1.7639e-02,  7.4991e-02]],

         [[ 2.3319e-02,  1.0038e-02, -2.3945e-02],
          [-5.8173e-02, -3.1091e-02,  1.9034e-02],
          [-1.8731e-02, -8.0604e-02,  7.1790e-02]],

         [[-1.5946e-02, -6.1967e-03,  6.0011e-03],
          [-5.4927e-03,  8.3467e-03, -1.9044e-02],
          [-2.0922e-02,  1.2849e-02,  2.3043e-02]]],


        [[[-2.8545e-02,  2.5849e-02, -3.0702e-03],
          [-1.3309e-02,  4.1904e-02, -8.1036e-03],
          [-8.6677e-03,  3.3215e-02,  3.2997e-02]],

         [[-2.4751e-02, -1.5588e-02, -7.0688e-03],
          [-2.7249e-02, -3.1740e-02,  9.3655e-03],
          [-9.1145e-04, -6.4079e-03,  2.5555e-02]],

         [[-1.0123e-02,  1.7472e-02,  1.8774e-03],
          [ 3.0086e-02,  1.7221e-02, -1.0894e-02],
          [-2.0277e-02,  3.8434e-02,  8.6161e-03]],

         ...,

         [[ 1.6341e-04,  7.4785e-02,  2.6502e-03],
          [ 4.3356e-02,  5.0170e-02, -3

<All keys matched successfully>

In [34]:
# Test

network.eval()
num_correct = 0
for batch_num, (x, y) in enumerate(test_loader):
  x, y = x.to(device), y.to(device)
  outputs = network(x)
  num_correct += (torch.argmax(outputs, axis=1) == y).sum().item()

test_acc = num_correct / len(test_set)
print(test_acc)


0.2901296111665005
