<a href="https://colab.research.google.com/github/tylim9307/Shot-Boundary-Detection/blob/main/Shot_Boundary_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Overview of the notebook

1. [Model implementation](#section_1)
2. [Generating the Training Data](#section_2)
3. [Training](#section_3)

### Library

In [22]:
import pandas as pd
import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import IPython.display as display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## 1. Model Implmentation <a class="anchor" id="section_1"></a>

* Model Input: 10 frames (64 x 64 RGB frames)
* Model Architecture: 4 layers of 3D convolutions, followed by ReLU, and final layer that classifies if the two center frames come from the same shot
* Loss Function: cross-entropy loss
* Optimization: SGD

In [23]:
#Define the Model
input_dim = (64, 64, 10 * 3)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv3d(in_channels=3,out_channels= 16, kernel_size = (5,5,3))
        self.conv2 = nn.Conv3d(in_channels = 16, out_channels = 24, kernel_size = (3,3,3))
        self.conv3 = nn.Conv3d(in_channels= 24, out_channels = 32, kernel_size = (3,3,3))
        self.conv4 = nn.Conv3d(in_channels = 32, out_channels = 12, kernel_size = (6,6,1))
        self.pool = nn.MaxPool3d((51,51,1))
        self.fc = nn.Linear(4*12, 2)
        self.softmax = nn.Softmax()
        
      

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(F.relu(self.conv4(x)))
        x = x.view(-1, 4*12)
        x = self.fc(x)
        x = self.softmax(x)
        
        return x


net = Net()

In [24]:
#Define the loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# 2. Generating the Training Data <a class="anchor" id="section_2"></a>

Due to the lack of time and source of the data for pictures, I will replace the image with random noise.

In [25]:
#Generating array for as sample data (3,64,64,10) as (channel,h,w,t) with Gaussian noise
num_samples = 1000
input = np.random.rand(num_samples, 3,64,64,10)
print(input[0].shape)

#Generating random label
label = [random.randrange(2) for i in range(1000)]

(3, 64, 64, 10)


# 3. Training <a class="anchor" id="section_3"></a>

In [26]:
batch_size = 32
#Create dataloader
input = np.asarray(input)
label = np.asarray(label)
input = torch.tensor(input).float()
label = torch.tensor(label)
x_train, x_test, y_train, y_test = train_test_split(input, label, random_state = 42)
train_data = torch.utils.data.TensorDataset(x_train, y_train)
test_data = torch.utils.data.TensorDataset(x_test, y_test)
trainloader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True)
testloader = torch.utils.data.DataLoader(test_data, batch_size = batch_size, shuffle = False)

In [27]:
#Check if model works with a batch of 1
x = input[0].unsqueeze(dim=0).float()
net(x)



tensor([[0.4964, 0.5036]], grad_fn=<SoftmaxBackward>)

In [28]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        #pred = np.argmax(outputs.detach().numpy(),axis = 1)
        #pred = torch.tensor(pred).float()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')



KeyboardInterrupt: ignored