In [None]:
import numpy as np
import os, sys
sys.path.append('../')
from src.datasets.soccernet_generic import soccernet_dataset_generic
from src.utils.helper import samples_by_language
import torch
from torch.utils.data import Dataset, DataLoader

In [None]:
# Root of the SoccerNet data directory (note the trailing slash).
root_dir = "/work/oarongve/data/sound_dataset/SoccerNet-code/data/"
# Game-list files for the three splits, all stored directly under root_dir.
train_list, valid_list, test_list = (
    f"{root_dir}listgame_{split_name}.npy"
    for split_name in ("Train_300", "Valid_100", "Test_100")
)


In [None]:
%%bash
# Print the path of the python3 executable that is first on the shell's PATH.
which python3

In [None]:
%%bash
# Show the NVIDIA driver's GPU status report before training starts.
nvidia-smi

In [None]:
# Language-annotation JSON file for each split.
langpath_train = '/work/oarongve/project-daredevil/project-daredevil/language-annotations/annotations/train_lang_dict.json'
langpath_valid = '/work/oarongve/project-daredevil/project-daredevil/language-annotations/annotations/valid_lang_dict.json'
langpath_test = '/work/oarongve/project-daredevil/project-daredevil/language-annotations/annotations/test_lang_dict.json'

# (annotation file, game list) pairs in train / valid / test order.
_split_sources = (
    (langpath_train, train_list),
    (langpath_valid, valid_list),
    (langpath_test, test_list),
)

# One samples_by_language() call per split and language filter, issued in the
# same order as before: all three 'all' calls, then 'english', then 'other'.
samples_train_all, samples_valid_all, samples_test_all = [
    samples_by_language(lang_json, game_list, 'all')
    for lang_json, game_list in _split_sources
]

samples_train_english, samples_valid_english, samples_test_english = [
    samples_by_language(lang_json, game_list, 'english')
    for lang_json, game_list in _split_sources
]

samples_train_other, samples_valid_other, samples_test_other = [
    samples_by_language(lang_json, game_list, 'other')
    for lang_json, game_list in _split_sources
]


In [None]:
# One dataset per (split, language filter). Every dataset reads the same game
# list and root directory; only `lang` and the annotation file differ.
train_set_all = soccernet_dataset_generic(npy_file=train_list,root_dir=root_dir,lang='all',lang_dict=langpath_train)
valid_set_all = soccernet_dataset_generic(npy_file=valid_list,root_dir=root_dir,lang='all',lang_dict=langpath_valid)

train_set_english = soccernet_dataset_generic(npy_file=train_list,root_dir=root_dir,lang='english',lang_dict=langpath_train)
valid_set_english = soccernet_dataset_generic(npy_file=valid_list,root_dir=root_dir,lang='english',lang_dict=langpath_valid)

train_set_other = soccernet_dataset_generic(npy_file=train_list,root_dir=root_dir,lang='other',lang_dict=langpath_train)
valid_set_other = soccernet_dataset_generic(npy_file=valid_list,root_dir=root_dir,lang='other',lang_dict=langpath_valid)

# The rest of the notebook uses plain `train_set` / `valid_set`, which were
# never bound anywhere, so a fresh kernel failed with NameError.
# NOTE(review): assuming the unfiltered ('all') datasets are the intended
# default; point these at the *_english / *_other datasets to train on a subset.
train_set = train_set_all
valid_set = valid_set_all

In [None]:
# Display the training dataset's own summary (content is defined by soccernet_dataset_generic.describe).
train_set.describe()

In [None]:
# Same summary for the validation dataset.
valid_set.describe()

In [None]:
# Presumably reads the raw audio for every game into the dataset objects
# — TODO confirm against soccernet_dataset_generic.load_waves.
train_set.load_waves()
valid_set.load_waves()


In [None]:
# NOTE(review): load_features=True looks like "use previously stored
# spectrograms instead of recomputing" — confirm in soccernet_dataset_generic.
train_set.generate_mel_spectrograms(load_features=True)
valid_set.generate_mel_spectrograms(load_features=True)


In [None]:
# Presumably provides the visual features later read from batches as
# data['resnet_spot'] — TODO confirm.
train_set.load_resnet_features()
valid_set.load_resnet_features()


In [None]:
# Window size handed to set_window_size() on both datasets and to Net(w).
w = 4

In [None]:
# Use the same window on both splits so training and validation samples match
# the input width the model is built for.
train_set.set_window_size(w)
valid_set.set_window_size(w)

# Train on resnet features

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small CNN classifier over a window of per-frame feature vectors.

    The network expects input of shape
    ``(batch, 1, feature_dim, 2 * window_size)``: ``conv1`` uses a
    ``(feature_dim, 1)`` kernel, which collapses the feature axis to height 1,
    and ``fc1`` is sized for ``2 * window_size`` remaining columns.

    Args:
        window_size: window size the dataset is configured with; the input
            must have ``2 * window_size`` columns.
        num_classes: number of output logits. Defaults to 4, the value that
            was previously hard-coded.
        feature_dim: length of each per-frame feature vector. Defaults to 512,
            the value that was previously hard-coded.
    """

    def __init__(self, window_size, num_classes=4, feature_dim=512):
        super().__init__()
        self.window_size = window_size
        # Collapse the whole feature axis in one step: one output row per column.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=128, kernel_size=(feature_dim, 1))
        self.bn1 = nn.BatchNorm2d(self.conv1.out_channels)
        # 1x1 convolution: mixes channels independently for every column.
        self.conv2 = nn.Conv2d(self.conv1.out_channels, 64, 1)
        self.bn2 = nn.BatchNorm2d(self.conv2.out_channels)
        self.fc1 = nn.Linear(self.conv2.out_channels * (window_size * 2), 120)
        self.fc2 = nn.Linear(self.fc1.out_features, 84)
        self.fc3 = nn.Linear(self.fc2.out_features, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        # Flatten (channels, 1, columns) into one vector per sample.
        x = x.reshape(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: CrossEntropyLoss expects logits.
        return self.fc3(x)

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Visual classifier sized for window `w`; the training cell moves it to `device`.
net = Net(w)
import torch.optim as optim

# CrossEntropyLoss is fed the network's raw logits and the batch's 'label' tensor.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over all parameters of `net`.
optimizer = optim.SGD(net.parameters(), lr=0.025, momentum=0.9)

In [None]:
# Silence every Python warning for the rest of the session
# (note that this also hides deprecation notices).
import warnings
warnings.filterwarnings('ignore')

# Keyword arguments passed to every DataLoader built in this notebook.
# drop_last=True discards the final batch when it is smaller than batch_size.
params = {'batch_size': 24,
         'shuffle': True,
         'num_workers':4,
         'drop_last':True}

In [None]:
# Number of items the training dataset reports.
len(train_set)

In [None]:
# Number of items the validation dataset reports.
len(valid_set)

In [None]:
# Re-apply the window size right before the loaders are built, so the datasets
# are in the expected state even if cells were run out of order.
train_set.set_window_size(w)
valid_set.set_window_size(w)

In [None]:
trainloader = DataLoader(train_set,**params)
# Validation must see every sample: the shared `params` enable shuffle and
# drop_last, which discarded the final incomplete batch, so the reported
# accuracy was computed on a truncated validation set.
validloader = DataLoader(valid_set,**{**params,'shuffle':False,'drop_last':False})

In [None]:

net.to(device)
epochs = 10
accs = list()  # validation accuracy after each epoch
n_classes = net.fc3.out_features  # confusion-matrix size follows the model head
for epoch in range(epochs):
    # Validation below switches the model to eval mode; switch back so that
    # BatchNorm uses batch statistics again. Previously every epoch after the
    # first trained in eval mode.
    net.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader,0):
        # unsqueeze adds the channel axis and permute swaps the last two axes;
        # presumably this turns (batch, frames, 512) into (batch, 1, 512, frames)
        # — TODO confirm the dataset layout.
        inputs = data['resnet_spot'].unsqueeze(1).permute(0,1,3,2).to(device)
        target = data['label'].to(device)
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
            # The nested `i % 5 == 0` report that used to follow was removed:
            # it could never run, because i % 2000 == 1999 implies i % 5 == 4.

    # calculate accuracy
    with torch.no_grad():
        net.eval()
        # Confusion matrix indexed [predicted, ground truth].
        res = torch.zeros((n_classes,n_classes))
        for i, data in enumerate(validloader, 0):
            inputs = data['resnet_spot'].unsqueeze(1).permute(0,1,3,2).to(device)
            label = data['label'].to(device)

            outputs = net(inputs)
            preds = torch.argmax(outputs,dim=1)

            for p,gt in zip(preds,label):
                res[int(p),int(gt)] += 1

    N_total = res.sum()
    N_correct = res.diag().sum()

    acc = N_correct / N_total

    print(f"Epoch : {epoch}, Accuracy : {acc}")
    accs.append(acc)

# Printed once, after the last epoch (it used to be printed after every epoch).
print('Finished Training')


# Train on audio

In [None]:
import torchvision
# Start from torchvision's pretrained ResNet-18, then replace the first and
# last layers; the two replacement layers are freshly initialised.
resnet = torchvision.models.resnet18(pretrained=True)
# Single-channel input instead of RGB (the 'ms_spot' batches get one channel
# axis added by unsqueeze(1) in the training loop).
resnet.conv1 = nn.Conv2d(1,64,kernel_size=(7,7),stride=(2,2),padding=(3,3),bias=False)
# Four output logits, matching the 4x4 confusion matrix used for evaluation.
resnet.fc = nn.Linear(512,4,bias=True)
import torch.optim as optim

# These rebind the `criterion` / `optimizer` used by the visual model above.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet.parameters(), lr=0.0001, momentum=0.9)


In [None]:
resnet.to(device)
epochs = 10
accs_ms = list()  # validation accuracy of the audio model after each epoch
n_classes = resnet.fc.out_features  # confusion-matrix size follows the model head
for epoch in range(epochs):
    resnet.train()
    # Previously never initialised in this cell: it only worked when the
    # variable leaked in from the visual training cell.
    running_loss = 0.0
    for i, data in enumerate(trainloader,0):
        # unsqueeze(1) adds the single channel axis the replaced conv1 expects.
        # The former .permute(0,1,2,3) was an identity permutation and is dropped.
        inputs = data['ms_spot'].unsqueeze(1).to(device)
        target = data['label'].to(device)
        # Replace NaN entries in the input so they cannot propagate into the loss.
        inputs[inputs.isnan()] = 0.0
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = resnet(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            # Start the next reporting window from zero; the old code re-added
            # the current loss here and so counted it twice.
            running_loss = 0.0

    # calculate accuracy
    with torch.no_grad():
        resnet.eval()
        # Confusion matrix indexed [predicted, ground truth].
        res = torch.zeros((n_classes,n_classes))
        for i, data in enumerate(validloader, 0):
            inputs = data['ms_spot'].unsqueeze(1).to(device)
            label = data['label'].to(device)
            # Same NaN handling as in training; without it a NaN input turns
            # the logits into NaN and the prediction becomes arbitrary.
            inputs[inputs.isnan()] = 0.0

            outputs = resnet(inputs)
            preds = torch.argmax(outputs,dim=1)

            for p,gt in zip(preds,label):
                res[int(p),int(gt)] += 1

    N_total = res.sum()
    N_correct = res.diag().sum()

    acc = N_correct / N_total
    accs_ms.append(acc)
    print(f"Epoch : {epoch}, Accuracy : {acc}")


print('Finished Training')


# Merge models 1 – late fusion: combine the audio and visual softmax outputs at evaluation time

In [None]:
# calculate accuracy


with torch.no_grad():
    resnet.eval()
    net.eval()

    # Confusion matrices indexed [predicted, ground truth]: one per single
    # model and one for the fused prediction.
    res_visual = torch.zeros((4,4))
    res_audio = torch.zeros((4,4))
    res = torch.zeros((4,4))

    for data in validloader:
        inputs_audio = data['ms_spot'].unsqueeze(1).to(device)
        inputs_visual = data['resnet_spot'].unsqueeze(1).permute(0,1,3,2).to(device)
        label = data['label'].to(device)

        inputs_audio[inputs_audio.isnan()] = 0.0

        outputs_audio = resnet(inputs_audio)
        outputs_visual = net(inputs_visual)

        # Late fusion: add the two class-probability vectors. The argmax of the
        # sum equals the argmax of the average.
        fused_preds = torch.softmax(outputs_audio,dim=1) + torch.softmax(outputs_visual,dim=1)
        preds_audio = torch.argmax(outputs_audio,dim=1)
        preds_visual = torch.argmax(outputs_visual,dim=1)
        preds = torch.argmax(fused_preds,dim=1)

        for p_fused,p_audio,p_visual,gt in zip(preds,preds_audio,preds_visual,label):
            res[int(p_fused),int(gt)] += 1
            res_audio[int(p_audio),int(gt)] += 1
            res_visual[int(p_visual),int(gt)] += 1

    N_total = res.sum()
    N_correct = res.diag().sum()

    acc = N_correct / N_total
    # The fused accuracy is no longer appended to `accs_ms` (that list is the
    # audio model's per-epoch history), and the message no longer prints a
    # stale `epoch` left over from an earlier training loop.
    print(f"Fused accuracy : {acc}")


In [None]:
def get_acc(res_a):
    """Print and return the overall accuracy of a confusion matrix.

    Args:
        res_a: square 2-D tensor of counts whose diagonal holds the correctly
            classified samples.

    Returns:
        0-dim tensor: sum of the diagonal divided by the sum of all entries.
        An all-zero float matrix gives NaN (0 / 0).
    """
    N_total = res_a.sum()
    N_correct = res_a.diag().sum()

    acc = N_correct / N_total
    print(acc)
    # Also return the value so callers can store or compare it.
    return acc

In [None]:
# Accuracy of the fused (audio + visual) predictions.
get_acc(res)

In [None]:
# Accuracy of the audio model alone.
get_acc(res_audio)

In [None]:
# Accuracy of the visual model alone.
get_acc(res_visual)

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class fusion(nn.Module):
    """MLP head that classifies concatenated audio and visual embeddings.

    The two inputs are concatenated along dim 1 and reshaped to rows of 256
    values, so their feature sizes must add up to 256.

    Args:
        num_classes: number of output logits. Defaults to 3, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        self.fc1 = nn.Linear(256,256)
        self.fc2 = nn.Linear(256,128)
        self.fc3 = nn.Linear(128,num_classes)
        self.bn = nn.BatchNorm1d(256)

    def forward(self, x_audio,x_visual):
        """Return raw class logits for the concatenated embeddings."""
        x = torch.cat((x_audio,x_visual),dim=1)
        # The debug print of x.size() on every forward pass was removed.
        x = x.view(-1,256)
        x = self.bn(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No ReLU on the output layer: clamping logits at zero removes the
        # gradient for every negative logit and CrossEntropyLoss expects
        # unconstrained scores.
        return self.fc3(x)

In [None]:
# Swap both classification heads for 128-dimensional outputs, so that the two
# models together produce the 256 values that `fusion` consumes.
# The new layers are created on the CPU, so they are moved to `device`
# explicitly: both models were already moved there, and a CPU head on a GPU
# model fails with a device mismatch on the next forward pass.
resnet.fc = nn.Linear(512,out_features=128,bias=True).to(device)
net.fc3 = nn.Linear(84,128,bias=True).to(device)

# Window sizes


In [None]:
from torch.optim.lr_scheduler import MultiStepLR

In [None]:
import torchvision
# Fresh pretrained ResNet-18 with a one-channel input layer and a four-logit head.
# NOTE(review): the single-window training cell below rebuilds `resnet`,
# `criterion` and `optimizer` itself, so this cell looks redundant — confirm
# before removing.
resnet = torchvision.models.resnet18(pretrained=True)
resnet.conv1 = nn.Conv2d(1,64,kernel_size=(7,7),stride=(2,2),padding=(3,3),bias=False)
resnet.fc = nn.Linear(512,4,bias=True)
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet.parameters(), lr=0.0001, momentum=0.9)

In [None]:
# Candidate window sizes.
# NOTE(review): no executed code in this notebook reads this list; the sweep
# cells hard-code their own values (which also include 128).
wsizes = [2,4,8,16,32,64]

In [None]:
import torch.optim as optim

# Single-window audio run.
w = 4
# set window_size for training and validation set
train_set.set_window_size(w)
valid_set.set_window_size(w)

resnet = torchvision.models.resnet18(pretrained=True)
resnet.conv1 = nn.Conv2d(1,64,kernel_size=(7,7),stride=(2,2),padding=(3,3),bias=False)
# NOTE(review): this head has 3 outputs while the other audio cells use 4.
# If the labels contain class index 3, CrossEntropyLoss will reject them —
# confirm the intended number of classes.
resnet.fc = nn.Linear(512,3,bias=True)
n_classes = resnet.fc.out_features  # confusion-matrix size follows the model head

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet.parameters(), lr=0.001, momentum=0.9)
# Milestones are counted in scheduler.step() calls, i.e. in epochs here.
scheduler = MultiStepLR(optimizer, milestones=[5,10], gamma=0.1)

# train
resnet.to(device)

epochs = 5
accs_ms = list()  # validation accuracy after each epoch
print(w)
for epoch in range(epochs):
    resnet.train()
    # Previously never initialised in this cell (it relied on leaked state).
    running_loss = 0.0
    for i, data in enumerate(trainloader,0):
        # unsqueeze(1) adds the single channel axis the replaced conv1 expects.
        inputs = data['ms_spot'].unsqueeze(1).to(device)
        target = data['label'].to(device)
        inputs[inputs.isnan()] = 0.0
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = resnet(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

    # Step the scheduler once per epoch. It used to be stepped after every
    # batch, which cut the learning rate by 10x after 5 batches and again
    # after 10 batches.
    scheduler.step()

    # calculate accuracy
    with torch.no_grad():
        resnet.eval()
        # Confusion matrix indexed [predicted, ground truth].
        res = torch.zeros((n_classes,n_classes))
        for i, data in enumerate(validloader, 0):
            inputs = data['ms_spot'].unsqueeze(1).to(device)
            label = data['label'].to(device)
            # Same NaN handling as in training.
            inputs[inputs.isnan()] = 0.0

            outputs = resnet(inputs)
            preds = torch.argmax(outputs,dim=1)

            for p,gt in zip(preds,label):
                res[int(p),int(gt)] += 1

    N_total = res.sum()
    N_correct = res.diag().sum()

    acc = N_correct / N_total
    accs_ms.append(acc)
    print(f"Epoch : {epoch}, Accuracy : {acc}")


print('Finished Training')

In [None]:
%%bash
# Check GPU memory and utilisation again before starting the sweeps.
nvidia-smi

In [None]:
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for w in [2,4,8,16,32,64,128]:
    acc = 0

    # Net(w) sizes fc1 for 2*w input columns, so the datasets and loaders must
    # be switched to the same window. This cell previously kept whatever window
    # was set last, which breaks fc1 for every other value of w.
    train_set.set_window_size(w)
    valid_set.set_window_size(w)
    trainloader = DataLoader(train_set,**params)
    # Evaluate on every validation sample, in a fixed order.
    validloader = DataLoader(valid_set,**{**params,'shuffle':False,'drop_last':False})

    net = Net(w)
    n_classes = net.fc3.out_features

    criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(net.parameters(), lr=0.025, momentum=0.9)
    net.to(device)
    epochs = 5
    accs = list()  # validation accuracy after each epoch for this window
    for epoch in range(epochs):
        # Validation leaves the model in eval mode; restore training mode.
        net.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader,0):
            inputs = data['resnet_spot'].unsqueeze(1).permute(0,1,3,2).to(device)
            target = data['label'].to(device)
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                # The unreachable nested `i % 5 == 0` report was removed.

        # calculate accuracy
        with torch.no_grad():
            net.eval()
            # Sized from the model head: the old 3x3 matrix raised IndexError
            # as soon as the 4-logit network predicted class 3.
            res = torch.zeros((n_classes,n_classes))
            for i, data in enumerate(validloader, 0):
                inputs = data['resnet_spot'].unsqueeze(1).permute(0,1,3,2).to(device)
                label = data['label'].to(device)

                outputs = net(inputs)
                preds = torch.argmax(outputs,dim=1)

                for p,gt in zip(preds,label):
                    res[int(p),int(gt)] += 1

        N_total = res.sum()
        N_correct = res.diag().sum()

        acc = N_correct / N_total

        print(f"Epoch : {epoch}, Accuracy : {acc}")
        accs.append(acc)

    # Printed once per window, after its last epoch.
    print('Finished Training')
    print(f"window: {w} acc: {acc}")

# Window-size sweeps: visual (ResNet features) and audio (mel spectrograms)

In [None]:
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for w in [2,4,8,16,32,64,128]:
    acc = 0

    # Keep datasets, loaders and model on the same window size.
    train_set.set_window_size(w)
    valid_set.set_window_size(w)

    trainloader = DataLoader(train_set,**params)
    # Evaluate on every validation sample, in a fixed order (the shared
    # `params` would shuffle and drop the last incomplete batch).
    validloader = DataLoader(valid_set,**{**params,'shuffle':False,'drop_last':False})

    net = Net(w)
    n_classes = net.fc3.out_features

    criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(net.parameters(), lr=0.025, momentum=0.9)
    net.to(device)
    epochs = 5
    accs = list()  # validation accuracy after each epoch for this window
    for epoch in range(epochs):
        # Validation leaves the model in eval mode; restore training mode so
        # BatchNorm uses batch statistics from the second epoch on as well.
        net.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader,0):
            inputs = data['resnet_spot'].unsqueeze(1).permute(0,1,3,2).to(device)
            target = data['label'].to(device)
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                # The unreachable nested `i % 5 == 0` report was removed
                # (i % 2000 == 1999 implies i % 5 == 4).

        # calculate accuracy
        with torch.no_grad():
            net.eval()
            # Sized from the model head: the old 3x3 matrix raised IndexError
            # as soon as the 4-logit network predicted class 3.
            res = torch.zeros((n_classes,n_classes))
            for i, data in enumerate(validloader, 0):
                inputs = data['resnet_spot'].unsqueeze(1).permute(0,1,3,2).to(device)
                label = data['label'].to(device)

                outputs = net(inputs)
                preds = torch.argmax(outputs,dim=1)

                for p,gt in zip(preds,label):
                    res[int(p),int(gt)] += 1

        N_total = res.sum()
        N_correct = res.diag().sum()

        acc = N_correct / N_total

        print(f"Epoch : {epoch}, Accuracy : {acc}")
        accs.append(acc)

    # Printed once per window, after its last epoch.
    print('Finished Training')
    print(f"window: {w} acc: {acc}")

In [None]:
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# DataLoader keyword arguments used by the sweep cells below
# (same values as the earlier definition of `params`).
params = {'batch_size': 24,
         'shuffle': True,
         'num_workers':4,
         'drop_last':True}
import torchvision
import torch.nn as nn
import torch.optim as optim

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
accs_w = list()  # final-epoch validation accuracy for each window size
for w in [2,4,8,16,32,64,128]:
    acc = 0
    # fix loader
    train_set.set_window_size(w)
    valid_set.set_window_size(w)

    trainloader = DataLoader(train_set,**params)
    # Evaluate on every validation sample, in a fixed order (the shared
    # `params` would shuffle and drop the last incomplete batch).
    validloader = DataLoader(valid_set,**{**params,'shuffle':False,'drop_last':False})

    resnet = torchvision.models.resnet18(pretrained=True)
    resnet.conv1 = nn.Conv2d(1,64,kernel_size=(7,7),stride=(2,2),padding=(3,3),bias=False)
    resnet.fc = nn.Linear(512,4,bias=True)
    n_classes = resnet.fc.out_features

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(resnet.parameters(), lr=0.0001, momentum=0.9)
    resnet.to(device)

    epochs = 5
    accs = list()  # validation accuracy after each epoch for this window
    for epoch in range(epochs):
        resnet.train()
        # Reset per epoch so a partial sum from the previous epoch cannot leak
        # into the first report of the next one.
        running_loss = 0.0
        for i, data in enumerate(trainloader,0):
            # unsqueeze(1) adds the single channel axis the replaced conv1 expects.
            inputs = data['ms_spot'].unsqueeze(1).to(device)
            target = data['label'].to(device)
            inputs[inputs.isnan()] = 0.0
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = resnet(inputs)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                # The old code re-added the current loss after this reset and
                # so counted it twice.
                running_loss = 0.0

        # calculate accuracy
        with torch.no_grad():
            resnet.eval()
            # Confusion matrix indexed [predicted, ground truth].
            res = torch.zeros((n_classes,n_classes))
            for i, data in enumerate(validloader, 0):
                inputs = data['ms_spot'].unsqueeze(1).to(device)
                label = data['label'].to(device)
                # Same NaN handling as in training.
                inputs[inputs.isnan()] = 0.0

                outputs = resnet(inputs)
                preds = torch.argmax(outputs,dim=1)

                for p,gt in zip(preds,label):
                    res[int(p),int(gt)] += 1

        N_total = res.sum()
        N_correct = res.diag().sum()

        acc = N_correct / N_total

        print(f"Epoch : {epoch}, Accuracy : {acc}")
        accs.append(acc)

    accs_w.append(acc)
    print(f"window: {w} acc: {acc}")

In [None]:
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
accs_w_v = list()  # one list of per-epoch validation accuracies per window size
for w in [2,4,8,16,32,64,128]:
    acc = 0
    # fix loader
    train_set.set_window_size(w)
    valid_set.set_window_size(w)

    trainloader = DataLoader(train_set,**params)
    # Evaluate on every validation sample, in a fixed order (the shared
    # `params` would shuffle and drop the last incomplete batch).
    validloader = DataLoader(valid_set,**{**params,'shuffle':False,'drop_last':False})

    net = Net(w)
    n_classes = net.fc3.out_features

    criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(net.parameters(), lr=0.025, momentum=0.9)
    net.to(device)

    # The original assigned 5 and then immediately overwrote it with 10.
    epochs = 10
    accs = list()
    for epoch in range(epochs):
        # Validation leaves the model in eval mode; restore training mode so
        # BatchNorm uses batch statistics from the second epoch on as well.
        net.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader,0):
            inputs = data['resnet_spot'].unsqueeze(1).permute(0,1,3,2).to(device)
            target = data['label'].to(device)
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                # The unreachable nested `i % 5 == 0` report was removed
                # (i % 2000 == 1999 implies i % 5 == 4).

        # calculate accuracy
        with torch.no_grad():
            net.eval()
            # Confusion matrix indexed [predicted, ground truth].
            res = torch.zeros((n_classes,n_classes))
            for i, data in enumerate(validloader, 0):
                inputs = data['resnet_spot'].unsqueeze(1).permute(0,1,3,2).to(device)
                label = data['label'].to(device)

                outputs = net(inputs)
                preds = torch.argmax(outputs,dim=1)

                for p,gt in zip(preds,label):
                    res[int(p),int(gt)] += 1

        N_total = res.sum()
        N_correct = res.diag().sum()

        acc = N_correct / N_total

        print(f"Epoch : {epoch}, Accuracy : {acc}")
        accs.append(acc)

    print('Finished Training')

    # Store this window's history exactly once. The append used to sit inside
    # the epoch loop and added the same list object after every epoch.
    # TODO: get best acc, save best model
    accs_w_v.append(accs)
    print(f"window: {w} acc: {acc}")

In [None]:
# TODO: move the training and evaluation loops above into a cleaner, reusable place (e.g. a module under src/).