In [14]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
import torchaudio

In [15]:
root_dir = "/work/oarongve/data/sound_dataset/SoccerNet-code/data/"
train_file = root_dir+"listgame_Train_300.npy"
valid_file = root_dir+"listgame_Valid_100.npy"
test_file = root_dir+"listgame_Test_100.npy"

# Librosa test

In [16]:
import librosa
import matplotlib.pyplot as plt

In [17]:
SAMPLE_RATE = 8000 # Should be 16 kHz
WINDOW_LENGTH = int(0.025 * SAMPLE_RATE) # Should be 25 ms
N_FFT = 512 # Should be 512
STEP_SIZE = int(0.01 * SAMPLE_RATE) # Should be 10ms


In [18]:
# Clip dataset
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch
import os
import json
import torchvision
import datetime
import torchaudio
import librosa
from subprocess import Popen, PIPE
import re



class SoccerNetDataset(Dataset):
    """Soccernet Dataset"""
    
    SAMPLE_RATE = 8000 # Should be 16 kHz
    WINDOW_LENGTH = int(0.025 * SAMPLE_RATE) # Should be 25 ms
    N_FFT = 512 # Should be 512
    STEP_SIZE = int(0.01 * SAMPLE_RATE) # Should be 10ms
    
    def __init__(self,npy_file,
                 root_dir,
                 transform=None,
                 background=False,
                 wsize=4):
    
        self.wsize = wsize
        self.npy_file = np.load(npy_file)
        self.samples = list() # maybe change structure later depending on efficiency        
        self.root_dir = root_dir
        self.transform = transform

        for e in self.npy_file:
            path, annotations = self.get_annotations(e)

            duration1 = self.getVideoLength(self.root_dir + e + "/1.mkv")
            duration2 = self.getVideoLength(self.root_dir + e + "/2.mkv")
            #print(f"duration1 : {duration1}, duration2: {duration2}")
            for annotation in annotations:
                # Check that annotations hold correct labels
                        if ("card" in annotation["label"]) or ("subs" in annotation["label"]) or ("soccer" in annotation["label"]):
                            annotation["duration1"] = duration1
                            annotation["duration2"] = duration2
                            self.samples.append([path,annotation])

       
    def __len__(self):
        return len(self.samples)
    
    def getVideoLength(self,video_file):
        res = Popen(['ffmpeg', '-i', video_file, '-hide_banner'],stdout=PIPE,stderr=PIPE)
        none,meta = res.communicate()
        meta_out = meta.decode()
        #---| Take out info
        duration = re.search(r'Duration:.*', meta_out)
        return duration.group()[:21]

    def __getitem__(self,idx):
        """Returns a sample containing video path, clip and label"""
        if torch.is_tensor(idx):
            idx.tolist()
        
        # get annotations
        time_half = int(self.samples[idx][1]["gameTime"][0])
        time_minute = int(self.samples[idx][1]["gameTime"][-5:-3])
        time_second = int(self.samples[idx][1]["gameTime"][-2:])
        annotation = self.samples[idx][1]

        # Get label
        if ("card" in annotation["label"]): label = 0
        elif ("subs" in annotation["label"]): label = 1
        elif ("soccer" in annotation["label"]): label = 2
        elif ("background" in annotation["label"]): label = 3
        else: 
            print("Warning, label not compatible with set")
            return

        # Get audiopath
        audiopath = os.path.join(self.root_dir,
                              str(self.samples[idx][0]),
                                str(time_half)+"_audio.wav")
        

        
        one_hot_label = np.zeros(4)
        one_hot_label[label] = 1
        # Get video frames 
        
        # get start in second, use labeled time as center TODO: fix centerframe as keyframe and stride
        fps = 25.0 # assume fps = 25 for now, should be so
        start_sec = time_minute*60 + time_second
        end_sec = start_sec
        
        if start_sec == 0:
            end_sec += (1/fps) # possibly unstable solution
            
        end_sec = end_sec + self.wsize # might need to subtract 1/fps
        # Shift backwards to center around time but check that time > 0
        diff = (end_sec - start_sec) / 2 # TODO : Might result in bad precision
        temp_start_sec = start_sec - diff
        temp_end_sec = end_sec - diff

        # Only change as long as the shift operation doesnt shift out of bounds 
        if temp_start_sec >= 0:
            start_sec = temp_start_sec
            end_sec = temp_end_sec
        
        # Buffer to endsec incase of bad load
        end_sec = end_sec + 0.9 # loads more frames than needed, then reduced later
        
        y, sr = librosa.load(audiopath,sr=SAMPLE_RATE,offset=start_sec,duration=self.wsize+1)
        
        print(len(y))
        y = y[:SAMPLE_RATE*4]
        
        ms = torchaudio.transforms.MelSpectrogram(sample_rate=SAMPLE_RATE,
                                                n_fft=N_FFT,
                                                win_length=WINDOW_LENGTH,
                                                hop_length=STEP_SIZE)(torch.Tensor(y)).log10().unsqueeze(0)
        print(f"ms size = {ms.size()}")
        
        
        ms = ms[:,:,:401]
        if ms.size() != (1,128,401):
            print(f"ms size :{ms.size()}, using zeros instead ...")
            ms = torch.zeros((1,128,401))
        
        sample = {'audiopath': audiopath, 'annotation':annotation,'idx':idx,'audio':y,'sr':sr,'mel_spectogram':ms, 'one_hot_label':one_hot_label,'label':label}
        
        return sample
            
    def get_annotations(self,path):
        """ Reads json files and returns """
        with open(self.root_dir+path+"/Labels.json") as jsonfile:
            json_label = json.load(jsonfile)
        
        labels = [e for e in json_label['annotations']]
        
        return path,labels
    def get_keyframe(self,idx):
        if self.frame_center == 'back': return self.__getitem__(idx)['clip'][0,:,:,:]
        elif self.frame_center == 'center': return self.__getitem__(idx)['clip'][self.nframes//2,:,:,:]
        elif self.frame_center == 'front': return self.__getitem__(idx)['clip'][self.nframes-1,:,:,:]
    def describe(self):
        card = 0
        subs = 0
        goal = 0
        background = 0

        for sample in self.samples:
            annotation = sample[1]
        # Get label
            if ("card" in annotation["label"]): card += 1
            elif ("subs" in annotation["label"]): subs +=1
            elif ("soccer" in annotation["label"]): goal += 1
            elif ("background" in annotation["label"]): background += 1

        print("Description of dataset\n\n")
        print("\n ********* Classes *********")
        print("\n card = 0\n subs = 1\n goals = 2\n background = 3")

        print("\n ********* Distribution and count *********")
        print(f"\n N card: {card} \n N subs: {subs} \n N goal: {goal} \n N background: {background} \n \n Total : {card+subs+goal+background}")
        
        print("\n\n ********* Configuration *********")
        print(f"\n npy_file: {self.npy_file} \n tshift: {self.tshift} \n root_dir: {self.root_dir} \n transform: {self.transform} \n frame_center: {self.frame_center} \n nframes: {self.nframes} \n stride_frames: {self.stride_frames} \n background: {self.background}")
        print("\n\n ********* End of description *********")

In [19]:
X = SoccerNetDataset(root_dir=root_dir,npy_file=train_file)

In [12]:
# Disable warnings
import warnings
warnings.filterwarnings('ignore')

In [13]:
for e in X:
    print(e['mel_spectogram'].size())

40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.

40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.

40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.

40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.

40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.

40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.

40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.Size([1, 128, 401])
40000
ms size = torch.Size([1, 128, 401])
torch.

KeyboardInterrupt: 

In [None]:
params = {'batch_size': 8,
         'shuffle': False,
         'num_workers':4}

In [None]:
dataloader = DataLoader(X,**params)

In [None]:
%%bash
nvidia-smi

# Save to numpy

In [20]:
# Clip dataset
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch
import os
import json
import torchvision
import datetime
import torchaudio
import librosa
from subprocess import Popen, PIPE
import re



class SoccerNetDataset(Dataset):
    """Soccernet Dataset"""
    
    SAMPLE_RATE = 8000 # Should be 16 kHz
    WINDOW_LENGTH = int(0.025 * SAMPLE_RATE) # Should be 25 ms
    N_FFT = 512 # Should be 512
    STEP_SIZE = int(0.01 * SAMPLE_RATE) # Should be 10ms
    
    def __init__(self,npy_file,
                 root_dir,
                 transform=None,
                 background=False,
                 wsize=4):
    
        self.wsize = wsize
        self.npy_file = np.load(npy_file)
        self.samples = list() # maybe change structure later depending on efficiency        
        self.root_dir = root_dir
        self.transform = transform

        for e in self.npy_file:
            path, annotations = self.get_annotations(e)

            duration1 = self.getVideoLength(self.root_dir + e + "/1.mkv")
            duration2 = self.getVideoLength(self.root_dir + e + "/2.mkv")
            #print(f"duration1 : {duration1}, duration2: {duration2}")
            for annotation in annotations:
                # Check that annotations hold correct labels
                        if ("card" in annotation["label"]) or ("subs" in annotation["label"]) or ("soccer" in annotation["label"]):
                            annotation["duration1"] = duration1
                            annotation["duration2"] = duration2
                            self.samples.append([path,annotation])

       
    def __len__(self):
        return len(self.samples)
    
    def getVideoLength(self,video_file):
        res = Popen(['ffmpeg', '-i', video_file, '-hide_banner'],stdout=PIPE,stderr=PIPE)
        none,meta = res.communicate()
        meta_out = meta.decode()
        #---| Take out info
        duration = re.search(r'Duration:.*', meta_out)
        return duration.group()[:21]

    def __getitem__(self,idx):
        """Returns a sample containing video path, clip and label"""
        if torch.is_tensor(idx):
            idx.tolist()
        
        # get annotations
        time_half = int(self.samples[idx][1]["gameTime"][0])
        time_minute = int(self.samples[idx][1]["gameTime"][-5:-3])
        time_second = int(self.samples[idx][1]["gameTime"][-2:])
        annotation = self.samples[idx][1]

        # Get label
        if ("card" in annotation["label"]): label = 0
        elif ("subs" in annotation["label"]): label = 1
        elif ("soccer" in annotation["label"]): label = 2
        elif ("background" in annotation["label"]): label = 3
        else: 
            print("Warning, label not compatible with set")
            return

        # Get audiopath
        audiopath = os.path.join(self.root_dir,
                              str(self.samples[idx][0]),
                                str(time_half)+"_audio.wav")
        

        
        one_hot_label = np.zeros(4)
        one_hot_label[label] = 1
        # Get video frames 
        
        # get start in second, use labeled time as center TODO: fix centerframe as keyframe and stride
        fps = 25.0 # assume fps = 25 for now, should be so
        start_sec = time_minute*60 + time_second
        end_sec = start_sec
        
        if start_sec == 0:
            end_sec += (1/fps) # possibly unstable solution
            
        end_sec = end_sec + self.wsize # might need to subtract 1/fps
        # Shift backwards to center around time but check that time > 0
        diff = (end_sec - start_sec) / 2 # TODO : Might result in bad precision
        temp_start_sec = start_sec - diff
        temp_end_sec = end_sec - diff

        # Only change as long as the shift operation doesnt shift out of bounds 
        if temp_start_sec >= 0:
            start_sec = temp_start_sec
            end_sec = temp_end_sec
        
        # Buffer to endsec incase of bad load
        end_sec = end_sec + 0.9 # loads more frames than needed, then reduced later
        """
        y, sr = librosa.load(audiopath,sr=SAMPLE_RATE,offset=start_sec,duration=self.wsize+1)
        
        print(len(y))
        y = y[:SAMPLE_RATE*4]
        
        ms = torchaudio.transforms.MelSpectrogram(sample_rate=SAMPLE_RATE,
                                                n_fft=N_FFT,
                                                win_length=WINDOW_LENGTH,
                                                hop_length=STEP_SIZE)(torch.Tensor(y)).log10().unsqueeze(0)
        print(f"ms size = {ms.size()}")
        
        
        ms = ms[:,:,:401]
        if ms.size() != (1,128,401):
            print(f"ms size :{ms.size()}, using zeros instead ...")
            ms = torch.zeros((1,128,401))
        """
        
        sample = {'audiopath': audiopath, 'annotation':annotation,'idx':idx, 'one_hot_label':one_hot_label,'label':label}
        
        return sample
            
    def get_annotations(self,path):
        """ Reads json files and returns """
        with open(self.root_dir+path+"/Labels.json") as jsonfile:
            json_label = json.load(jsonfile)
        
        labels = [e for e in json_label['annotations']]
        
        return path,labels
    def get_keyframe(self,idx):
        if self.frame_center == 'back': return self.__getitem__(idx)['clip'][0,:,:,:]
        elif self.frame_center == 'center': return self.__getitem__(idx)['clip'][self.nframes//2,:,:,:]
        elif self.frame_center == 'front': return self.__getitem__(idx)['clip'][self.nframes-1,:,:,:]
    def describe(self):
        card = 0
        subs = 0
        goal = 0
        background = 0

        for sample in self.samples:
            annotation = sample[1]
        # Get label
            if ("card" in annotation["label"]): card += 1
            elif ("subs" in annotation["label"]): subs +=1
            elif ("soccer" in annotation["label"]): goal += 1
            elif ("background" in annotation["label"]): background += 1

        print("Description of dataset\n\n")
        print("\n ********* Classes *********")
        print("\n card = 0\n subs = 1\n goals = 2\n background = 3")

        print("\n ********* Distribution and count *********")
        print(f"\n N card: {card} \n N subs: {subs} \n N goal: {goal} \n N background: {background} \n \n Total : {card+subs+goal+background}")
        
        print("\n\n ********* Configuration *********")
        print(f"\n npy_file: {self.npy_file} \n tshift: {self.tshift} \n root_dir: {self.root_dir} \n transform: {self.transform} \n frame_center: {self.frame_center} \n nframes: {self.nframes} \n stride_frames: {self.stride_frames} \n background: {self.background}")
        print("\n\n ********* End of description *********")

In [21]:
X = SoccerNetDataset(root_dir=root_dir,npy_file=train_file)

In [40]:
X[0]

{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley/1_audio.wav',
 'annotation': {'gameTime': '1 - 13:10',
  'label': 'soccer-ball',
  'team': 'home',
  'duration1': 'Duration: 00:45:00.00',
  'duration2': 'Duration: 00:45:00.00'},
 'idx': 0,
 'one_hot_label': array([0., 0., 1., 0.]),
 'label': 2}

In [41]:
samples = list()
for e in X:
    samples.append(e)
    

In [53]:
np.save(file=root_dir+"train_samples.npy",arr=samples,allow_pickle=True)

In [54]:
s = np.load(root_dir+"train_samples.npy",allow_pickle=True)

In [59]:
samples

In [44]:

for k in samples:
    print(k)

{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley/1_audio.wav', 'annotation': {'gameTime': '1 - 13:10', 'label': 'soccer-ball', 'team': 'home', 'duration1': 'Duration: 00:45:00.00', 'duration2': 'Duration: 00:45:00.00'}, 'idx': 0, 'one_hot_label': array([0., 0., 1., 0.]), 'label': 2}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley/1_audio.wav', 'annotation': {'gameTime': '1 - 40:08', 'label': 'y-card', 'team': 'away', 'duration1': 'Duration: 00:45:00.00', 'duration2': 'Duration: 00:45:00.00'}, 'idx': 1, 'one_hot_label': array([1., 0., 0., 0.]), 'label': 0}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley/2_audio.wav', 'annotation': {'gameTime': '2 - 17:02', 'label': 'y-card', 'team': 'away', 'duration1': 'Duration: 00:45:00.00', 'duration2':

{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/europe_uefa-champions-league/2014-2015/2014-11-04 - 22-45 Dortmund 4 - 1 Galatasaray/2_audio.wav', 'annotation': {'gameTime': '2 - 25:18', 'label': 'substitution-in', 'team': 'home', 'duration1': 'Duration: 00:45:05.00', 'duration2': 'Duration: 00:47:50.00'}, 'idx': 727, 'one_hot_label': array([0., 1., 0., 0.]), 'label': 1}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/europe_uefa-champions-league/2014-2015/2014-11-04 - 22-45 Dortmund 4 - 1 Galatasaray/2_audio.wav', 'annotation': {'gameTime': '2 - 28:46', 'label': 'soccer-ball', 'team': 'home', 'duration1': 'Duration: 00:45:05.00', 'duration2': 'Duration: 00:47:50.00'}, 'idx': 728, 'one_hot_label': array([0., 0., 1., 0.]), 'label': 2}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/europe_uefa-champions-league/2014-2015/2014-11-04 - 22-45 Dortmund 4 - 1 Galatasaray/2_audio.wav', 'annotation': {'gameTime': '2 - 36:25', 'label

{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/france_ligue-1/2015-2016/2015-09-26 - 18-30 Nantes 1 - 4 Paris SG/2_audio.wav', 'annotation': {'gameTime': '2 - 31:46', 'label': 'substitution-in', 'team': 'home', 'duration1': 'Duration: 00:45:00.00', 'duration2': 'Duration: 00:45:53.00'}, 'idx': 1509, 'one_hot_label': array([0., 1., 0., 0.]), 'label': 1}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/france_ligue-1/2015-2016/2015-09-26 - 18-30 Nantes 1 - 4 Paris SG/2_audio.wav', 'annotation': {'gameTime': '2 - 34:13', 'label': 'soccer-ball', 'team': 'away', 'duration1': 'Duration: 00:45:00.00', 'duration2': 'Duration: 00:45:53.00'}, 'idx': 1510, 'one_hot_label': array([0., 0., 1., 0.]), 'label': 2}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/france_ligue-1/2015-2016/2015-09-26 - 18-30 Nantes 1 - 4 Paris SG/2_audio.wav', 'annotation': {'gameTime': '2 - 36:59', 'label': 'substitution-in', 'team': 'away', 'duration1': 'Dur

{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/italy_serie-a/2014-2015/2015-04-19 - 21-45 Inter 0 - 0 AC Milan/2_audio.wav', 'annotation': {'gameTime': '2 - 35:34', 'label': 'substitution-in', 'team': 'away', 'duration1': 'Duration: 00:45:00.00', 'duration2': 'Duration: 00:45:00.00'}, 'idx': 2245, 'one_hot_label': array([0., 1., 0., 0.]), 'label': 1}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/italy_serie-a/2014-2015/2015-04-19 - 21-45 Inter 0 - 0 AC Milan/2_audio.wav', 'annotation': {'gameTime': '2 - 36:16', 'label': 'y-card', 'team': 'away', 'duration1': 'Duration: 00:45:00.00', 'duration2': 'Duration: 00:45:00.00'}, 'idx': 2246, 'one_hot_label': array([1., 0., 0., 0.]), 'label': 0}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/italy_serie-a/2014-2015/2015-04-19 - 21-45 Inter 0 - 0 AC Milan/2_audio.wav', 'annotation': {'gameTime': '2 - 37:38', 'label': 'y-card', 'team': 'home', 'duration1': 'Duration: 00:45:00.00',

{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/spain_laliga/2015-2016/2015-08-29 - 21-30 Barcelona 1 - 0 Malaga/2_audio.wav', 'annotation': {'gameTime': '2 - 28:00', 'label': 'substitution-in', 'team': 'away', 'duration1': 'Duration: 00:45:00.00', 'duration2': 'Duration: 00:47:01.00'}, 'idx': 3070, 'one_hot_label': array([0., 1., 0., 0.]), 'label': 1}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/spain_laliga/2015-2016/2015-08-29 - 21-30 Barcelona 1 - 0 Malaga/2_audio.wav', 'annotation': {'gameTime': '2 - 41:01', 'label': 'substitution-in', 'team': 'home', 'duration1': 'Duration: 00:45:00.00', 'duration2': 'Duration: 00:47:01.00'}, 'idx': 3071, 'one_hot_label': array([0., 1., 0., 0.]), 'label': 1}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/spain_laliga/2015-2016/2015-08-29 - 21-30 Barcelona 1 - 0 Malaga/2_audio.wav', 'annotation': {'gameTime': '2 - 44:02', 'label': 'y-card', 'team': 'away', 'duration1': 'Duration: 0

{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/spain_laliga/2016-2017/2017-05-06 - 19-30 Barcelona 4 - 1 Villarreal/2_audio.wav', 'annotation': {'gameTime': '2 - 35:13', 'label': 'y-card', 'team': 'away', 'duration1': 'Duration: 00:45:39.00', 'duration2': 'Duration: 00:45:00.00'}, 'idx': 3888, 'one_hot_label': array([1., 0., 0., 0.]), 'label': 0}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/spain_laliga/2016-2017/2017-05-06 - 19-30 Barcelona 4 - 1 Villarreal/2_audio.wav', 'annotation': {'gameTime': '2 - 36:15', 'label': 'soccer-ball', 'team': 'home', 'duration1': 'Duration: 00:45:39.00', 'duration2': 'Duration: 00:45:00.00'}, 'idx': 3889, 'one_hot_label': array([0., 0., 1., 0.]), 'label': 2}
{'audiopath': '/work/oarongve/data/sound_dataset/SoccerNet-code/data/spain_laliga/2016-2017/2017-05-06 - 19-30 Barcelona 4 - 1 Villarreal/2_audio.wav', 'annotation': {'gameTime': '2 - 37:28', 'label': 'substitution-in', 'team': 'home', 'duration1': 'Dur

# Simple network

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.bn1 = nn.BatchNorm2d(6)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.bn2 = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(4, 4)
        self.fc1 = nn.Linear(2208, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 4)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = x.view(-1, 2208)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)

In [None]:
# Disable warnings
import warnings
warnings.filterwarnings('ignore')

# Note: Can also see warning once
# warnings.filterwarnings(action='once')

In [None]:
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/mel_spec_experiment_4')

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)

In [None]:
outputs

In [None]:
(labels == torch.argmax(outputs, dim=1)).sum() 

In [None]:
labels

In [None]:
torch.argmax(outputs, dim=1)

In [None]:
len(dataloader)

In [None]:
writer = SummaryWriter('runs/mel_spec_experiment_4')

In [None]:
net.to(device)
# Dataloader or dataset faulty(returns batches of 0
epochs = 2
for epoch in range(epochs):
    running_loss = 0.0
    
    net.train()
    for i, data in enumerate(dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data['mel_spectogram'].to(device),data['label'].to(device)

        #print(inputs.size())
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        if i % 5 == 0:    # print every 2000 mini-batches
            
            writer.add_scalar('training loss',
                            running_loss / 5,
                            epoch * len(dataloader) + i)
            
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 5))
            running_loss = 0.0
    
    # calculate accuracy
    with torch.no_grad():
        net.eval()
        res = torch.zeros((3,3))
        for i, data in enumerate(dataloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data['mel_spectogram'].to(device),data['label'].to(device)

            # forward + backward + optimize
            outputs = net(inputs)
            
            preds = torch.argmax(outputs,dim=1)
            
            for p,gt in zip(preds,labels):
                res[int(p),int(gt)] += 1
            

        

        N_total = res.sum()
        N_correct = res.diag().sum()
        
        acc = N_total / N_correct
        
        writer.add_scalar('training acc',
                    acc,
                    epoch)
        print(f"Epoch : {epoch}, Accuracy : {acc}")
        
print('Finished Training')

In [None]:
writer.close()

# Evaluate samples to look for cases where y, sr from librosa is inconsistent

In [None]:
outputs = net(inputs)

In [None]:
inputs.size()

In [None]:
net.fc1.weight

# With GPU

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
net.to(device)

running_loss = 0.0
for i, data in enumerate(dataloader, 0):
    # get the inputs; data is a list of [inputs, labels]
    inputs, labels = data['mel_spectogram'].unsqueeze(0).to(device),torch.argmax(data['one_hot_label']).to(device)

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss += loss.item()
    if i % 2000 == 1999:    # print every 2000 mini-batches
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, i + 1, running_loss / 2000))
        running_loss = 0.0

print('Finished Training')

In [None]:
%%bash
nvidia-smi