In [1]:
import numpy as np
import copy
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torchsummary import summary
import matplotlib.pyplot as plt
import librosa
import librosa.display
from tqdm import tqdm
import json
import os
import pickle
import sys
from pathlib import Path
from sklearn.model_selection import train_test_split
import soundfile as sf
from utils import GRU

## Creating the chunks

In [2]:
SR = 44100
HOP = 256
FRAMES = 6
BATCH_SIZE = 2048

In [3]:
chord_detector = GRU()
chord_detector.load_state_dict(torch.load('./models/chord_detector.pth'))
chord_detector.eval()

GRU(
  (gru): GRU(12, 256, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=512, out_features=12, bias=True)
)

In [4]:
class MelChordDataset(Dataset):
    def __init__(
            self, 
            data_location = "../../../../Music Technology/Datasets/musdb18hq/",
            out_location = "../../../../Music Technology/Datasets/musdb18hq/",
            frames_per_chord = 6,
            train = True,
            write_data = False
        ):
        super(MelChordDataset).__init__()
        if write_data:
            self._write_chords_and_audio(data_location, out_location, train)
        self.frames_per_chord = frames_per_chord
        vocals_y = []
        vocals_chroma = []
        chord_templates:dict = json.load(open('./chord_templates.json'))
        act_chord_data = []
        self.data_location= data_location+"chunks_vocal/"
        self.out_location= out_location+"chunks_chord/"
        if not train:
            self.data_location[:-1]+="_test/"
            self.out_location[:-1]+="_test/"

        for i in range(len(os.listdir(self.data_location))): # 100
            with open(self.out_location+"chord_"+str(i), "rb") as fp:
                chord_data = pickle.load(fp)
            act_chord_data.append(torch.Tensor(np.array([np.array(chord_templates[i]) for i in chord_data])))
            vocals_y.append(librosa.load(self.data_location + 'vocal_'+str(i)+'.wav', sr=SR)[0])
            vocals_chroma.append(torch.Tensor(librosa.feature.chroma_cens(y=vocals_y[-1], sr = SR, hop_length=HOP)).T)
        
        # act_chord_data[i]: Shape: (num_chords[i], 12)
        # vocals_chroma[i]: Shape: (num_frames[i], 12)
        # num_chords[i] = (num_frames[i] // frames_per_chord)

        self.data = []
        self._create_data(act_chord_data, vocals_chroma)
    
    def _create_data(self, chord_data, chroma_data):
        for (chroma, chords) in zip(chroma_data, chord_data):
            for i in range(0, chroma.shape[0]-self.frames_per_chord, self.frames_per_chord):
                block_chroma = chroma[i:i+self.frames_per_chord,:]
                block_chord = chords[i//self.frames_per_chord]
                if(block_chroma.any()):
                    self.data.append((block_chroma, block_chord))

    def _write_chords_and_audio(
            self, 
            data_location, 
            out_location, 
            train = True
        ):
        if train:
            data_location = data_location+"train/"
        else:
            data_location = data_location+"test/"
        folders = os.listdir(data_location)
        count = 0

        for folder in folders:
            if not os.path.isdir(data_location+folder):
                continue
            mixture_y, _ = librosa.load(data_location + '/' + folder + '/mixture.wav', sr=SR)
            vocals_y, _ = librosa.load(data_location + '/' + folder + '/vocals.wav', sr=SR)
            mixture_y = mixture_y/np.max(np.abs(mixture_y))
            vocals_y = vocals_y/np.max(np.abs(vocals_y))

            mixture_chroma = torch.Tensor(librosa.feature.chroma_cens(y=mixture_y, sr = SR, hop_length=HOP)).T
            chunk_length = FRAMES
            nchunks = mixture_chroma.shape[0] // chunk_length # no padding

            if train:
                if not os.path.isdir(out_location+'chunks_chord'):
                    os.mkdir(out_location+'chunks_chord')
                if not os.path.isdir(out_location+'chunks_vocal'):
                    os.mkdir(out_location+'chunks_vocal')
            else:
                if not os.path.isdir(out_location+'chunks_chord_test/'):
                    os.mkdir(out_location+'chunks_chord_test')
                if not os.path.isdir(out_location+'chunks_vocal_test/'):
                    os.mkdir(out_location+'chunks_vocal_test')

            # Get chords from mixture chroma
            chord_stack, time = MelChordDataset.prediction(chord_detector, mixture_chroma)
            frame_num = np.array([int(i/((HOP/SR)*6)) for i in time])
            chord_stack = np.array([frame_num, chord_stack]).T
            chords = []
            for prev, curr in zip(chord_stack[:-1], chord_stack[1:]):
                frame_diff = int(curr[0]) - int(prev[0])
                chords.extend([prev[1] for _ in range(frame_diff)])
            chords.extend([chord_stack[-1][1] for _ in range(nchunks - len(chords))])

            if train:
                with open(out_location+"chunks_chord/chord_"+str(count), "wb") as fp:
                    pickle.dump(chords, fp)
                sf.write(out_location + 'chunks_vocal/vocal_' + str(count)+'.wav', vocals_y, SR)
            else:
                with open(out_location+"chunks_chord_test/chord_"+str(count), "wb") as fp:
                    pickle.dump(chords, fp)
                sf.write(out_location + 'chunks_vocal_test/vocal_' + str(count)+'.wav', vocals_y, SR)
            count+=1

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index]
    
    @staticmethod
    def predict(model, audio, chroma_req = True, chord_templates:dict = json.load(open('./chord_templates.json')), sr = SR, hop = HOP):
        if chroma_req:
            chroma = torch.Tensor(librosa.feature.chroma_cens(y=audio, sr = sr, hop_length=hop)).T.unsqueeze(0)
        else:
            chroma = audio
        with torch.no_grad():
            outputs = nn.functional.softmax(model(chroma), 1)[0]
        min_val = 120
        min_key = ''
        for key, val in chord_templates.items():
            out = torch.norm(torch.Tensor(val) - outputs)
            if min_val >= out:
                min_val = out
                min_key = key
        return min_key
    
    @staticmethod
    def prediction(model, chroma, frame = 6):
        stack = []
        time = []
        model.eval()
        pred = MelChordDataset.predict(model, chroma[:frame, :].unsqueeze(0), False)
        prev_pred = pred
        dur = 1
        main_sub = 0
        for i in tqdm(range(frame, chroma.shape[0]-frame+1, frame)):
            model.eval()
            pred = MelChordDataset.predict(model, chroma[i:i+frame, :].unsqueeze(0), False)
            if(pred != prev_pred):
                if(dur>10):
                    if(len(stack)==0):
                        stack.append(prev_pred)
                    elif(stack[-1]==prev_pred):
                        dur = 0
                        prev_pred = pred
                        continue
                    else:
                        stack.append(prev_pred)
                    if len(time)!=0:
                        time.append((i)*HOP/SR - main_sub)
                    else:
                        main_sub = (i)*HOP/SR
                        time.append(0.0)
                dur = 0
                prev_pred = pred
            dur+=1
        return stack, time

In [5]:
# train_data = MelChordDataset(train = True, write_data = False)
# test_data = MelChordDataset(train = False, write_data = False)
# torch.save(train_data, './data/final/train_data.pt')
# torch.save(test_data, './data/final/test_data.pt')

In [6]:
train_data = torch.load('./data/final/train_data.pt')
test_data = torch.load('./data/final/test_data.pt')

In [7]:
print(len(train_data))
print(len(test_data))

576514
297423


In [8]:
train_loader = DataLoader(
    train_data,
    BATCH_SIZE,
    shuffle=False
)
test_loader = DataLoader(
    test_data,
    BATCH_SIZE,
    shuffle=False
)

In [9]:
# class GRU(nn.Module):
#     def __init__(self, input_size = 12, hidden_size = 64, num_layers = 1, num_classes = 12, bidirectional = True) -> None:
#         super(GRU, self).__init__()
#         self.num_layers = num_layers
#         self.hidden_size = hidden_size
#         self.bidirectional = bidirectional

#         self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first = True, bidirectional=bidirectional)
#         if(bidirectional):
#             self.fc = nn.Linear(hidden_size*2, num_classes)
#         else:
#             self.fc = nn.Linear(hidden_size, num_classes)

#     def forward(self, x):
#         if(self.bidirectional):
#             h0 = torch.zeros(2*self.num_layers, x.size(0), self.hidden_size)
#         else:
#             h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
#         out, _ = self.gru(x, h0)
#         out = out[:,-1,:] # Since we only want the output of the last cell
#         out = self.fc(out)
#         return(out)


In [26]:
class Predictor(nn.Module):
    def __init__(self, device = 'mps'):
        super(Predictor, self).__init__() # Transpose as well
        self.device = torch.device(device)
        self.conv1 = nn.Conv2d(1, 2, (1,3), 1, (0, 1), device=self.device)
        self.conv2 = nn.Conv2d(2, 4, (1,3), 1, (0, 1), device=self.device)
        self.conv3 = nn.Conv2d(4, 8, (1,3), device=self.device)
        self.conv4 = nn.Conv2d(8, 12, (1,3), device=self.device)
        self.FC = nn.Linear(288, 12, device=self.device)
        
    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        x = torch.relu(self.conv4(x))
        x = torch.flatten(x, 1)
        x = torch.softmax(self.FC(x), 1)
        return x

In [27]:
device = torch.device('mps')

In [28]:
model = Predictor(device=device).to(device)

In [29]:
summary(copy.deepcopy(model).to('cpu'), (1, 12, 6))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1             [-1, 2, 12, 6]               8
            Conv2d-2             [-1, 4, 12, 6]              28
            Conv2d-3             [-1, 8, 12, 4]             104
            Conv2d-4            [-1, 12, 12, 2]             300
            Linear-5                   [-1, 12]           3,468
Total params: 3,908
Trainable params: 3,908
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.01
Estimated Total Size (MB): 0.02
----------------------------------------------------------------


In [30]:
num_epochs = 200
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 5e-2)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 10, 0.9)

In [31]:
torch.min(torch.tensor(np.array([[1,2],[5,-2]])), dim=1)

torch.return_types.min(
values=tensor([ 1, -2]),
indices=tensor([0, 1]))

In [32]:
def predict(model, audio, chroma_req = True, chord_templates:dict = json.load(open('./chord_templates.json')), sr = SR, hop = HOP, device = 'cpu'):
    if chroma_req:
        chroma = torch.Tensor(librosa.feature.chroma_cens(y=audio, sr = sr, hop_length=hop)).T.unsqueeze(0)
    else:
        chroma = audio
    with torch.no_grad():
        outputs = nn.functional.softmax(model(chroma), 1)
    min_val = torch.Tensor([10000 for _ in range(outputs.shape[0])]).to(device)
    min_key = ["" for _ in range(outputs.shape[0])]
    for key, val in chord_templates.items():
        out = torch.Tensor([torch.norm(torch.Tensor(val).to(device) - i) for i in outputs]).to(device)
        min_val = torch.min(torch.stack([min_val, out], dim=1), dim=1)
        for i, truth in enumerate(min_val.indices==1):
            if truth:
                min_key[i] = key
        min_val = min_val.values
    return min_key

In [33]:
best_weights = copy.deepcopy(model.state_dict())
max = 0
val_acc = 0
train_acc = 0
chord_templates:dict = json.load(open('./chord_templates.json'))
for epoch in range(num_epochs):
    device = 'mps'
    model = model.to(device)
    model.train()
    for i, (chromas,chords) in tqdm(enumerate(train_loader)):
        chromas = torch.transpose(chromas,1,2).unsqueeze(1).to(device)
        chords = chords.to(device)

        preds = model(chromas)
        loss = criterion(preds, chords)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    scheduler.step() # Decaying learning rate per 25 epochs by 0.2 times
    print(f'Epoch {epoch+1}/{num_epochs}; Loss = {loss.item():.6f}; LR = {scheduler.get_last_lr()}')
    if (epoch+1)%10==0:
        with torch.no_grad():
            n_samples = 0
            n_correct = 0
            model.eval()
            device = 'cpu'
            model = model.to(device)
            for chromas, chords in tqdm(test_loader):
                chromas = torch.transpose(chromas,1,2).unsqueeze(1).to(device)
                chords = chords.to(device)
                pred_outputs1 = model(chromas)
                prediction = predict(model, chromas, chroma_req=False)
                predictions = torch.tensor(np.array([chord_templates[i] for i in prediction])).to(device)
                n_samples += chords.shape[0]*chords.shape[1]
                n_correct += (predictions == chords).sum().item()
            val_acc = n_correct/n_samples * 100

            if (max <= (n_correct/n_samples * 100)):
                print('SAVED MODEL WEIGHTS')
                max = val_acc
                best_weights = copy.deepcopy(model.state_dict())

            if (epoch+1)%100==0:
                n_samples = 0
                n_correct = 0
                
                for chromas, chords in tqdm(train_loader):
                    chromas = torch.transpose(chromas,1,2).unsqueeze(1).to(device)
                    chords = chords.to(device)
                    pred_outputs1 = model(chromas)
                    prediction = predict(model, chromas, chroma_req=False)
                    predictions = torch.tensor(np.array([chord_templates[i] for i in prediction])).to(device)
                    n_samples += chords.shape[0]*chords.shape[1]
                    n_correct += (predictions == chords).sum().item()
                train_acc = n_correct/n_samples * 100
                print(f'Train Accuracy: {train_acc:.2f}%')
        print(f'Dev Accuracy: {val_acc:.2f}%')
        print("-"*20)

282it [00:03, 93.59it/s] 


Epoch 1/200; Loss = 0.619541; LR = [0.05]


282it [00:02, 98.71it/s]


Epoch 2/200; Loss = 0.619423; LR = [0.05]


282it [00:02, 99.35it/s] 


Epoch 3/200; Loss = 0.619389; LR = [0.05]


282it [00:02, 99.58it/s] 


Epoch 4/200; Loss = 0.619372; LR = [0.05]


282it [00:02, 100.32it/s]


Epoch 5/200; Loss = 0.619363; LR = [0.05]


282it [00:02, 99.64it/s] 


Epoch 6/200; Loss = 0.619358; LR = [0.05]


282it [00:02, 97.70it/s] 


Epoch 7/200; Loss = 0.619354; LR = [0.05]


282it [00:02, 96.70it/s]


Epoch 8/200; Loss = 0.619352; LR = [0.05]


282it [00:02, 100.75it/s]


Epoch 9/200; Loss = 0.619350; LR = [0.05]


282it [00:02, 99.81it/s] 


Epoch 10/200; Loss = 0.619349; LR = [0.045000000000000005]


100%|██████████| 146/146 [02:00<00:00,  1.21it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.01%
--------------------


282it [00:02, 99.66it/s] 


Epoch 11/200; Loss = 0.619920; LR = [0.045000000000000005]


282it [00:02, 101.06it/s]


Epoch 12/200; Loss = 0.619913; LR = [0.045000000000000005]


282it [00:02, 101.19it/s]


Epoch 13/200; Loss = 0.619907; LR = [0.045000000000000005]


282it [00:04, 65.18it/s] 


Epoch 14/200; Loss = 0.619903; LR = [0.045000000000000005]


282it [00:02, 101.83it/s]


Epoch 15/200; Loss = 0.619900; LR = [0.045000000000000005]


282it [00:02, 101.04it/s]


Epoch 16/200; Loss = 0.619898; LR = [0.045000000000000005]


282it [00:02, 100.01it/s]


Epoch 17/200; Loss = 0.619897; LR = [0.045000000000000005]


282it [00:02, 101.32it/s]


Epoch 18/200; Loss = 0.619895; LR = [0.045000000000000005]


282it [00:02, 101.36it/s]


Epoch 19/200; Loss = 0.619894; LR = [0.045000000000000005]


282it [00:02, 101.31it/s]


Epoch 20/200; Loss = 0.619894; LR = [0.04050000000000001]


100%|██████████| 146/146 [02:00<00:00,  1.21it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.01%
--------------------


282it [00:02, 98.85it/s] 


Epoch 21/200; Loss = 0.620359; LR = [0.04050000000000001]


282it [00:02, 94.06it/s]


Epoch 22/200; Loss = 0.620354; LR = [0.04050000000000001]


282it [00:02, 95.49it/s]


Epoch 23/200; Loss = 0.620349; LR = [0.04050000000000001]


282it [00:02, 95.86it/s]


Epoch 24/200; Loss = 0.620346; LR = [0.04050000000000001]


282it [00:02, 96.70it/s]


Epoch 25/200; Loss = 0.620344; LR = [0.04050000000000001]


282it [00:02, 96.39it/s]


Epoch 26/200; Loss = 0.620342; LR = [0.04050000000000001]


282it [00:02, 98.11it/s]


Epoch 27/200; Loss = 0.620341; LR = [0.04050000000000001]


282it [00:02, 99.41it/s] 


Epoch 28/200; Loss = 0.620340; LR = [0.04050000000000001]


282it [00:02, 98.92it/s]


Epoch 29/200; Loss = 0.620339; LR = [0.04050000000000001]


282it [00:04, 65.15it/s] 


Epoch 30/200; Loss = 0.620338; LR = [0.03645000000000001]


100%|██████████| 146/146 [01:59<00:00,  1.22it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.80%
--------------------


282it [00:02, 99.16it/s] 


Epoch 31/200; Loss = 0.620739; LR = [0.03645000000000001]


282it [00:02, 99.44it/s] 


Epoch 32/200; Loss = 0.620735; LR = [0.03645000000000001]


282it [00:02, 99.50it/s] 


Epoch 33/200; Loss = 0.620731; LR = [0.03645000000000001]


282it [00:02, 99.99it/s] 


Epoch 34/200; Loss = 0.620729; LR = [0.03645000000000001]


282it [00:02, 98.92it/s] 


Epoch 35/200; Loss = 0.620727; LR = [0.03645000000000001]


282it [00:02, 100.32it/s]


Epoch 36/200; Loss = 0.620725; LR = [0.03645000000000001]


282it [00:02, 99.74it/s] 


Epoch 37/200; Loss = 0.620724; LR = [0.03645000000000001]


282it [00:02, 99.62it/s] 


Epoch 38/200; Loss = 0.620723; LR = [0.03645000000000001]


282it [00:02, 99.12it/s] 


Epoch 39/200; Loss = 0.620723; LR = [0.03645000000000001]


282it [00:02, 99.85it/s] 


Epoch 40/200; Loss = 0.620722; LR = [0.03280500000000001]


100%|██████████| 146/146 [02:01<00:00,  1.20it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.80%
--------------------


282it [00:02, 97.99it/s] 


Epoch 41/200; Loss = 0.621091; LR = [0.03280500000000001]


282it [00:02, 97.34it/s] 


Epoch 42/200; Loss = 0.621087; LR = [0.03280500000000001]


282it [00:02, 98.43it/s] 


Epoch 43/200; Loss = 0.621085; LR = [0.03280500000000001]


282it [00:02, 96.87it/s]


Epoch 44/200; Loss = 0.621083; LR = [0.03280500000000001]


282it [00:03, 92.20it/s]


Epoch 45/200; Loss = 0.621081; LR = [0.03280500000000001]


282it [00:03, 93.08it/s]


Epoch 46/200; Loss = 0.621080; LR = [0.03280500000000001]


282it [00:03, 93.65it/s]


Epoch 47/200; Loss = 0.621079; LR = [0.03280500000000001]


282it [00:03, 93.27it/s]


Epoch 48/200; Loss = 0.621078; LR = [0.03280500000000001]


282it [00:02, 99.04it/s] 


Epoch 49/200; Loss = 0.621078; LR = [0.03280500000000001]


282it [00:02, 100.34it/s]


Epoch 50/200; Loss = 0.621078; LR = [0.02952450000000001]


100%|██████████| 146/146 [02:00<00:00,  1.22it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.80%
--------------------


282it [00:02, 98.77it/s] 


Epoch 51/200; Loss = 0.621433; LR = [0.02952450000000001]


282it [00:02, 99.86it/s] 


Epoch 52/200; Loss = 0.621430; LR = [0.02952450000000001]


282it [00:04, 65.32it/s] 


Epoch 53/200; Loss = 0.621428; LR = [0.02952450000000001]


282it [00:02, 99.49it/s] 


Epoch 54/200; Loss = 0.621426; LR = [0.02952450000000001]


282it [00:02, 100.30it/s]


Epoch 55/200; Loss = 0.621425; LR = [0.02952450000000001]


282it [00:02, 99.51it/s] 


Epoch 56/200; Loss = 0.621424; LR = [0.02952450000000001]


282it [00:02, 100.71it/s]


Epoch 57/200; Loss = 0.621423; LR = [0.02952450000000001]


282it [00:02, 99.70it/s] 


Epoch 58/200; Loss = 0.621423; LR = [0.02952450000000001]


282it [00:02, 100.46it/s]


Epoch 59/200; Loss = 0.621423; LR = [0.02952450000000001]


282it [00:02, 99.34it/s] 


Epoch 60/200; Loss = 0.621422; LR = [0.02657205000000001]


100%|██████████| 146/146 [01:59<00:00,  1.22it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.80%
--------------------


282it [00:02, 98.38it/s] 


Epoch 61/200; Loss = 0.621769; LR = [0.02657205000000001]


282it [00:02, 99.56it/s] 


Epoch 62/200; Loss = 0.621767; LR = [0.02657205000000001]


282it [00:02, 99.36it/s] 


Epoch 63/200; Loss = 0.621765; LR = [0.02657205000000001]


282it [00:02, 98.59it/s]


Epoch 64/200; Loss = 0.621764; LR = [0.02657205000000001]


282it [00:02, 99.51it/s] 


Epoch 65/200; Loss = 0.621763; LR = [0.02657205000000001]


282it [00:02, 99.85it/s] 


Epoch 66/200; Loss = 0.621762; LR = [0.02657205000000001]


282it [00:02, 99.77it/s] 


Epoch 67/200; Loss = 0.621762; LR = [0.02657205000000001]


282it [00:04, 65.15it/s] 


Epoch 68/200; Loss = 0.621761; LR = [0.02657205000000001]


282it [00:02, 100.61it/s]


Epoch 69/200; Loss = 0.621761; LR = [0.02657205000000001]


282it [00:02, 101.06it/s]


Epoch 70/200; Loss = 0.621761; LR = [0.02391484500000001]


100%|██████████| 146/146 [01:59<00:00,  1.22it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.80%
--------------------


282it [00:02, 98.18it/s]


Epoch 71/200; Loss = 0.622095; LR = [0.02391484500000001]


282it [00:02, 99.31it/s] 


Epoch 72/200; Loss = 0.622093; LR = [0.02391484500000001]


282it [00:02, 100.26it/s]


Epoch 73/200; Loss = 0.622092; LR = [0.02391484500000001]


282it [00:02, 99.46it/s] 


Epoch 74/200; Loss = 0.622091; LR = [0.02391484500000001]


282it [00:02, 99.57it/s] 


Epoch 75/200; Loss = 0.622090; LR = [0.02391484500000001]


282it [00:02, 100.21it/s]


Epoch 76/200; Loss = 0.622090; LR = [0.02391484500000001]


282it [00:02, 99.88it/s] 


Epoch 77/200; Loss = 0.622089; LR = [0.02391484500000001]


282it [00:02, 99.70it/s] 


Epoch 78/200; Loss = 0.622089; LR = [0.02391484500000001]


282it [00:02, 99.59it/s] 


Epoch 79/200; Loss = 0.622089; LR = [0.02391484500000001]


282it [00:02, 99.85it/s] 


Epoch 80/200; Loss = 0.622088; LR = [0.021523360500000012]


100%|██████████| 146/146 [02:01<00:00,  1.20it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.80%
--------------------


282it [00:02, 97.77it/s]


Epoch 81/200; Loss = 0.622400; LR = [0.021523360500000012]


282it [00:02, 98.82it/s] 


Epoch 82/200; Loss = 0.622398; LR = [0.021523360500000012]


282it [00:02, 99.93it/s] 


Epoch 83/200; Loss = 0.622397; LR = [0.021523360500000012]


282it [00:02, 99.31it/s] 


Epoch 84/200; Loss = 0.622396; LR = [0.021523360500000012]


282it [00:02, 99.90it/s] 


Epoch 85/200; Loss = 0.622396; LR = [0.021523360500000012]


282it [00:02, 98.33it/s] 


Epoch 86/200; Loss = 0.622395; LR = [0.021523360500000012]


282it [00:02, 98.68it/s] 


Epoch 87/200; Loss = 0.622395; LR = [0.021523360500000012]


282it [00:02, 98.59it/s] 


Epoch 88/200; Loss = 0.622395; LR = [0.021523360500000012]


282it [00:02, 99.22it/s] 


Epoch 89/200; Loss = 0.622395; LR = [0.021523360500000012]


282it [00:02, 98.35it/s]


Epoch 90/200; Loss = 0.622395; LR = [0.01937102445000001]


100%|██████████| 146/146 [01:59<00:00,  1.22it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.80%
--------------------


282it [00:02, 99.59it/s] 


Epoch 91/200; Loss = 0.622673; LR = [0.01937102445000001]


282it [00:04, 65.31it/s]


Epoch 92/200; Loss = 0.622671; LR = [0.01937102445000001]


282it [00:02, 100.66it/s]


Epoch 93/200; Loss = 0.622671; LR = [0.01937102445000001]


282it [00:02, 100.68it/s]


Epoch 94/200; Loss = 0.622670; LR = [0.01937102445000001]


282it [00:02, 101.45it/s]


Epoch 95/200; Loss = 0.622670; LR = [0.01937102445000001]


282it [00:02, 100.66it/s]


Epoch 96/200; Loss = 0.622669; LR = [0.01937102445000001]


282it [00:02, 99.99it/s] 


Epoch 97/200; Loss = 0.622669; LR = [0.01937102445000001]


282it [00:02, 100.85it/s]


Epoch 98/200; Loss = 0.622669; LR = [0.01937102445000001]


282it [00:02, 100.39it/s]


Epoch 99/200; Loss = 0.622669; LR = [0.01937102445000001]


282it [00:02, 101.22it/s]


Epoch 100/200; Loss = 0.622669; LR = [0.01743392200500001]


100%|██████████| 146/146 [01:59<00:00,  1.22it/s]


SAVED MODEL WEIGHTS


100%|██████████| 282/282 [03:50<00:00,  1.23it/s]


Train Accuracy: 66.23%
Dev Accuracy: 64.80%
--------------------


282it [00:02, 101.05it/s]


Epoch 101/200; Loss = 0.622906; LR = [0.01743392200500001]


282it [00:04, 66.21it/s] 


Epoch 102/200; Loss = 0.622906; LR = [0.01743392200500001]


282it [00:02, 102.71it/s]


Epoch 103/200; Loss = 0.622905; LR = [0.01743392200500001]


282it [00:02, 102.02it/s]


Epoch 104/200; Loss = 0.622905; LR = [0.01743392200500001]


282it [00:02, 102.90it/s]


Epoch 105/200; Loss = 0.622904; LR = [0.01743392200500001]


282it [00:02, 101.54it/s]


Epoch 106/200; Loss = 0.622904; LR = [0.01743392200500001]


282it [00:02, 102.10it/s]


Epoch 107/200; Loss = 0.622904; LR = [0.01743392200500001]


282it [00:02, 102.16it/s]


Epoch 108/200; Loss = 0.622904; LR = [0.01743392200500001]


282it [00:02, 102.33it/s]


Epoch 109/200; Loss = 0.622904; LR = [0.01743392200500001]


282it [00:02, 102.51it/s]


Epoch 110/200; Loss = 0.622904; LR = [0.015690529804500006]


100%|██████████| 146/146 [02:00<00:00,  1.21it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.80%
--------------------


282it [00:02, 98.53it/s] 


Epoch 111/200; Loss = 0.623098; LR = [0.015690529804500006]


282it [00:02, 100.27it/s]


Epoch 112/200; Loss = 0.623098; LR = [0.015690529804500006]


282it [00:02, 99.24it/s] 


Epoch 113/200; Loss = 0.623098; LR = [0.015690529804500006]


282it [00:02, 99.97it/s] 


Epoch 114/200; Loss = 0.623097; LR = [0.015690529804500006]


282it [00:02, 99.59it/s] 


Epoch 115/200; Loss = 0.623097; LR = [0.015690529804500006]


282it [00:02, 100.05it/s]


Epoch 116/200; Loss = 0.623097; LR = [0.015690529804500006]


282it [00:04, 65.34it/s] 


Epoch 117/200; Loss = 0.623097; LR = [0.015690529804500006]


282it [00:02, 99.96it/s] 


Epoch 118/200; Loss = 0.623097; LR = [0.015690529804500006]


282it [00:02, 100.52it/s]


Epoch 119/200; Loss = 0.623097; LR = [0.015690529804500006]


282it [00:02, 99.48it/s] 


Epoch 120/200; Loss = 0.623096; LR = [0.014121476824050006]


100%|██████████| 146/146 [01:59<00:00,  1.23it/s]


SAVED MODEL WEIGHTS
Dev Accuracy: 64.80%
--------------------


282it [00:03, 90.97it/s]


Epoch 121/200; Loss = 0.623248; LR = [0.014121476824050006]


282it [00:02, 97.85it/s] 


Epoch 122/200; Loss = 0.623250; LR = [0.014121476824050006]


282it [00:02, 98.77it/s] 


Epoch 123/200; Loss = 0.623250; LR = [0.014121476824050006]


282it [00:02, 98.54it/s] 


Epoch 124/200; Loss = 0.623250; LR = [0.014121476824050006]


282it [00:02, 99.53it/s] 


Epoch 125/200; Loss = 0.623250; LR = [0.014121476824050006]


282it [00:02, 98.72it/s]


Epoch 126/200; Loss = 0.623249; LR = [0.014121476824050006]


282it [00:02, 99.71it/s] 


Epoch 127/200; Loss = 0.623249; LR = [0.014121476824050006]


282it [00:02, 101.07it/s]


Epoch 128/200; Loss = 0.623249; LR = [0.014121476824050006]


282it [00:02, 100.64it/s]


Epoch 129/200; Loss = 0.623249; LR = [0.014121476824050006]


282it [00:02, 100.81it/s]


Epoch 130/200; Loss = 0.623249; LR = [0.012709329141645007]


 42%|████▏     | 61/146 [00:52<01:13,  1.16it/s]


KeyboardInterrupt: 

In [35]:
torch.save(model.to('cpu').state_dict(), './models/chord_predictor1.pth')