!pip install editdistance

In [4]:
%load_ext autoreload
%autoreload 2

import os
import datetime as dt

import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

import models
import encoders
import decoders
from training import train, test, checkpoint, main_wandb, main_simple
from dataloaders import load_data, make_loaders, append_SOS


SEED = 27

# Seed NumPy, the PyTorch CPU generator and the current CUDA generator with
# the same value, in that order.
for _seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(SEED)

# Ask cuDNN to pick deterministic kernels.
torch.backends.cudnn.deterministic = True

# Use the GPU when one is visible, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(device)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
cuda


In [5]:
# M is forwarded to load_data through data_params; its meaning is defined by
# the dataloaders module (not visible here).
M = 8

# Everything load_data needs to locate and read the dataset.
# NOTE(review): dataset_path is an absolute, user-specific path.
data_params = dict(
    dataset_path='/scratch/users/udemir15/ELEC491/bassline_transcription/data/datasets/[28, 51]',
    dataset_name='TechHouse_bassline_representations',
    scale_type='min',
    M=M,
)

X, titles = load_data(data_params)
# presumably adds a start-of-sequence token to every sequence -- confirm in dataloaders
X = append_SOS(X)

# Number of classes, assumes the labels are the consecutive integers [0, max] inclusive
K = X.max() + 1
sequence_length = X.shape[1]

print('Number of classes: {}\nSequence Length: {}'.format(K, sequence_length))
print('Number of data points: {}'.format(X.shape[0]))

Number of classes: 27
Sequence Length: 65
Number of data points: 4421


In [None]:
# NOTE(review): this keeps only the first 8 sequences and rebinds X, so every
# later cell (including make_loaders) sees the truncated data and re-running
# the cell is not idempotent. It looks like a debugging leftover -- confirm
# and remove for a full run.
X = X[:8]

# Row 0: the class labels that occur in X, row 1: how often each one occurs.
# np.unique only reports labels that are present, so there may be fewer than K entries.
frequencies = torch.tensor(np.unique(X, return_counts=True)).to(device)

# Work in floating point from the start: `/` on integer tensors behaves
# differently across PyTorch versions, and the legacy torch.FloatTensor(...)
# constructor used previously rejects tensors that already live on the GPU.
class_counts = frequencies[1].float()

# Inverse-frequency class weights (the most frequent class gets weight 1).
cross_entropy_weights = class_counts.max() / class_counts

# First the weights normalised to sum to one (for inspection only), then the
# unnormalised weights that remain bound to `cross_entropy_weights`.
print('Cross entropy weights:\n{}\n'.format(cross_entropy_weights / cross_entropy_weights.sum()))
print(cross_entropy_weights)

# Alternative encoder/decoder set-ups. Every active variant rebinds `encoder`,
# `decoder`, `model` and `project_name`, so only the last one survives the cell.
# NOTE(review): `encoder_params` / `decoder_params` are first assigned in a
# later cell of this notebook, so on a fresh kernel this cell raises NameError.
# Variant 1 (disabled): LSTM encoder with the simple LSTM decoder.
#encoder = encoders.LSTMEncoder(**encoder_params)
#decoder = decoders.SimpleLSTMDecoder(**decoder_params)
#model = models.Seq2SeqLSTM(encoder, decoder, device).to(device)

# Variant 2: GRU encoder with the simple GRU decoder.
# NOTE(review): this model is built and then immediately replaced by variant 3.
encoder = encoders.GRUEncoder(**encoder_params)
decoder = decoders.SimpleGRUDecoder(**decoder_params)
model = models.Seq2SeqGRU(encoder, decoder, device).to(device)

project_name = 'seq2seq_gru_simple'

# Variant 3: GRU encoder with the GRU decoder; this is the one left bound.
encoder = encoders.GRUEncoder(**encoder_params)
decoder = decoders.GRUDecoder(**decoder_params)
model = models.Seq2SeqGRU(encoder, decoder, device).to(device)

project_name = 'seq2seq_gru'

In [None]:
# Optimisation settings; handed to make_loaders and logged as part of `params`.
train_params = dict(batch_size=32,
                    N_epochs=150,
                    lr=5e-4,
                    teacher_forcing_ratio=0.50)

# Shared by the encoder (as its output size) and the decoder (as its hidden size).
decoder_hidden_size = 64

encoder_params = dict(input_size=K,
                      embedding_size=12,
                      hidden_size=64,
                      output_size=decoder_hidden_size)

decoder_params = dict(output_size=K,
                      embedding_size=encoder_params['embedding_size'],
                      #encoder_hidden_size=encoder_params['hidden_size'],
                      hidden_size=decoder_hidden_size)

# Full configuration bundle passed to the training entry point.
params = dict(data=data_params,
              encoder=encoder_params,
              decoder=decoder_params,
              training=train_params)

project_name = 'seq2seq_gru_attention'

# Build encoder first, then decoder, then wrap both in the seq2seq model.
encoder = encoders.BidirectionalGRUEncoder(**encoder_params).to(device)
decoder = decoders.GRUDecoderWithAttention(**decoder_params).to(device)
model = models.Seq2SeqGRUWithAttention(encoder, decoder, device)

print(model)
n_parameters = sum(weights.numel() for weights in model.parameters())
print('Number of parameters: {}\n'.format(n_parameters))

criterion = nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=train_params['lr'])

train_loader, validation_loader, test_loader = make_loaders(X, train_params)

# Run the training entry point from the training module; main_simple below is
# the alternative entry point, left disabled.
main_wandb(model, criterion, optimizer, device, train_loader, validation_loader, test_loader, params, project_name)
#main_simple(model, criterion, optimizer, device, train_loader, validation_loader, test_loader, params)

Seq2SeqGRUWithAttention(
  (encoder): BidirectionalGRUEncoder(
    (embedding): Embedding(27, 12)
    (rnn): GRU(12, 64, batch_first=True, bidirectional=True)
    (fc): Linear(in_features=128, out_features=64, bias=True)
  )
  (decoder): GRUDecoderWithAttention(
    (attention): Attention(
      (attn): Linear(in_features=91, out_features=64, bias=True)
      (v): Linear(in_features=64, out_features=1, bias=False)
    )
    (embedding): Embedding(27, 12)
    (rnn): GRU(39, 64, batch_first=True)
    (fc_out): Linear(in_features=103, out_features=27, bias=True)
  )
)
Number of parameters: 67776


















  0%|          | 0/150 [00:00<?, ?it/s]




Before Training:
Test Loss: 3.3450, Test Accuracy: 0.0125

Sample:
tensor([16, 11, 14, 18, 20, 16, 11, 14, 22,  3, 20, 16, 11, 14, 22,  3, 20, 16,
        11, 14, 22,  3, 20, 16, 11, 14, 22,  3, 20, 16, 11, 14, 22,  3, 20, 16,
        11, 14, 22,  3, 13, 20, 16, 11, 14, 22,  3, 13, 20, 16, 11, 14, 22,  3,
        13, 20, 16, 11, 14, 22,  3, 13, 20, 16], device='cuda:0')





























































































































  1%|          | 1/150 [00:19<47:18, 19.05s/it]



Epoch: 0, train_loss: 2.337356, val_loss: 2.059098

Sample:
tensor([ 1, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26], device='cuda:0')






























































































































  1%|▏         | 2/150 [00:33<43:54, 17.80s/it]






























































































































  2%|▏         | 3/150 [00:47<40:23, 16.49s/it]































































































































  3%|▎         | 4/150 [01:00<37:58, 15.61s/it]































































































































  3%|▎         | 5/150 [01:14<36:10, 14.97s/it]































































































































  4%|▍         | 6/150 [01:27<34:50, 14.52s/it]





























































































































  5%|▍         | 7/150 [01:41<33:49, 14.19s/it]





























































































































  5%|▌         | 8/150 [01:54<33:05, 13.98s/it]




































































































































  6%|▌         | 9/150 [02:08<32:28, 13.82s/it]































































































































  7%|▋         | 10/150 [02:21<32:00, 13.72s/it]



























































































































  7%|▋         | 11/150 [02:35<31:43, 13.70s/it]
































































































































  8%|▊         | 12/150 [02:48<31:23, 13.65s/it]































































































































  9%|▊         | 13/150 [03:02<30:59, 13.58s/it]































































































































  9%|▉         | 14/150 [03:15<30:40, 13.53s/it]






























































































































 10%|█         | 15/150 [03:29<30:23, 13.51s/it]
































































































































 11%|█         | 16/150 [03:42<30:07, 13.49s/it]































































































































 11%|█▏        | 17/150 [03:56<29:55, 13.50s/it]






























































































































 12%|█▏        | 18/150 [04:09<29:41, 13.50s/it]
































































































































 13%|█▎        | 19/150 [04:23<29:25, 13.47s/it]






























































































































 13%|█▎        | 20/150 [04:36<29:09, 13.46s/it]
































































































































 14%|█▍        | 21/150 [04:49<28:55, 13.45s/it]









































































In [None]:
def display_attention(sentence, translation, attention):
    """Show an attention matrix as a heat map.

    Parameters
    ----------
    sentence
        Source sequence. Currently unused; kept so existing calls keep working.
    translation
        Output sequence. Currently unused; kept so existing calls keep working.
    attention : torch.Tensor
        Attention weights. Axis 1 is squeezed away before plotting
        (assumes a singleton axis there, e.g. (rows, 1, cols) -- TODO confirm
        against what the model returns).
    """
    # Drop the singleton axis, then move the data off the GPU and out of the
    # autograd graph so matplotlib can consume it as a NumPy array.
    weights = attention.squeeze(1).cpu().detach().numpy()

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)

    ax.matshow(weights, cmap='bone')
    ax.tick_params(labelsize=15)

    # Token tick labels are intentionally not set: the axes show plain indices.

    plt.show()
    # Close this specific figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [None]:
# Pull one batch from the training loader and keep its first element
# (presumably the input sequences -- confirm the loader's item layout) on `device`.
first_batch = next(iter(train_loader))
bassline = first_batch[0].to(device)

In [None]:
# NOTE(review): `reconstruct_bassline` is neither defined nor imported in the
# visible part of this notebook -- confirm where it comes from; on a fresh
# kernel this line would raise NameError.
# The two results are later indexed per sample when plotting attention.
reconstruction, attention = reconstruct_bassline(model, bassline)

In [None]:
# Index of the sample within the batch whose attention map is plotted.
i = 6
display_attention(sentence=bassline[i],
                  translation=reconstruction[i],
                  attention=attention[i])

In [None]:
# NOTE(review): `trg` is not defined in the visible part of this notebook.
# The same tensor is passed as both the first and second argument; the
# trailing `0, True` are presumably the teacher-forcing ratio and a
# "return attention" flag -- confirm against the model's forward signature.
out, att = model(trg, trg, 0, True)

In [None]:
# Display how many entries the returned attention object holds.
len(att)

In [None]:
# Display the shape of the first attention entry.
att[0].shape

In [None]:
# Baseline evaluation and a sample from the untrained model.
test_loss, test_acc = test(model, test_loader, criterion, device)
samples = model.sample()

n_epochs = train_params['N_epochs']
# 'teacher_forcing_ratio' is a key of train_params; the decoder_params built
# in this notebook has no such key, so reading it from there raised KeyError.
# NOTE(review): presumably update_teacher_forcing_ratio changes the ratio by
# this step once per epoch -- confirm against the decoder implementation.
teacher_forcing_step = train_params['teacher_forcing_ratio'] / n_epochs

for epoch in tqdm(range(n_epochs)):

    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    validation_loss, validation_acc = test(model, validation_loader, criterion, device)

    model.decoder.update_teacher_forcing_ratio(teacher_forcing_step)

    # Report every 5th epoch. The previous `epoch+1 % 5` parsed as
    # `epoch + (1 % 5)`, which is never zero, so it printed on every epoch.
    if (epoch + 1) % 5 == 0:
        print('Epoch: {}, train_loss: {:.6f}, acc: {:.3f}'.format(epoch+1, train_loss, train_acc))

# Final evaluation on the held-out test split.
test_loss, test_acc = test(model, test_loader, criterion, device)
print('Test Loss After Training: {:.6f}'.format(test_loss))

In [None]:
# Alternative, larger encoder configuration.
# NOTE(review): this rebinds encoder_params / train_params from the earlier
# training cell; this train_params has no 'teacher_forcing_ratio' entry.
encoder_params = dict(input_size=K,
                      embedding_size=16,
                      hidden_size=256,
                      n_layers=1)

train_params = dict(batch_size=32,
                    N_epochs=150,
                    lr=5e-4)

In [None]:
# Sizes for a stand-alone encoder experiment.
input_size, embedding_size = K, 12
enc_hidden_size, dec_hidden_size = 128, 100

# NOTE(review): `model` is rebound here to a bare encoder, shadowing the
# seq2seq model built earlier. Arguments are passed positionally; confirm the
# order against BidirectionalGRUEncoder's constructor.
model = encoders.BidirectionalGRUEncoder(
    input_size,
    dec_hidden_size,
    embedding_size,
    enc_hidden_size,
)

In [None]:
# Rebuild the three data loaders with the current train_params.
loaders = make_loaders(X, train_params)
train_loader, validation_loader, test_loader = loaders

# Keep the first element of one training batch for a quick forward pass.
first_batch = next(iter(train_loader))
batch = first_batch[0]

In [None]:
# Forward one batch through `model`; in notebook order `model` is the bare
# BidirectionalGRUEncoder at this point.
# NOTE(review): `batch` is not moved to `device` and neither is this encoder.
outputs, hidden = model(batch)

In [None]:
# Build a stand-alone attention module from the encoder/decoder hidden sizes.
# NOTE(review): this rebinds `att`, which an earlier cell uses for the
# attention returned by the model.
att = decoders.Attention(enc_hidden_size, dec_hidden_size)