In [1]:
# %matplotlib inline

import numpy as np
# from matplotlib import pyplot as plt
import time
import os
# import Levenshtein as L

import torch
import torch.nn as nn
import torch.nn.functional as F
#from torchnlp.nn import WeightDropLSTM
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
DEVICE

'cuda'

In [2]:
# load all that we need
# NOTE(review): if these .npy files store object arrays, recent NumPy only
# loads them with allow_pickle=True -- enable that only for trusted files.
dataset = np.load('./midis_array_guitar_new.npy')
chord_vocab = np.load('./chord_vocab.npy')

# Fraction of every song (taken from its start) that goes to the training set.
split_ratio = 0.95


def _as_object_array(sequences):
    """Pack sequences of possibly different lengths into a 1-D object array.

    Calling np.array() on ragged input is rejected by recent NumPy releases;
    filling a pre-allocated object array slot by slot always yields one
    element per sequence, whatever the individual lengths are.
    """
    packed = np.empty(len(sequences), dtype=object)
    for slot, sequence in enumerate(sequences):
        packed[slot] = sequence
    return packed


# Split each song individually so both sets contain material from every song.
train_parts = []
val_parts = []
for song in dataset:
    train_split = int(split_ratio * len(song))
    train_parts.append(song[:train_split])
    val_parts.append(song[train_split:])
train_dataset = _as_object_array(train_parts)
val_dataset = _as_object_array(val_parts)

print (len(chord_vocab))

5463


In [3]:
class MusicDataLoader(DataLoader):
    def __init__(self, dataset, batch_size, shuffle=True):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.p = 0.95
        self.seq_len = 70
        self.std = 5
        
    def sample_seq_len_(self):
        rand_p = np.random.random_sample()
        if rand_p < self.p:
            seq_mean = self.seq_len
        else:
            seq_mean = self.seq_len // 2
        return int(np.random.normal(seq_mean, self.std))

    def __iter__(self):
        if self.shuffle:
            rand_idx = np.random.permutation(len(self.dataset))
        else:
            rand_idx = np.arange(len(self.dataset))
        concate_dataset = torch.from_numpy(np.hstack(self.dataset[rand_idx]))
        num_iter = len(concate_dataset) // self.batch_size
        concate_dataset = concate_dataset[:num_iter*self.batch_size].view(self.batch_size, -1)
        concate_dataset.transpose_(0,1)
        index = 0
        while index < len(concate_dataset):
            seq_len = self.sample_seq_len_();
            if index + seq_len > len(concate_dataset):
                break
            yield concate_dataset[index:index+seq_len-1], concate_dataset[index+1:index+seq_len]
            index += seq_len

In [4]:
# model

class MusicModel(nn.Module):
    """Embedding -> multi-layer LSTM -> linear decoder over the token vocabulary.

    The decoder shares its weight matrix with the embedding, which is why the
    LSTM hidden size equals the embedding size.

    Args:
        note_size: number of distinct tokens (embedding rows / output logits).
        embed_size: embedding width, also used as the LSTM hidden size.
        nlayers: number of stacked LSTM layers.
    """

    def __init__(self, note_size, embed_size, nlayers):
        super(MusicModel, self).__init__()
        self.embedding = nn.Embedding(note_size, embed_size)
        # Inter-layer dropout has no effect on a single-layer LSTM and only
        # triggers a warning there, so it is switched off in that case.
        self.rnn = nn.LSTM(input_size=embed_size, hidden_size=embed_size, num_layers=nlayers,
                           dropout=0.5 if nlayers > 1 else 0.0)
        self.linear = nn.Linear(embed_size, note_size)
        # Weight tying: the decoder reuses the embedding matrix.
        self.linear.weight = self.embedding.weight

        self.init_weight()

    def init_weight(self):
        """Initialise the (shared) embedding weights uniformly in [-0.1, 0.1]."""
        self.embedding.weight.data.uniform_(-0.1, 0.1)

    def forward(self, seq_batch): # L x B
        """Return ``(logits, hidden)`` for a batch of token ids shaped L x B."""
        seq_batch = self.embedding(seq_batch) # L x B x E
        seq_batch, hidden = self.rnn(seq_batch) # L x B x H
        seq_batch = self.linear(seq_batch) # L x B x V
        return seq_batch, hidden

    def _sample_note(self, output):
        """Sample one token id per row of ``output`` (rows x H).

        Gumbel noise is added to the logits before the argmax, so the result
        is a random draw rather than the greedy choice.
        """
        logits = self.linear(output) # rows x V
        scores = F.gumbel_softmax(logits)
        _, note = torch.max(scores, dim=1) # rows
        return note

    def generate(self, seq, n_notes):
        """Sample ``n_notes`` tokens that continue the 1-D seed sequence ``seq``.

        Args:
            seq: 1-D tensor of seed token ids (at least one element).
            n_notes: number of tokens to sample.

        Returns:
            1-D tensor with ``n_notes`` token ids; empty when ``n_notes < 1``.
        """
        if n_notes < 1:
            return torch.empty(0, dtype=torch.long, device=seq.device)
        generated_notes = []
        # Sampling needs no gradients; without no_grad the autograd graph
        # would grow with every generated step.
        with torch.no_grad():
            embed = self.embedding(seq).unsqueeze(1) # L x 1 x E
            output_lstm, hidden = self.rnn(embed) # L x 1 x H
            current_note = self._sample_note(output_lstm[-1]) # 1
            generated_notes.append(current_note)
            for _ in range(n_notes - 1):
                # Feed the last sampled token back in, carrying the LSTM state.
                embed = self.embedding(current_note).unsqueeze(0) # 1 x 1 x E
                output_lstm, hidden = self.rnn(embed, hidden) # 1 x 1 x H
                current_note = self._sample_note(output_lstm[0]) # 1
                generated_notes.append(current_note)
        return torch.cat(generated_notes, dim=0)

In [5]:
# model trainer
class MusicModelTrainer:
    """Train, evaluate, checkpoint and sample from a music language model.

    Args:
        model: module whose forward returns ``(logits, hidden)`` with logits
            shaped L x B x V, and which provides ``generate(seq, n_notes)``.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        val_loader: iterable of ``(inputs, targets)`` validation batches.
        max_epochs: total number of epochs, used only in the progress message.
        run_id: sub-directory of ``./experiments`` that receives checkpoints.
    """

    def __init__(self, model, train_loader, val_loader, max_epochs=1, run_id='exp'):
        self.model = model.to(DEVICE)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.train_losses = []
        self.val_losses = []
        self.epochs = 0
        self.max_epochs = max_epochs
        self.run_id = run_id

        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3, weight_decay=1.2e-6)
        # Summed (not averaged) loss, so per-token averages can be formed
        # across batches of different lengths.
        self.criterion = nn.CrossEntropyLoss(reduction="sum")

    def _batch_loss(self, inputs, targets):
        """Run the model on one batch; return ``(summed loss tensor, token count)``."""
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)
        output, hidden = self.model(inputs)
        loss = self.criterion(output.view(-1, output.size(2)), targets.contiguous().view(-1))
        return loss, output.size(0) * output.size(1)

    def train(self):
        """Run one training epoch and return its average per-token loss.

        The loss is also appended to ``train_losses``; it is NaN when the
        loader produced no tokens.
        """
        self.model.train() # set to training mode
        epoch_loss = 0
        num_batches = 0
        n_notes = 0
        for inputs, targets in self.train_loader:
            num_batches += 1
            batch_loss, n_note = self.train_batch(inputs, targets)
            epoch_loss += batch_loss
            n_notes += n_note
            if (num_batches % 100 == 0):
                print ('[TRAIN]  Iter [%d]   Loss: %.4f'
                          % (num_batches, batch_loss / max(n_note, 1)))
        # Guard against an empty loader instead of dividing by zero.
        epoch_loss = epoch_loss / n_notes if n_notes else float('nan')
        self.epochs += 1
        print('[TRAIN]  Epoch [%d/%d]   Loss: %.4f'
                      % (self.epochs, self.max_epochs, epoch_loss))
        self.train_losses.append(epoch_loss)
        return epoch_loss

    def train_batch(self, inputs, targets):
        """Do one optimisation step; return ``(summed loss, token count)``."""
        loss, n_note = self._batch_loss(inputs, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item(), n_note

    def evaluate(self):
        """Compute the average per-token validation loss.

        The loss is appended to ``val_losses`` and returned; it is NaN when
        the validation loader produced no tokens.
        """
        self.model.eval()
        epoch_loss = 0
        n_notes = 0
        with torch.no_grad():
            for inputs, targets in self.val_loader:
                loss, n_note = self._batch_loss(inputs, targets)
                epoch_loss += loss.item()
                n_notes += n_note
        epoch_loss = epoch_loss / n_notes if n_notes else float('nan')
        print('[VAL] Val Loss: %.4f' % epoch_loss)
        self.val_losses.append(epoch_loss)
        return epoch_loss

    def save(self):
        """Write the model weights to ./experiments/<run_id>/model-<epochs>.pt."""
        model_path = os.path.join('./experiments', self.run_id, 'model-{}.pt'.format(self.epochs))
        # Create the run directory on demand so saving works on a fresh checkout.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        torch.save(self.model.state_dict(), model_path)

    def load(self, model_path):
        """Load model weights from ``model_path`` onto the current device."""
        # map_location lets a checkpoint written on a GPU load on a CPU host.
        self.model.load_state_dict(torch.load(model_path, map_location=DEVICE))
        print ("loaded model")

    def generate(self, seed, n_notes):
        """Sample ``n_notes`` tokens continuing a whitespace-separated seed string.

        Args:
            seed: token ids separated by whitespace, e.g. ``"17 16"``.
            n_notes: number of tokens to sample.

        Returns:
            NumPy array holding the sampled token ids.
        """
        self.model.eval()
        seq = np.array(seed.split(), dtype=int)
        seq = torch.from_numpy(seq).to(DEVICE)
        with torch.no_grad():
            # Use the trainer's own model rather than a notebook-level global.
            output = self.model.generate(seq, n_notes)
        return output.cpu().detach().numpy()

In [6]:
# Hyper parameters

# Number of train/evaluate passes run by the training loop; also passed to the
# trainer as max_epochs for its progress message.
NUM_EPOCHS = 20
# Batch size handed to both the training and the validation MusicDataLoader.
BATCH_SIZE = 32

In [7]:
run_id = "guitar_3layer"
# One makedirs call creates ./experiments and the per-run folder together;
# exist_ok removes the check-then-create race of os.path.exists() + os.mkdir().
os.makedirs('./experiments/%s' % run_id, exist_ok=True)
print("Saving models, predictions, and generated words to ./experiments/%s" % run_id)

Saving models, predictions, and generated words to ./experiments/guitar_3layer


In [8]:
# The model gets one more token id than chord_vocab has entries;
# 512-wide embeddings and 3 LSTM layers.
model = MusicModel(note_size=len(chord_vocab) + 1, embed_size=512, nlayers=3)
# Training batches are drawn in shuffled song order, validation batches are not.
train_loader = MusicDataLoader(train_dataset, batch_size=BATCH_SIZE)
val_loader = MusicDataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
trainer = MusicModelTrainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    max_epochs=NUM_EPOCHS,
    run_id=run_id,
)
print (model)

MusicModel(
  (embedding): Embedding(5464, 512)
  (rnn): LSTM(512, 512, num_layers=3, dropout=0.5)
  (linear): Linear(in_features=512, out_features=5464, bias=True)
)


In [9]:
best_nll = 1e30  # set to super large value at first
for epoch in range(NUM_EPOCHS):
    trainer.train()
    trainer.evaluate()
    # evaluate() appends the epoch's validation loss to val_losses, so read it
    # from there rather than relying on the call's return value.
    nll = trainer.val_losses[-1]
    if nll < best_nll:
        # Checkpoint every epoch that improves on the best validation loss.
        best_nll = nll
        trainer.save()
# Always keep the weights of the final epoch as well.
trainer.save()

[TRAIN]  Iter [100]   Loss: 4.0143
[TRAIN]  Iter [200]   Loss: 4.0942
[TRAIN]  Iter [300]   Loss: 3.5999
[TRAIN]  Iter [400]   Loss: 2.8067
[TRAIN]  Iter [500]   Loss: 2.7401
[TRAIN]  Epoch [1/20]   Loss: 3.5241
[VAL] Val Loss: 2.7756
[TRAIN]  Iter [100]   Loss: 2.2352
[TRAIN]  Iter [200]   Loss: 1.9899
[TRAIN]  Iter [300]   Loss: 1.8114
[TRAIN]  Iter [400]   Loss: 1.8598
[TRAIN]  Iter [500]   Loss: 2.0215
[TRAIN]  Epoch [2/20]   Loss: 1.9984
[VAL] Val Loss: 2.0590
[TRAIN]  Iter [100]   Loss: 1.4650
[TRAIN]  Iter [200]   Loss: 1.6468
[TRAIN]  Iter [300]   Loss: 1.7959
[TRAIN]  Iter [400]   Loss: 1.4372
[TRAIN]  Iter [500]   Loss: 1.3686
[TRAIN]  Epoch [3/20]   Loss: 1.5846
[VAL] Val Loss: 1.7966
[TRAIN]  Iter [100]   Loss: 1.1002
[TRAIN]  Iter [200]   Loss: 1.3995
[TRAIN]  Iter [300]   Loss: 1.6051
[TRAIN]  Iter [400]   Loss: 1.3336
[TRAIN]  Iter [500]   Loss: 1.6594
[TRAIN]  Epoch [4/20]   Loss: 1.3875
[VAL] Val Loss: 1.6760
[TRAIN]  Iter [100]   Loss: 1.2816
[TRAIN]  Iter [200]   Los

In [16]:
# trainer.load('./experiments/guitar/model-15.pt')
# from collections import Counter
# start = []
# for song in dataset:
#     start.append(song[0])
# Counter(start).most_common

# Sample 800 tokens continuing the seed token 17, then put the seed back in
# front so `gen` holds the whole sequence.
seed_token = 17
continuation = trainer.generate(str(seed_token), 800)
gen = np.array([seed_token] + list(continuation))
print (gen)

[  17   16   16   16   16   17   17  109  109  109  109  106 5463  106
 5463  106 5463  106 5463  106 5463  106 5463  106   43    0   16 5463
   16 5463   16 5463   16   12 5463   12   27    0    0   12   16    0
   16 5463   16 1287  865  865  865  865    0    0   14   16 5463   16
   16   16   16 5463   16  308 5463  308 5463  308   16   16   16  865
  865  865  865   36    0    0    0   14   16    0   16    0 1254    0
    0   16   16    0   16    0  353    0    0 1275 5463 1275    0 1275
 5463 1275 5463 1275 1275   14 5463   14   30   28   16 5463   16  587
 5463  587 5463  587 5463  587 5463  587    0    0  105   38   16   16
 5463   16   16   16   16  208  208  208  208 5463  208  208 1235  105
  105  105  105  105  105  105  105  105  105    0    0    0    0    0
    0    0    0  105  105 5463  105  105    0    0  105 5463  105  334
  105    0  105  105 5463  105  267 5463  267 5463  267 5463  267 5463
  267 5463  267 5463  267 5463  267 5463  267  105  105  105 5463  105
 5463 

In [17]:
def gen_one_hot_bass(notes=None, out_path='./try_bass.npy', skip_token=128):
    """Convert a token sequence into a 128 x T array and save it as .npy.

    Every token except ``skip_token`` produces one column. Token 0 produces an
    all-zero column. Any other token sets its own row to 1 when it differs
    from the previous element of ``notes`` (or is the first element), and to
    0.5 when it repeats that element.

    Args:
        notes: sequence of integer tokens; defaults to the notebook-level
            ``gen`` array.
        out_path: file the array is written to with ``np.save``.
        skip_token: token value that produces no column at all.

    Returns:
        The saved array, shaped (128, T); T is 0 when every token is skipped.
    """
    if notes is None:
        notes = gen  # fall back to the sequence generated earlier in the notebook
    notes = np.asarray(notes)

    columns = []
    for i, note in enumerate(notes):
        if note == skip_token:
            continue
        column = np.zeros((128, 1))
        if note != 0:
            # The comparison uses the raw previous element, including skipped ones.
            is_new = i == 0 or note != notes[i - 1]
            column[note] = 1 if is_new else 0.5
        columns.append(column)

    # np.hstack rejects an empty list, so build the empty result explicitly.
    gen_one_hot = np.hstack(columns) if columns else np.zeros((128, 0))
    np.save(out_path, gen_one_hot)
    return gen_one_hot

def gen_one_hot_guitar(notes=None, vocab=None, out_path='./try_guitar.npy'):
    """Convert a chord-token sequence into a 128 x T array and save it as .npy.

    Each token indexes ``vocab`` to obtain the row indices of one chord. The
    token equal to ``len(vocab)`` produces no column. A chord's rows are set
    to 1 when the token differs from the previous element of ``notes`` (or is
    the first element), and to 0.5 when it repeats that element.

    Args:
        notes: sequence of integer tokens; defaults to the notebook-level
            ``gen`` array.
        vocab: indexable collection mapping a token to an iterable of row
            indices; defaults to the notebook-level ``chord_vocab``.
        out_path: file the array is written to with ``np.save``.

    Returns:
        The saved array, shaped (128, T); T is 0 when every token is skipped.
    """
    if notes is None:
        notes = gen  # fall back to the sequence generated earlier in the notebook
    if vocab is None:
        vocab = chord_vocab
    notes = np.asarray(notes)
    skip_token = len(vocab)  # the one id that lies past the end of the vocabulary

    columns = []
    for i, token in enumerate(notes):
        if token == skip_token:
            continue
        column = np.zeros((128, 1))
        # The comparison uses the raw previous element, including skipped ones.
        level = 1 if (i == 0 or token != notes[i - 1]) else 0.5
        for pitch in vocab[token]:
            column[pitch] = level
        columns.append(column)

    # np.hstack rejects an empty list, so build the empty result explicitly.
    gen_one_hot = np.hstack(columns) if columns else np.zeros((128, 0))
    np.save(out_path, gen_one_hot)
    return gen_one_hot

# Convert the notebook-level `gen` sequence into a 128-row array and write it
# to ./try_guitar.npy.
gen_one_hot_guitar()