In [None]:
!apt install musescore

In [None]:
!pip install musicautobot

In [None]:
!pip install fastai==1.0.61

In [5]:
!pip show fastai

Name: fastai
Version: 1.0.61
Summary: fastai makes deep learning with PyTorch faster, more accurate, and easier
Home-page: https://github.com/fastai/fastai
Author: Jeremy Howard
Author-email: info@fast.ai
License: Apache Software License 2.0
Location: /usr/local/lib/python3.7/dist-packages
Requires: matplotlib, numexpr, fastprogress, nvidia-ml-py3, torchvision, spacy, Pillow, numpy, beautifulsoup4, packaging, bottleneck, torch, pyyaml, pandas, scipy, requests
Required-by: 


In [7]:
from pathlib import Path
import random
import numpy as np
import music21
import musicautobot
from musicautobot.music_transformer import transform
from musicautobot.vocab import MusicVocab
from tqdm import tqdm

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torch.nn.functional import softmax
from matplotlib import pyplot as plt 

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {DEVICE}.")
SEQUENCE_LENGTH = 15

Using cuda.


In [8]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
DATA_PATH = Path("drive/MyDrive/projectNLP/data")
PT_PATH = DATA_PATH / "processed"
vocab = MusicVocab.create()

In [10]:
def get_filepaths():
    for subdir in (PT_PATH).iterdir():
      for track_filepath in subdir.iterdir():
        yield track_filepath

In [11]:
files = list(get_filepaths())

In [12]:
TEST_TRAIN_RATIO = 0.2
total_num_files = len(files)
tst_num_files = int(TEST_TRAIN_RATIO * total_num_files)
trn_num_files = total_num_files - tst_num_files
print( f"train test num_files: {trn_num_files}, {tst_num_files}")

train test num_files: 1088, 271


In [13]:
train_files, test_files = torch.utils.data.random_split(files, [trn_num_files, tst_num_files], generator=torch.Generator().manual_seed(42))
train_files = list(train_files)[:20]

In [14]:
train_files

[PosixPath('drive/MyDrive/projectNLP/data/processed/Henry Butler/Down by the Riverside.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Frankie Goes to Hollywood/Relax.2.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Led Zeppelin/When the Levee Breaks.1.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Hall & Oates/Did It In A Minute.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Dire Straits/Sultans of Swing.11.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Jimi Hendrix/All Along The Watchtower.1.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Mariah Carey/Make It Happen.1.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Mariah Carey/Someday.1.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Billy Idol/(Do Not) Stand in the Shadows.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Gino Paoli/Il cielo in una stanza.pt'),
 PosixPath('drive/MyDrive/projectNLP/data/processed/Jackson Michael/Rock With You

In [28]:
class NotesFile(torch.utils.data.Dataset):
    def __init__(self, file, sequence_length=SEQUENCE_LENGTH, device=DEVICE):
        self.path = file
        self.notes = torch.load(file).to_tensor()
        self.sequence_length = sequence_length
        self.device = device

    def __len__(self):
        return len(self.notes) - self.sequence_length-1

    def __getitem__(self, index):
        return (
            self.notes[index:index+self.sequence_length],
            self.notes[index+1:index+self.sequence_length+1]
        )

class NotesDataset(torch.utils.data.Dataset):
    def __init__(self, files, sequence_length=SEQUENCE_LENGTH, device=DEVICE):
        self.files = [NotesFile(fn, sequence_length, device) for fn in files]
        self.cum_sum = [len(notes) for notes in self.files]
        for i in range(1, len(self.files)):
          self.cum_sum[i] += self.cum_sum[i-1]
        self.sequence_length = sequence_length
        self.device = device

    def __len__(self):
        return self.cum_sum[-1]

    def __getitem__(self, index):
        assert index >= 0 and index < len(self)
        file = 0
        while len(self.files[file]) < index:
          index -= len(self.files[file])
          file += 1
        return self.files[file][index]
        
train_dataset = NotesDataset(train_files)       

In [29]:
print(len(train_dataset))
print(train_dataset[0])

234966
(tensor([  0,   1,   8, 165,  88, 139,  76, 139,   8, 141,  86, 153,  74, 154,
          8], device='cuda:0'), tensor([  1,   8, 165,  88, 139,  76, 139,   8, 141,  86, 153,  74, 154,   8,
        141], device='cuda:0'))


In [30]:
class LSTMModel(nn.Module):
    def __init__(self, device=DEVICE, lstm_size=512, embedding_dim=256, num_layers=2, dropout=0.2):
        super(LSTMModel, self).__init__()
        self.lstm_size = lstm_size # hidden_size  
        self.embedding_dim = embedding_dim
        self.num_layers = num_layers
        self.dropout = dropout
        self.device = device
        

        self.max_bar_len = 1024
        self.embedding = nn.Embedding(
            num_embeddings=self.max_bar_len,
            embedding_dim=self.embedding_dim,
        )
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim,
            hidden_size=self.lstm_size,
            num_layers=self.num_layers,
            dropout=self.dropout,
            batch_first=True,
        )
        self.fc = nn.Linear(self.lstm_size, self.max_bar_len)

    def forward(self, x, prev_state):
        embed = self.embedding(x)
        # print(f"embed.shape=\t{embed.shape}")
        output, state = self.lstm(embed, prev_state)
        # print(f"output.shape=\t{output.shape}")
        logits = self.fc(output)
        return logits, state

    def init_state(self, batch_size):
        if batch_size == 1:
          return (torch.zeros(self.num_layers, self.lstm_size).to(self.device),
                torch.zeros(self.num_layers, self.lstm_size).to(self.device))
        return (torch.zeros(self.num_layers, batch_size, self.lstm_size).to(self.device),
                torch.zeros(self.num_layers, batch_size, self.lstm_size).to(self.device))

In [31]:
model = LSTMModel() 
model.to(DEVICE)

LSTMModel(
  (embedding): Embedding(1024, 256)
  (lstm): LSTM(256, 512, num_layers=2, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=512, out_features=1024, bias=True)
)

In [44]:
def train(dataset, model, max_epochs = 50, batch_size=512, log_every=100, verbose=True, model_fn = DATA_PATH / 'model_v1.model'):
    model.train()

    dataloader = DataLoader(dataset, batch_size=batch_size, drop_last=True, pin_memory=False)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    opt_loss = 1000.0
    try:
        for epoch in range(max_epochs):
            state_h, state_c = model.init_state(batch_size)
            
            for batch, (x, y) in enumerate(dataloader):
                # print(batch)
                optimizer.zero_grad()
                y_pred, (state_h, state_c) = model(x, (state_h, state_c))

                # y_pred.shape = (batch_size, seq_len, num_classes); 
                # dimentions seq_len and num_classes need to be swapped for CrossEntropyLoss
                loss = criterion(y_pred.transpose(1, 2), y)

                if opt_loss > loss:
                  opt_loss = loss.item()
                  torch.save(model.state_dict(), model_fn)
                  if verbose:
                    print({ 'epoch': epoch, 'batch': batch, 'improved_loss': loss.item() })  

                state_h = state_h.detach()
                state_c = state_c.detach()            


                loss.backward()
                optimizer.step()

                if batch % log_every == 0:
                  print({ 'epoch': epoch, 'batch': batch, 'loss': loss.item() })
            
    except KeyboardInterrupt:
        pass

    model = LSTMModel() 
    model.load_state_dict(torch.load(model_fn))
    model.to(DEVICE)

    print({ 'opt_loss': opt_loss })

In [48]:
train(train_dataset, model)  

{'epoch': 0, 'batch': 0, 'improved_loss': 2.5826826095581055}
{'epoch': 0, 'batch': 0, 'loss': 2.5826826095581055}
{'epoch': 0, 'batch': 2, 'improved_loss': 2.564356565475464}
{'epoch': 0, 'batch': 5, 'improved_loss': 2.5514607429504395}
{'epoch': 0, 'batch': 7, 'improved_loss': 2.4241926670074463}
{'epoch': 0, 'batch': 8, 'improved_loss': 2.300684928894043}
{'epoch': 0, 'batch': 9, 'improved_loss': 2.2097630500793457}
{'epoch': 0, 'batch': 14, 'improved_loss': 2.1770198345184326}
{'epoch': 0, 'batch': 17, 'improved_loss': 1.6770827770233154}
{'epoch': 0, 'batch': 18, 'improved_loss': 1.2539886236190796}
{'epoch': 0, 'batch': 26, 'improved_loss': 1.2246315479278564}
{'epoch': 0, 'batch': 37, 'improved_loss': 1.198785424232483}
{'epoch': 0, 'batch': 43, 'improved_loss': 1.0894125699996948}
{'epoch': 0, 'batch': 47, 'improved_loss': 1.0286377668380737}
{'epoch': 0, 'batch': 49, 'improved_loss': 0.955910861492157}
{'epoch': 0, 'batch': 55, 'improved_loss': 0.8336182832717896}
{'epoch': 0,

In [None]:
# load model if needed.

# model = LSTMModel() 
# model.load_state_dict(torch.load(DATA_PATH / 'model_v1.model'))
# model.to(DEVICE)

In [46]:
# The predict function is a text generator. You have to modify this code!


def predict_single_word( model, state_h, state_c, prompt_words, temp):
  x = prompt_words.to(DEVICE)
  # y_pred.shape = (seq_len, num_classes)
  y_pred, (state_h, state_c) = model(x, (state_h, state_c))
  # last_word_logits.shape =  (num_classes)
  last_word_logits = y_pred[-1]/temp
  p = softmax(last_word_logits, dim=0).detach().cpu().numpy()
  word_index = np.random.choice(len(last_word_logits), p=p)
  # word_index = np.argmax(p)
  return word_index, (state_h, state_c)


def predict(model, prompt, next_words=15, temp=0.1, textify=True):
    model.eval()
    batch_size=1
    state_h, state_c = model.init_state(batch_size)

    ret = prompt.tolist()
    if textify:
      print(f"prompt:\n{vocab.textify(ret)}")
    next_word, (state_h, state_c) = predict_single_word( model, state_h, state_c, prompt, temp=temp)
    ret.append(next_word)

    for i in range(1, next_words):
        x = torch.tensor([next_word])
        next_word, (state_h, state_c) = predict_single_word( model, state_h, state_c, x, temp=temp)
        ret.append(next_word)

    if textify:
        ret = vocab.textify(ret)

    return ret

In [47]:
predict(model, train_dataset[10][0])

prompt:
n77 d16 n65 d17 xxsep d4 n29 d2 xxsep d4 n31 d3 xxsep d4 n33


'n77 d16 n65 d17 xxsep d4 n29 d2 xxsep d4 n31 d3 xxsep d4 n33 d2 xxsep d2 n63 d2 n59 d2 n54 d2 n51 d2 n46 d2 n39 d2'

In [None]:
# model_fn = DATA_PATH / 'model_v1.model'
# torch.save(model.state_dict(), model_fn)

# model_wo = LSTMModel() 
# model_wo.load_state_dict(torch.load(model_fn))
# model_wo.to(DEVICE)