In [1]:


import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


In [2]:
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [None]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

In [None]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

## Exploration seq2seq



In [2]:
import pickle
import random
import time
import math
from pandas import DataFrame
from torch.utils.data import Dataset
import pdb

from torch.utils.data import Dataset, DataLoader

In [3]:
# 
MAX_LOUDNESS = 127
MAX_NOTE = 127

with open("/home/zar3bski/Documents/Code/data/wavaetro/data/maestro-v3.0.0/2004_sym13.pickle", "rb") as file: 
    df:DataFrame = pickle.load(file)



In [4]:
# https://www.kaggle.com/code/columbine/seq2seq-pytorch


class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_dim, emb_dim)
        
        self.rnn = nn.LSTM(emb_dim, hid_dim, num_layers=n_layers, dropout=dropout)
        
        self.dropout = nn.Dropout(dropout)
    def forward(self, src):
        # src : [sen_len, batch_size]
        embedded = self.dropout(self.embedding(src))
        
        # embedded : [sen_len, batch_size, emb_dim]
        outputs, (hidden, cell) = self.rnn(embedded)
        # outputs = [sen_len, batch_size, hid_dim * n_directions]
        # hidden = [n_layers * n_direction, batch_size, hid_dim]
        # cell = [n_layers * n_direction, batch_size, hid_dim]
        return hidden, cell



In [5]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.output_dim = output_dim
        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(output_dim, emb_dim)
        
        self.rnn = nn.LSTM(emb_dim, hid_dim, num_layers=self.n_layers, dropout=dropout)
        
        self.fc_out = nn.Linear(hid_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input, hidden, cell):
        
        # input = [batch_size]
        # hidden = [n_layers * n_dir, batch_size, hid_dim]
        # cell = [n_layers * n_dir, batch_size, hid_dim]
        
        input = input.unsqueeze(0)
        # input : [1, ,batch_size]
        
        embedded = self.dropout(self.embedding(input))
        # embedded = [1, batch_size, emb_dim]
        
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        # output = [seq_len, batch_size, hid_dim * n_dir]
        # hidden = [n_layers * n_dir, batch_size, hid_dim]
        # cell = [n_layers * n_dir, batch_size, hid_dim]
        
        # seq_len and n_dir will always be 1 in the decoder
        prediction = self.fc_out(output.squeeze(0))
        # prediction = [batch_size, output_dim]
        return prediction, hidden, cell

In [6]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        
        assert encoder.hid_dim == decoder.hid_dim
        assert encoder.n_layers == decoder.n_layers
        
        
        
    def forward(self, src:list, trg, teacher_forcing_ratio=0.5):
        # src = [sen_len, batch_size]
        # trg = [sen_len, batch_size]
        # teacher_forcing_ratio : the probability to use the teacher forcing.
        batch_size = len(src)
        trg_len = len(trg)
        trg_vocab_size = self.decoder.output_dim
        
        # tensor to store decoder outputs
        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)
        # ICI ça passe
        
        # last hidden state of the encoder is used as the initial hidden state of the decoder
        hidden, cell = self.encoder(src)
        
        # first input to the decoder is the <sos> token.
        input = trg[0, :]
        for t in range(1, trg_len):
            # insert input token embedding, previous hidden and previous cell states 
            # receive output tensor (predictions) and new hidden and cell states.
            output, hidden, cell = self.decoder(input, hidden, cell)
            
            # replace predictions in a tensor holding predictions for each token
            outputs[t] = output
            
            # decide if we are going to use teacher forcing or not.
            teacher_force = random.random() < teacher_forcing_ratio
            
            # get the highest predicted token from our predictions.
            top1 = output.argmax(1)
            # update input : use ground_truth when teacher_force 
            input = trg[t] if teacher_force else top1
            
        return outputs

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class MySet(Dataset): 
    def __init__(self, dataframe):
        self.sym13 = dataframe["sym13"]
        self.midi_notes = dataframe["midi_notes"]

    def __len__(self):
        return len(self.sym13)
    
    def __getitem__(self, idx):
        sym13 = self.sym13[idx]
        midi_notes = self.midi_notes[idx]
        return {"sym13":sym13, "midi_notes":midi_notes}

def collate_wavelet(batch): 
    input = [torch.tensor(x["sym13"]) for x in batch]
    target = [torch.tensor(x["midi_notes"]) for x in batch]
    return input, target

df.sample(35, replace=True)

train_data =  MySet(df.sample(round(len(df)*0.6)).reset_index(inplace=False)) 
valid_data = MySet(df.sample(round(len(df)*0.4)).reset_index(inplace=False) )
test_data = MySet(df.sample(round(len(df)*0.2)).reset_index(inplace=False) )

BATCH_SIZE = 128
# We use a BucketIterator instead of the standard Iterator as it create batches in such a way that it minimizes the amount 
# of padding in both the source and target sentences.
train_iter = DataLoader(train_data, batch_size=BATCH_SIZE, collate_fn=collate_wavelet)
valid_iter = DataLoader(valid_data, batch_size=BATCH_SIZE, collate_fn=collate_wavelet)
test_iter = DataLoader(test_data, batch_size=BATCH_SIZE, collate_fn=collate_wavelet)

INPUT_DIM = len(df)
OUTPUT_DIM = len(df)
ENC_EMB_DIM = 128
DEC_EMB_DIM = 128
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
decoder = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

del df


In [9]:
#i = 0
#for batch in train_iter:
#    b = batch
#    i += 1
#    if i > 1: 
#        break

next(iter(train_iter))

([tensor([[-1.9520, -2.5907],
          [ 4.0641, -0.9448],
          [-0.6219, -1.9365],
          ...,
          [-1.5870,  0.2753],
          [ 1.7409,  0.0081],
          [-5.4818, -0.8124]], dtype=torch.float64),
  tensor([[ 0.5840, -1.2531],
          [ 2.5744, -0.0192],
          [-6.2650, -1.2578],
          ...,
          [-0.4185, -0.7821],
          [ 1.0146,  1.6577],
          [ 0.8916, -2.9571]], dtype=torch.float64),
  tensor([[-1.2517,  0.7547],
          [-4.6351, -2.9741],
          [ 9.8361,  8.0403],
          ...,
          [-0.2046,  0.2544],
          [-1.0759,  0.0202],
          [-1.0229,  1.3263]], dtype=torch.float64),
  tensor([[  6.0274,  -6.3221],
          [ 10.2162,  -0.7321],
          [ -3.4835,  -3.7290],
          ...,
          [  7.6696,   3.1985],
          [ -0.6773, -11.9212],
          [  6.1902, -16.5063]], dtype=torch.float64),
  tensor([[  1.6696,  -1.6760],
          [ -2.6921,   2.2933],
          [ -0.7913,   0.6379],
          ...,
     

In [58]:
df

Unnamed: 0,year,ticks_per_beat,mid,midi_notes,midi_velocity,midi_time,sampling_frequency,sym13
0,2004,480,"MidiFile(type=1, ticks_per_beat=480, tracks=[\...","[71, 71, 55, 71, 59, 55, 59, 62, 62, 72, 71, 6...","[60, 0, 44, 54, 55, 0, 0, 52, 0, 76, 0, 56, 0,...","[0.13333333333333333, 0.0010416666666666667, 0...",44100,"[[6.6829083086832854, 1.2684792957869424], [-0..."
1,2004,480,"MidiFile(type=1, ticks_per_beat=480, tracks=[\...","[43, 31, 31, 43, 44, 32, 46, 34, 44, 32, 34, 4...","[43, 36, 0, 0, 45, 35, 55, 38, 0, 0, 0, 0, 56,...","[1.0322916666666666, 0.008333333333333333, 0.0...",44100,"[[-2.3087699720057997, 2.7493433056420518], [0..."
2,2004,480,"MidiFile(type=1, ticks_per_beat=480, tracks=[\...","[38, 45, 38, 45, 53, 53, 62, 62, 74, 67, 53, 6...","[37, 41, 0, 0, 53, 0, 58, 0, 46, 44, 38, 0, 59...","[0.041666666666666664, 0.11458333333333333, 0....",44100,"[[-0.577860502384477, 0.8078359385863222], [-0..."
3,2004,480,"MidiFile(type=1, ticks_per_beat=480, tracks=[\...","[55, 43, 31, 31, 43, 55, 62, 50, 55, 77, 71, 8...","[97, 100, 90, 0, 0, 0, 93, 89, 85, 90, 96, 92,...","[0.029166666666666667, 0.0020833333333333333, ...",44100,"[[0.00558889660656537, -2.295053071159729], [1..."
4,2004,480,"MidiFile(type=1, ticks_per_beat=480, tracks=[\...","[32, 75, 32, 51, 68, 51, 75, 72, 56, 68, 56, 6...","[19, 45, 0, 25, 21, 0, 0, 27, 40, 0, 0, 29, 32...","[0.9864583333333333, 0.196875, 0.04375, 0.4104...",44100,"[[0.1594345508771419, 0.5193846127925084], [2...."
...,...,...,...,...,...,...,...,...
127,2004,480,"MidiFile(type=1, ticks_per_beat=480, tracks=[\...","[67, 65, 67, 70, 65, 72, 70, 77, 72, 74, 77, 7...","[89, 88, 0, 85, 0, 82, 0, 82, 0, 84, 0, 81, 0,...","[1.078125, 0.01875, 0.0010416666666666667, 0.0...",44100,"[[-0.8336377828470809, -0.4952179091205502], [..."
128,2004,480,"MidiFile(type=1, ticks_per_beat=480, tracks=[\...","[60, 36, 48, 51, 55, 51, 48, 60, 55, 36, 60, 5...","[93, 83, 80, 81, 82, 0, 0, 0, 0, 0, 80, 75, 76...","[0.596875, 0.005208333333333333, 0.00729166666...",44100,"[[-0.6426064856535821, -0.1041292077125284], [..."
129,2004,480,"MidiFile(type=1, ticks_per_beat=480, tracks=[\...","[68, 60, 51, 44, 51, 60, 44, 68, 68, 61, 51, 4...","[58, 41, 28, 35, 0, 0, 0, 0, 61, 43, 32, 38, 6...","[1.003125, 0.053125, 0.007291666666666667, 0.0...",44100,"[[2.0107936641664907, -0.3399585682533319], [-..."
130,2004,480,"MidiFile(type=1, ticks_per_beat=480, tracks=[\...","[75, 48, 63, 63, 55, 75, 77, 55, 65, 50, 48, 6...","[52, 27, 31, 0, 35, 0, 66, 0, 30, 44, 0, 0, 37...","[0.03958333333333333, 0.035416666666666666, 0....",44100,"[[-1.0797728625021672, 0.3066298351203736], [0..."


In [8]:
model = Seq2Seq(encoder, decoder, device).to(device)

In [9]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
model.apply(init_weights)



Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(132, 128)
    (rnn): LSTM(128, 512, num_layers=2, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder(
    (embedding): Embedding(132, 128)
    (rnn): LSTM(128, 512, num_layers=2, dropout=0.5)
    (fc_out): Linear(in_features=512, out_features=132, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [10]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')



The model has 6,933,636 trainable parameters


In [11]:


optimizer = optim.Adam(model.parameters())

#TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

criterion = nn.CrossEntropyLoss()



In [12]:
def train(model, iterator:DataLoader, optimizer, criterion, clip):
    
    model.train()
    
    epoch_loss = 0
    
    for i, batch in enumerate(iterator):
        src = batch[0]
        trg = batch[1]
        
        optimizer.zero_grad()
        # trg = [sen_len, batch_size]
        # output = [trg_len, batch_size, output_dim]
        output = model(src, trg)
        output_dim = output.shape[-1]
        
        # transfrom our output : slice off the first column, and flatten the output into 2 dim.
        output = output[1:].view(-1, output_dim) 
        trg = trg[1:].view(-1)
        # trg = [(trg_len-1) * batch_size]
        # output = [(trg_len-1) * batch_size, output_dim]
        
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [13]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
        
        for i, batch in enumerate(iterator):
            
            
            src = batch[0]
            trg = batch[1]
            
            output = model(src, trg, 0) # turn off teacher forcing.
            
            # trg = [sen_len, batch_size]
            # output = [sen_len, batch_size, output_dim]
            output_dim = output.shape[-1]
            
            output = output[1:].view(-1, output_dim)
            trg = trg[1:].view(-1)
            
            loss = criterion(output, trg)
            
            epoch_loss += loss.item()
            
    return epoch_loss / len(iterator)

In [14]:
# a function that used to tell us how long an epoch takes.
def epoch_time(start_time, end_time):
    
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time  / 60)
    elapsed_secs = int(elapsed_time -  (elapsed_mins * 60))
    return  elapsed_mins, elapsed_secs


In [15]:
N_EPOCHS = 10

CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_iter, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iter, criterion)
    
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'Seq2SeqModel.pt')
    print(f"Epoch: {epoch+1:02} | Time {epoch_mins}m {epoch_secs}s")
    print(f"\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}")
    print(f"\tValid Loss: {valid_loss:.3f} | Valid PPL: {math.exp(valid_loss):7.3f}")



TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not list