In [1]:
import torch
import torch.nn as nn
import torch.utils as utils
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import numpy as np
from time import time

In [2]:
class MyDataset(Dataset):
    def __init__(self, sequence_length, total_batch=-1, step_interval=1, random_choice=True, transpose=0):

        # invalid argument handling ---------------------------------------------
        if not isinstance(sequence_length, int):
            raise TypeError("value for sequence_length must be an integer")
        if not isinstance(total_batch, int):
            raise TypeError("value for total_batch must be an integer")
        if not isinstance(step_interval, int):
            raise TypeError("value for skip_interval must be an integer")
        if not isinstance(random_choice, bool):
            raise TypeError("value for random_choice must be a boolean")
        if not isinstance(transpose, int) or transpose < 0:
            raise ValueError("value for transpose must be a non-negative integer")
        # -----------------------------------------------------------------------

        super(MyDataset, self).__init__()

        # conversion of text into note tensor------------------------------------
        data_nums = open('./source/essence_text.txt', 'r', encoding='utf-8').read().split(' ')
        data_len = len(data_nums)
        self.data = torch.zeros(data_len, 129)
        note_indices = [int(note_num) for note_num in data_nums]
        self.data[range(data_len), note_indices] = 1
        highest_note = max(note_indices)
        if highest_note == 0:
            raise ValueError("The data string contains no notes at all!")
        lowest_note = min(note_num for note_num in note_indices if note_num != 0)
        # -----------------------------------------------------------------------
        
        # sequence_length check--------------------------------------------------
        if sequence_length < 1:
            self.sequence_length = data_len - 1
        else:
            self.sequence_length = min(sequence_length, data_len - 1)
        # -----------------------------------------------------------------------

        # total_batch and step_interval check------------------------------------
        if total_batch > 0:
            self.total_batch = min(total_batch, data_len - self.sequence_length)
            self.step_interval = int((data_len - self.sequence_length) / self.total_batch)
        elif step_interval > 1:
            self.step_interval = step_interval
            self.total_batch = (data_len - self.sequence_length) // self.step_interval
        else:
            self.total_batch = data_len - self.sequence_length
            self.step_interval = 1
        # -----------------------------------------------------------------------
            
        self.random_choice = random_choice
        
        # tranpose values--------------------------------------------------------
        self.transpose = transpose

        lower_margin = lowest_note - 1
        upper_margin = 128 - highest_note

        if lower_margin < self.transpose:
            if upper_margin < self.transpose:
                self.low_transpose = -lower_margin
                self.high_transpose = upper_margin
            else:
                self.low_transpose = -lower_margin
                self.high_transpose = min(upper_margin, 2 * self.transpose - lower_margin)
        elif upper_margin < self.transpose:
            self.low_transpose = -min(upper_margin, 2 * self.transpose - lower_margin)
            self.high_transpose = upper_margin
        else:
            self.low_transpose = -self.transpose
            self.high_transpose = self.transpose
        # -----------------------------------------------------------------------
        
    def __len__(self):
        return self.total_batch
    
    def __getitem__(self, batch_idx):
        
        # invalid index handling -------------------------------------------------
        if not isinstance(batch_idx, int):
            raise IndexError(f"this dataset only takes one integer value as the index, but {type(batch_idx)} was given")
        if not batch_idx < self.total_batch:
            raise IndexError(f"index out of bounds (index > len)")
        if batch_idx < 0:
            batch_idx = self.total_batch + batch_idx
            if batch_idx < 0:
                raise IndexError(f"index out of bounds (index < -len)")
        # ------------------------------------------------------------------------


        # index selection --------------------------------------------------------
        if self.step_interval == 1:
            start_idx = batch_idx
        else:
            if self.random_choice:
                if batch_idx == self.total_batch - 1:
                    # This last index might have a wider range than other indices
                    start_idx = torch.randint(low=(self.step_interval * batch_idx),
                                        high=(len(self.data) - self.sequence_length),
                                        size=(),
                                        dtype=torch.int).item()
                else:
                    start_idx = torch.randint(low=(self.step_interval * batch_idx),
                                        high=(self.step_interval * (batch_idx + 1)),
                                        size=(),
                                        dtype=torch.int).item()
            else:
                start_idx = self.step_interval * batch_idx
        end_idx = start_idx + self.sequence_length + 1  # (Excluding!)
        # -----------------------------------------------------------------------
        
        note_mat = torch.zeros(self.sequence_length + 1, 129)
        note_mat.data[:, 0] = self.data.data[start_idx:end_idx, 0]

        # transpose --------------------------------------------------------------
        if self.transpose > 0:
            transpose_val = torch.randint(low=self.low_transpose, high=self.high_transpose + 1, size=(), dtype=torch.int).item()
            if transpose_val < 0:
                note_mat.data[:, 1:transpose_val] = self.data.data[start_idx:end_idx, -transpose_val + 1:]
            elif transpose_val > 0:
                note_mat.data[:, transpose_val + 1:] = self.data.data[start_idx:end_idx, 1:-transpose_val]
            else:
                note_mat.data[:, 1:] = self.data.data[start_idx:end_idx, 1:]
        else:
            note_mat.data[:, 1:] = self.data.data[start_idx:end_idx, 1:]
        # ------------------------------------------------------------------------

        input_tensor = torch.zeros(self.sequence_length, 129)
        input_tensor.data = note_mat.data[:-1]

        target_tensor = note_mat.data[1:].argmax(dim=1)

        return input_tensor, target_tensor

In [3]:
dataset = MyDataset(400, total_batch=-1, step_interval=1, transpose=0)
print(len(dataset))
# dataset[:50]
dataset[0]

922475


(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54,
         73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54,
         73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54,
         73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54,
         73,  0, 54, 73,  0, 54, 73,  0, 54, 73,  0, 54,  0, 54, 85,  0, 54, 85,
          0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,
          0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,
          0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,
          0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,  0, 54, 85,
         

In [4]:
loader = DataLoader(MyDataset(sequence_length=10, total_batch=-1, step_interval=1, random_choice=False, transpose=0),
                    batch_size=2, shuffle=False, drop_last=False)
print(len(loader))
(lambda list_of_tensors: (list_of_tensors, list_of_tensors[0].shape, list_of_tensors[1].shape))(next(iter(loader)))

461433


([tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [1., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [1., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
  
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [1., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [1., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]]),
  tensor([[73,  0, 54, 73,  0, 54, 73,  0, 54, 73],
          [ 0, 54, 73,  0, 54, 73,  0, 54, 73,  0]])],
 torch.Size([2, 10, 129]),
 torch.Size([2, 10]))

In [5]:
class ProperLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout=0, batch_first=True):
        super(ProperLSTM, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.batch_first = batch_first
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, batch_first=batch_first)
        self.to_notes = nn.Linear(in_features=hidden_size, out_features=output_size)
        
        for param in self.parameters():
            if len(param.shape) == 2:
                nn.init.xavier_normal_(param)
    
    def forward(self, input, hidden):
        output, hidden = self.lstm(input, hidden)
        output = self.to_notes(output)
#         output = self.activation(output)
#         output = self.to_out(output)
        
        return output, hidden
    
    def init_hidden(self, batch_size, cuda=False):
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        if cuda:
            h0 = h0.cuda()
            c0 = c0.cuda()
        
        return (h0, c0)
    
    def num_params(self, requires_grad=False):
        if requires_grad:
            num_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        else:
            num_params = sum(p.numel() for p in self.parameters())
        return num_params
    
    def summarize(self):
        summary = self.__str__()
        summary += f"\nTotal number of parameters : {self.num_params(False)}"
        summary += f"\nTrainable parameters : {self.num_params(True)}"
        return summary

In [6]:
class SkipLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout=0, batch_first=True):
        super(SkipLSTM, self).__init__()
        
        # Hyperparameters to be kept (Others are used only for initialization)
        self.hidden_size = hidden_size
        self.batch_first = batch_first
        
        
        self.layer_norm_0 = nn.LayerNorm(input_size)
        
        self.lstm_1 = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=batch_first)
        self.layer_norm_1 = nn.LayerNorm(hidden_size)
        self.dropout_1 = nn.Dropout(p=dropout)
        
        self.lstm_2 = nn.LSTM(input_size=hidden_size + input_size, hidden_size=hidden_size, batch_first=batch_first)
        self.layer_norm_2 = nn.LayerNorm(hidden_size)
        self.dropout_2 = nn.Dropout(p=dropout)
        
        self.lstm_3 = nn.LSTM(input_size=hidden_size + hidden_size, hidden_size=hidden_size, batch_first=batch_first)
        self.layer_norm_3 = nn.LayerNorm(hidden_size)
        self.dropout_3 = nn.Dropout(p=dropout)
        
        self.to_notes = nn.Linear(in_features=hidden_size + hidden_size, out_features=output_size)
#         self.activation = nn.Sigmoid()
#         self.to_out = nn.LogSoftmax(dim=2)

#         for param in self.parameters():
#             if len(param.shape) == 2:
#                 nn.init.xavier_normal_(param)
 
    def forward(self, input, hiddens):
        assert len(hiddens) == 3 and len(input.shape) == 3

        input_norm = self.layer_norm_0(input)
        
        output_1, hidden_1 = self.lstm_1(input_norm, hiddens[0])
        output_1_norm = self.layer_norm_1(output_1)
        concat_1 = torch.cat((output_1_norm, input_norm), dim=2)
        dropped_1 = self.dropout_1(concat_1)
        
        output_2, hidden_2 = self.lstm_2(dropped_1, hiddens[1])
        output_2_norm = self.layer_norm_2(output_2)
        concat_2 = torch.cat((output_2_norm, output_1_norm), dim=2)
        dropped_2 = self.dropout_2(concat_2)
        
        output_3, hidden_3 = self.lstm_3(dropped_2, hiddens[2])
        output_3_norm = self.layer_norm_3(output_3)
        concat_3 = torch.cat((output_3_norm, output_2_norm), dim=2)
        dropped_3 = self.dropout_3(concat_3)
        
        output = self.to_notes(dropped_3)
#         output = self.activation(output)
#         output = self.to_out(output)
        
        return output, (hidden_1, hidden_2, hidden_3)
    
    def truncated_forward(self, input, hiddens, output_len=-1):
        assert len(hiddens) == 3 and len(input.shape) == 3
        if not isinstance(output_len, int):
            raise TypeError("output_len must be a non-zero integer value")
        if output_len == 0:
            raise ValueError("output_len value cannot be 0")
        if output_len < 0 and -output_len > input.shape[self.batch_first]:
            raise ValueError("output_len value cannot be less than -len(input)")

        if output_len > 0:
            output_len = min(output_len, input.shape[self.batch_first])
        else:
            output_len = input.shape[self.batch_first] + (output_len + 1)
        
        if output_len != input.shape[self.batch_first]:
            with torch.no_grad():
                if self.batch_first:
                    hiddens = self.forward(input[:, :-output_len, :], hiddens)[1]
                else:
                    hiddens = self.forward(input[:-output_len, :, :], hiddens)[1]
        
        if self.batch_first:
            output, hiddens = self.forward(input[:, -output_len:, :], hiddens)
        else:
            output, hiddens = self.forward(input[-output_len:, :, :], hiddens)

        return output, hiddens

    def init_hidden(self, batch_size, cuda=False):
        h0_1 = torch.zeros(1, batch_size, self.hidden_size)
        c0_1 = torch.zeros(1, batch_size, self.hidden_size)
        h0_2 = torch.zeros(1, batch_size, self.hidden_size)
        c0_2 = torch.zeros(1, batch_size, self.hidden_size)
        h0_3 = torch.zeros(1, batch_size, self.hidden_size)
        c0_3 = torch.zeros(1, batch_size, self.hidden_size)
        if cuda:
            h0_1 = h0_1.cuda()
            c0_1 = c0_1.cuda()
            h0_2 = h0_2.cuda()
            c0_2 = c0_2.cuda()
            h0_3 = h0_3.cuda()
            c0_3 = c0_3.cuda()

        return ((h0_1, c0_1), (h0_2, c0_2), (h0_3, c0_3))
    
    def num_params(self, requires_grad=False):
        if requires_grad:
            num_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        else:
            num_params = sum(p.numel() for p in self.parameters())
        return num_params
    
    def summarize(self):
        summary = self.__str__()
        summary += f"\nTotal number of parameters : {self.num_params(False)}"
        summary += f"\nTrainable parameters : {self.num_params(True)}"
        return summary

In [7]:
class DeepLSTM(nn.Module):
    def __init__(self, input_size, output_size, dropout=0):
        super(DeepLSTM, self).__init__()
        
        self.layer_norm_0 = nn.LayerNorm(input_size)

        self.lstm_1 = nn.LSTM(input_size=input_size, hidden_size=300, batch_first=True)
        self.layer_norm_1 = nn.LayerNorm(300)
        self.dropout_1 = nn.Dropout(p=dropout)

        self.lstm_2 = nn.LSTM(input_size=300, hidden_size=300, batch_first=True)
        self.layer_norm_2 = nn.LayerNorm(300)
        self.dropout_2 = nn.Dropout(p=dropout)

        self.lstm_3 = nn.LSTM(input_size=300, hidden_size=300, batch_first=True)
        self.layer_norm_3 = nn.LayerNorm(300)
        self.dropout_3 = nn.Dropout(p=dropout)

        self.lstm_4 = nn.LSTM(input_size=300, hidden_size=300, batch_first=True)
        self.layer_norm_4 = nn.LayerNorm(300)
        self.dropout_4 = nn.Dropout(p=dropout)

        self.lstm_5 = nn.LSTM(input_size=300, hidden_size=300, batch_first=True)
        self.layer_norm_5 = nn.LayerNorm(300)
        self.dropout_5 = nn.Dropout(p=dropout)

        self.lstm_6 = nn.LSTM(input_size=300, hidden_size=300, batch_first=True)
        self.layer_norm_6 = nn.LayerNorm(300)
        self.dropout_6 = nn.Dropout(p=dropout)

        self.lstm_7 = nn.LSTM(input_size=300, hidden_size=300, batch_first=True)
        self.layer_norm_7 = nn.LayerNorm(300)
        self.dropout_7 = nn.Dropout(p=dropout)

        self.lstm_8 = nn.LSTM(input_size=300, hidden_size=300, batch_first=True)
        self.layer_norm_8 = nn.LayerNorm(300)
        self.dropout_8 = nn.Dropout(p=dropout)

        self.to_notes = nn.Linear(in_features=300, out_features=output_size)
 
    def forward(self, input, hiddens):
        assert len(hiddens) == 8

        input_norm = self.layer_norm_0(input)
        
        output_1, hidden_1 = self.lstm_1(input_norm, hiddens[0])
        output_1_norm = self.layer_norm_1(output_1)
        dropped_1 = self.dropout_1(output_1_norm)
        
        output_2, hidden_2 = self.lstm_2(dropped_1, hiddens[1])
        output_2_norm = self.layer_norm_2(output_2)
        dropped_2 = self.dropout_2(output_2_norm)
        
        output_3, hidden_3 = self.lstm_3(dropped_2, hiddens[2])
        output_3_norm = self.layer_norm_3(output_3)
        dropped_3 = self.dropout_3(output_3_norm)
        
        output_4, hidden_4 = self.lstm_4(dropped_3, hiddens[3])
        output_4_norm = self.layer_norm_4(output_4)
        dropped_4 = self.dropout_4(output_4_norm)
        
        output_5, hidden_5 = self.lstm_5(dropped_4, hiddens[4])
        output_5_norm = self.layer_norm_5(output_5)
        dropped_5 = self.dropout_5(output_5_norm)
        
        output_6, hidden_6 = self.lstm_6(dropped_5, hiddens[5])
        output_6_norm = self.layer_norm_6(output_6)
        dropped_6 = self.dropout_6(output_6_norm)

        output_7, hidden_7 = self.lstm_7(dropped_6, hiddens[6])
        output_7_norm = self.layer_norm_7(output_7)
        dropped_7 = self.dropout_7(output_7_norm)

        output_8, hidden_8 = self.lstm_8(dropped_7, hiddens[7])
        output_8_norm = self.layer_norm_8(output_8)
        dropped_8 = self.dropout_8(output_8_norm)

        output = self.to_notes(dropped_8)
        
        return output, (hidden_1, hidden_2, hidden_3, hidden_4, hidden_5, hidden_6, hidden_7, hidden_8)
    
    def init_hidden(self, batch_size, cuda=False):
        h0_1 = torch.zeros(1, batch_size, 300)
        c0_1 = torch.zeros(1, batch_size, 300)
        h0_2 = torch.zeros(1, batch_size, 300)
        c0_2 = torch.zeros(1, batch_size, 300)
        h0_3 = torch.zeros(1, batch_size, 300)
        c0_3 = torch.zeros(1, batch_size, 300)
        h0_4 = torch.zeros(1, batch_size, 300)
        c0_4 = torch.zeros(1, batch_size, 300)
        h0_5 = torch.zeros(1, batch_size, 300)
        c0_5 = torch.zeros(1, batch_size, 300)
        h0_6 = torch.zeros(1, batch_size, 300)
        c0_6 = torch.zeros(1, batch_size, 300)
        h0_7 = torch.zeros(1, batch_size, 300)
        c0_7 = torch.zeros(1, batch_size, 300)
        h0_8 = torch.zeros(1, batch_size, 300)
        c0_8 = torch.zeros(1, batch_size, 300)

        if cuda:
            h0_1 = h0_1.cuda()
            c0_1 = c0_1.cuda()
            h0_2 = h0_2.cuda()
            c0_2 = c0_2.cuda()
            h0_3 = h0_3.cuda()
            c0_3 = c0_3.cuda()
            h0_4 = h0_4.cuda()
            c0_4 = c0_4.cuda()
            h0_5 = h0_5.cuda()
            c0_5 = c0_5.cuda()
            h0_6 = h0_6.cuda()
            c0_6 = c0_6.cuda()
            h0_7 = h0_7.cuda()
            c0_7 = c0_7.cuda()
            h0_8 = h0_8.cuda()
            c0_8 = c0_8.cuda()

        return ((h0_1, c0_1), (h0_2, c0_2), (h0_3, c0_3), (h0_4, c0_4),
                (h0_5, c0_5), (h0_6, c0_6), (h0_7, c0_7), (h0_8, c0_8))
    
    def num_params(self, requires_grad=False):
        if requires_grad:
            num_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        else:
            num_params = sum(p.numel() for p in self.parameters())
        return num_params
    
    def summarize(self):
        summary = self.__str__()
        summary += f"\nTotal number of parameters : {self.num_params(False)}"
        summary += f"\nTrainable parameters : {self.num_params(True)}"
        return summary

In [8]:
class WideLSTM(nn.Module):
    def __init__(self, input_size, output_size, dropout=0):
        super(WideLSTM, self).__init__()
        
        self.layer_norm_0 = nn.LayerNorm(input_size)

        self.lstm_1 = nn.LSTM(input_size=input_size, hidden_size=700, batch_first=True)
        self.layer_norm_1 = nn.LayerNorm(700)
        self.dropout_1 = nn.Dropout(p=dropout)

        self.lstm_2 = nn.LSTM(input_size=700, hidden_size=700, batch_first=True)
        self.layer_norm_2 = nn.LayerNorm(700)
        self.dropout_2 = nn.Dropout(p=dropout)

        self.lstm_3 = nn.LSTM(input_size=700, hidden_size=700, batch_first=True)
        self.layer_norm_3 = nn.LayerNorm(700)
        self.dropout_3 = nn.Dropout(p=dropout)

        self.to_notes = nn.Linear(in_features=700, out_features=output_size)
 
    def forward(self, input, hiddens):
        assert len(hiddens) == 3

        input_norm = self.layer_norm_0(input)
        
        output_1, hidden_1 = self.lstm_1(input_norm, hiddens[0])
        output_1_norm = self.layer_norm_1(output_1)
        dropped_1 = self.dropout_1(output_1_norm)
        
        output_2, hidden_2 = self.lstm_2(dropped_1, hiddens[1])
        output_2_norm = self.layer_norm_2(output_2)
        dropped_2 = self.dropout_2(output_2_norm)
        
        output_3, hidden_3 = self.lstm_3(dropped_2, hiddens[2])
        output_3_norm = self.layer_norm_3(output_3)
        dropped_3 = self.dropout_3(output_3_norm)

        output = self.to_notes(dropped_3)
        hiddens = (hidden_1, hidden_2, hidden_3)
        
        return output, hiddens
    
    def init_hidden(self, batch_size, cuda=False):
        h0_1 = torch.zeros(1, batch_size, 700)
        c0_1 = torch.zeros(1, batch_size, 700)
        h0_2 = torch.zeros(1, batch_size, 700)
        c0_2 = torch.zeros(1, batch_size, 700)
        h0_3 = torch.zeros(1, batch_size, 700)
        c0_3 = torch.zeros(1, batch_size, 700)

        if cuda:
            h0_1 = h0_1.cuda()
            c0_1 = c0_1.cuda()
            h0_2 = h0_2.cuda()
            c0_2 = c0_2.cuda()
            h0_3 = h0_3.cuda()
            c0_3 = c0_3.cuda()

        return ((h0_1, c0_1), (h0_2, c0_2), (h0_3, c0_3))
    
    def num_params(self, requires_grad=False):
        if requires_grad:
            num_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        else:
            num_params = sum(p.numel() for p in self.parameters())
        return num_params
    
    def summarize(self):
        summary = self.__str__()
        summary += f"\nTotal number of parameters : {self.num_params(False)}"
        summary += f"\nTrainable parameters : {self.num_params(True)}"
        return summary

In [9]:
# my_lstm = ProperLSTM(input_size=129, hidden_size=300, output_size=129, num_layers=3, batch_first=True)
# my_lstm = SkipLSTM(input_size=129, hidden_size=300, output_size=129, dropout=0.5, batch_first=True)
# my_lstm = SkipLSTM(input_size=129, hidden_size=700, output_size=129, dropout=0., batch_first=True)  # Big Boy!
my_lstm = DeepLSTM(input_size=129, output_size=129, dropout=0.2)  # Long Boy!!!
# my_lstm = WideLSTM(input_size=129, output_size=129, dropout=0.2)  # Wide Net!!!!!

In [10]:
print(my_lstm.summarize())

DeepLSTM(
  (layer_norm_0): LayerNorm(torch.Size([129]), eps=1e-05, elementwise_affine=True)
  (lstm_1): LSTM(129, 300, batch_first=True)
  (layer_norm_1): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_1): Dropout(p=0.2)
  (lstm_2): LSTM(300, 300, batch_first=True)
  (layer_norm_2): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_2): Dropout(p=0.2)
  (lstm_3): LSTM(300, 300, batch_first=True)
  (layer_norm_3): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_3): Dropout(p=0.2)
  (lstm_4): LSTM(300, 300, batch_first=True)
  (layer_norm_4): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_4): Dropout(p=0.2)
  (lstm_5): LSTM(300, 300, batch_first=True)
  (layer_norm_5): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_5): Dropout(p=0.2)
  (lstm_6): LSTM(300, 300, batch_first=True)
  (layer_norm_6): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_af

In [11]:
# optimizer = optim.SGD(my_lstm.parameters(), lr=1.0)
# optimizer = optim.SGD(my_lstm.parameters(), lr=0.01, momentum=0.9, weight_decay=1.0, nesterov=True)
# optimizer = optim.RMSprop(my_lstm.parameters(), lr=0.05, weight_decay=0, momentum=0.9, centered=False)
optimizer = optim.Adam(my_lstm.parameters(), lr=0.001, weight_decay=0., amsgrad=True)

In [12]:
epoch = 0
time_ellapsed = 0.

In [13]:
load_saved = True
use_cuda = True

In [25]:
if load_saved:
    if use_cuda:
        my_lstm.cuda()
        device = 'cuda'
    else:
        my_lstm.cpu()
        device = 'cpu'

#     recent_state = torch.load('./best_d.pth', map_location=device)
#     recent_state = torch.load('./target_d07.pth', map_location=device)
#     recent_state = torch.load('./recent_d.pth', map_location=device)
    recent_state = torch.load('./temp/Deep/g3/recent_d.pth', map_location=device)

    my_lstm.load_state_dict(recent_state['state_dict'])
    optimizer.load_state_dict(recent_state['opt_dict'])
    epoch = recent_state['epoch'] + 1
    time_ellapsed = recent_state['time']
    print(recent_state)

{'time': 151656.45930075645, 'epoch': 1928, 'loss': 0.09452938586473465, 'acc': 0.9685988593101501, 'recall': 0.9641645574569702, 'state_dict': OrderedDict([('layer_norm_0.weight', tensor([1.0076, 0.7331, 0.7186, 0.8336, 0.7428, 0.6674, 0.8440, 0.7867, 0.7087,
        0.7958, 0.7996, 0.6929, 0.7903, 0.7488, 0.7201, 0.7428, 0.8401, 0.7048,
        0.7371, 0.9342, 1.0688, 1.0155, 1.0526, 1.0493, 1.0540, 1.0427, 1.0622,
        1.0211, 1.0486, 1.0210, 1.0589, 1.0519, 1.1260, 0.9912, 1.0701, 1.0884,
        1.0358, 1.0473, 1.1128, 1.1282, 1.0930, 1.0769, 1.0150, 1.1388, 1.0029,
        0.9847, 1.0795, 1.0382, 1.0769, 1.0189, 1.1009, 1.0623, 1.0795, 1.0840,
        1.0598, 1.0404, 1.0594, 0.9898, 0.9923, 1.0908, 1.0496, 1.1095, 1.0649,
        1.0634, 1.0547, 1.0304, 1.0929, 1.1100, 1.0235, 1.0730, 1.0373, 1.0281,
        1.0714, 1.0243, 1.0883, 1.0828, 1.0371, 1.0411, 1.0662, 1.1445, 1.0937,
        1.0564, 1.1035, 1.0639, 1.0806, 1.1247, 1.1127, 1.0421, 1.1490, 1.0576,
        1.0663, 1.0

In [26]:
print(my_lstm.summarize())

DeepLSTM(
  (layer_norm_0): LayerNorm(torch.Size([129]), eps=1e-05, elementwise_affine=True)
  (lstm_1): LSTM(129, 300, batch_first=True)
  (layer_norm_1): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_1): Dropout(p=0.2)
  (lstm_2): LSTM(300, 300, batch_first=True)
  (layer_norm_2): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_2): Dropout(p=0.2)
  (lstm_3): LSTM(300, 300, batch_first=True)
  (layer_norm_3): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_3): Dropout(p=0.2)
  (lstm_4): LSTM(300, 300, batch_first=True)
  (layer_norm_4): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_4): Dropout(p=0.2)
  (lstm_5): LSTM(300, 300, batch_first=True)
  (layer_norm_5): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_affine=True)
  (dropout_5): Dropout(p=0.2)
  (lstm_6): LSTM(300, 300, batch_first=True)
  (layer_norm_6): LayerNorm(torch.Size([300]), eps=1e-05, elementwise_af

In [27]:
print(optimizer)

Adam (
Parameter Group 0
    amsgrad: True
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0.0
)


In [None]:
if True:
    my_lstm.train()
    if use_cuda:
        my_lstm.cuda()
        device = 'cuda'
    else:
        my_lstm.cpu()
        device = 'cpu'

    loss_func = nn.CrossEntropyLoss(ignore_index=-200)

    batch_size = 4
    sequence_length = 9000
    first_n_ignore = 300
    transpose = 3

    loader = DataLoader(MyDataset(sequence_length=sequence_length, total_batch=-1, step_interval=sequence_length, transpose=transpose),
                        batch_size=batch_size,
                        shuffle=True,
                        drop_last=True)

    # best = {'time': 0.0, 'epoch': epoch, 'loss': 9999, 'acc': 0.0, 'recall': 0.0, 'state_dict': None, 'opt_dict': None}
    best = torch.load('./best_w.pth', map_location=device)

    hour = 0
    minute = 0
    second = 5
    train_duration = 3600 * hour + 60 * minute + second # seconds
    start_time = time()

    n_total_batches = len(loader)

    if True:
        torch.save(recent_state, './recent_w - Backup.pth')
        open('./train_log_w - Backup.txt', 'w').write(open('./train_log_w.txt', 'r').read())

    while time() - start_time < train_duration:

        prev_time = time()
        temp_time = time()

        losses = []
        accs = []
        recalls = []
        
        prev_progress = 0
        progress = 0
        for i, (sequence, target) in enumerate(loader):
            target = target[:, first_n_ignore:].contiguous()
            if use_cuda:
                sequence = sequence.cuda()
                target = target.cuda()
            my_lstm.zero_grad()
            output = my_lstm(sequence, my_lstm.init_hidden(batch_size=batch_size, cuda=use_cuda))[0][:, first_n_ignore:, :].contiguous()
            loss = loss_func(output.view(-1, 129), target.view(-1))
            if not torch.isfinite(loss):
                break
            
            loss.backward()
            optimizer.step()

            losses.append(loss.item())
            correct = output.argmax(dim=2).eq(target).float()
            accs.append(correct.mean().item())
            recalls.append(correct[target.ne(0)].mean().item())

            progress = int(((i + 1) / n_total_batches) * 10)
            prog_diff = progress - prev_progress
            if prog_diff >= 1:
                print('O ' * prog_diff, end='')
                prev_progress = progress
        print()

        if not torch.isfinite(loss):
            print("Loss Exploded!")
            break

        time_ellapsed += time() - prev_time
        avg_loss = np.array(losses).mean()
        avg_acc = np.array(accs).mean()
        avg_recall = np.array(recalls).mean()
        with open('./train_log_d.txt', 'a') as log:  # Change the name of the log accordingly
            log.write(f"Training Time : {round(time_ellapsed)}s\n")
            log.write(f"Loss at epoch {epoch} : {avg_loss}\n")
            log.write(f"Average accuracy : {avg_acc * 100:.3f}%\n")
            log.write(f"Average recall : {avg_recall * 100:.3f}%\n")
            log.write('\n')
        print(f"{round(time() - start_time)}s of {round(train_duration)}s since the start")
        print(f"Training Time : {round(time_ellapsed)}s")
        print(f"Average loss at epoch {epoch} : {avg_loss}")
        print(f"Average accuracy : {avg_acc * 100:.3f}%")
        print(f"Average recall : {avg_recall * 100:.3f}%")

        recent_state = {'time': time_ellapsed, 'epoch': epoch, 'loss': avg_loss, 'acc': avg_acc, 'recall': avg_recall, 'state_dict': my_lstm.state_dict(), 'opt_dict': optimizer.state_dict()}
        torch.save(recent_state, './recent_w.pth')
        if avg_loss < best['loss']:
            best = recent_state
            torch.save(best, './best_w.pth')
            open('./train_log_w_best.txt', 'w').write(open('./train_log_w.txt').read())
            print("New Best!")
            
        print()

        epoch += 1

    torch.cuda.empty_cache()

In [None]:
if False:
    my_lstm.train()
    if use_cuda:
        my_lstm.cuda()
        device = 'cuda'
    else:
        my_lstm.cpu()
        device = 'cpu'

    loss_func = nn.CrossEntropyLoss(ignore_index=-200)

    batch_size = 16
    sequence_length = 20000
    backprop_length = 1000
    n_steps_per_batch = np.ceil(sequence_length / backprop_length).astype(int)
    transpose = 3

    loader = DataLoader(MyDataset(sequence_length=sequence_length, total_batch=-1, step_interval=sequence_length, transpose=transpose),
                        batch_size=batch_size,
                        shuffle=True,
                        drop_last=True)

    # best = {'time': 0.0, 'epoch': epoch, 'loss': 9999, 'acc': 0.0, 'recall': 0.0, 'state_dict': None, 'opt_dict': None}
    best = torch.load('./best_d.pth', map_location=device)

    hour = 1
    minute = 0
    second = 0
    train_duration = 3600 * hour + 60 * minute + second # seconds
    start_time = time()
    
    n_total_batches = len(loader)
    
    if True:
        torch.save(recent_state, './recent_d - Backup.pth')
        open('./train_log_d - Backup.txt', 'w').write(open('./train_log_d.txt', 'r').read())

    while time() - start_time < train_duration:

        prev_time = time()
        temp_time = time()

        losses = []
        accs = []
        recalls = []
        
        prev_progress = 0
        progress = 0
        for i, (sequence, target) in enumerate(loader):
            hiddens = my_lstm.init_hidden(batch_size=batch_size, cuda=use_cuda)
            sub_losses = []
            sub_accs = []
            sub_recalls = []

            for j in range(n_steps_per_batch):
                sub_sequence = sequence[:, j * backprop_length : (j + 1) * backprop_length].contiguous()
                sub_target = target[:, j * backprop_length : (j + 1) * backprop_length].contiguous()
                if use_cuda:
                    sub_sequence = sub_sequence.cuda()
                    sub_target = sub_target.cuda()
                my_lstm.zero_grad()

                output, hiddens = my_lstm(sub_sequence, my_lstm.init_hidden(batch_size=batch_size, cuda=use_cuda))
                hiddens = tuple((h.data, c.data) for (h, c) in hiddens)
                loss = loss_func(output.view(-1, 129), sub_target.view(-1))
                if not torch.isfinite(loss):
                    break
                    
                loss.backward()
                optimizer.step()

                sub_losses.append(loss.item())
                correct = output.argmax(dim=2).eq(sub_target).float()
                sub_accs.append(correct.mean().item())
                sub_recalls.append(correct[sub_target.ne(0)].mean().item())
                
                progress = int(((((i * n_steps_per_batch) + j + 1) / (n_total_batches * n_steps_per_batch)) * 10) // 1)
                prog_diff = progress - prev_progress
                if prog_diff >= 1:
                    print('O ' * prog_diff, end='')
                    prev_progress = progress
                    
            if not torch.isfinite(loss):
                break
            
            losses.append(np.array(sub_losses).mean())
            accs.append(np.array(sub_accs).mean())
            recalls.append(np.array(sub_recalls).mean())
        print()
        
        if not torch.isfinite(loss):
            print("Loss Exploded!")
            break

        time_ellapsed += time() - prev_time
        avg_loss = np.array(losses).mean()
        avg_acc = np.array(accs).mean()
        avg_recall = np.array(recalls).mean()
        with open('./train_log_d.txt', 'a') as log:  # Change the name of the log accordingly
            log.write(f"Training Time : {round(time_ellapsed)}s\n")
            log.write(f"Loss at epoch {epoch} : {avg_loss}\n")
            log.write(f"Average accuracy : {avg_acc * 100:.3f}%\n")
            log.write(f"Average recall : {avg_recall * 100:.3f}%\n")
            log.write('\n')
        print(f"{round(time() - start_time)}s of {round(train_duration)}s since the start")
        print(f"Training Time : {round(time_ellapsed)}s")
        print(f"Average loss at epoch {epoch} : {avg_loss}")
        print(f"Average accuracy : {avg_acc * 100:.3f}%")
        print(f"Average recall : {avg_recall * 100:.3f}%")
        print()

        recent_state = {'time': time_ellapsed, 'epoch': epoch, 'loss': avg_loss, 'acc': avg_acc, 'recall': avg_recall, 'state_dict': my_lstm.state_dict(), 'opt_dict': optimizer.state_dict()}
        torch.save(recent_state, './recent_d.pth')
        if avg_loss < best['loss']:
            best = recent_state
            torch.save(best, './best_d.pth')

        epoch += 1

    torch.cuda.empty_cache()

In [None]:
torch.cuda.empty_cache()

In [None]:
best

In [None]:
# my_lstm.cuda()

# recent_state = torch.load('./best.pth', map_location='cuda')
# recent_state = torch.load('./best_c.pth', map_location='cuda')
# recent_state = torch.load('./best_d.pth', map_location='cuda')
# recent_state = torch.load('./recent.pth', map_location='cuda')
# recent_state = torch.load('./recent_c.pth', map_location='cuda')
# recent_state = torch.load('./recent_d.pth', map_location='cuda')

# my_lstm.load_state_dict(recent_state['state_dict'])
# optimizer.load_state_dict(recent_state['opt_dict'])
# epoch = recent_state['epoch']
# time_ellapsed = recent_state['time']
# recent_state

In [17]:
use_cuda = True

my_lstm.eval()
if use_cuda:
    my_lstm.cuda()
    device = 'cuda'
else:
    my_lstm.cpu()
    device = 'cpu'

with torch.no_grad():
    predicted = []
    hidden = my_lstm.init_hidden(batch_size=1, cuda=use_cuda)
    process_length = 80000
    dataset_p = MyDataset(-1, total_batch=1, step_interval=1, random_choice=False)[0][0]

    total_len = len(dataset_p)
    processed = 0
    start_time = time()
    for data in [dataset_p[i * process_length : (i + 1) * process_length] for i in range(0, torch.ceil(torch.tensor(len(dataset_p) / process_length)).to(torch.int).item(), 1)]:
        processed += len(data)
        data = data.unsqueeze(dim=0)
        if use_cuda:
            data = data.cuda()
        output, hidden = my_lstm(data, hidden)
        predicted.extend(output.argmax(dim=2).view(-1).cpu().tolist())
        print(f"{processed} of {total_len} characters processed in {round(time() - start_time)}s ...")

    torch.cuda.empty_cache()
    print(predicted[:100])
predicted_text = ' '.join([str(note_num) for note_num in predicted])

80000 of 922874 characters processed in 15...
160000 of 922874 characters processed in 28...
240000 of 922874 characters processed in 43...
320000 of 922874 characters processed in 60...
400000 of 922874 characters processed in 75...
480000 of 922874 characters processed in 91...
560000 of 922874 characters processed in 107...
640000 of 922874 characters processed in 122...
720000 of 922874 characters processed in 138...
800000 of 922874 characters processed in 154...
880000 of 922874 characters processed in 170...
922874 of 922874 characters processed in 178...
[66, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 0, 54, 73, 54, 0, 0, 54, 85, 0, 54, 85, 0, 54, 85, 0, 54, 85, 0]


In [18]:
open('./predicted_d_best.txt', 'w', encoding='utf-8').write(predicted_text)

2516390

In [31]:
preprocess_with_cuda = False
use_cuda = False

torch.manual_seed(101)

starting_notes = MyDataset(sequence_length=100, total_batch=1, random_choice=False, transpose=0)[0][0].argmax(dim=1).tolist()
# starting_notes = [int(note_num) for note_num in open('./generated_d_best.txt', 'r', encoding='utf-8').read().strip(' ').split(' ')]

start_len = len(starting_notes)
process_len = 50000
generate_len = 1000000

temperature = 1.00

my_lstm.eval()
if preprocess_with_cuda:
    my_lstm.cuda()
    device = 'cuda'
else:
    my_lstm.cpu()
    device = 'cpu'

hidden = my_lstm.init_hidden(batch_size=1, cuda=preprocess_with_cuda)

generated_note_text = ''

with torch.no_grad():
    print("Processing starting notes ...")
    for i in range((start_len // process_len) + 1):
        input_len = len(starting_notes[i * process_len : (i + 1) * process_len])
        if input_len > 0:
            input = torch.zeros(1, input_len, 129, device=device)
            input.data[0, range(input_len), starting_notes[i * process_len : (i + 1) * process_len]] = 1
            output, hidden = my_lstm(input, hidden)
            print(f"{(i * process_len) + input_len} of {start_len} characters processed ...")
    print("All processed!")
    print("\nStart generating ...")
    start_time = time()

    prediction = output[0, -1].argmax(dim=-1)
    
    generated_note_text += str(prediction.item())
    
    my_lstm.eval()
    if use_cuda:
        my_lstm.cuda()
        device = 'cuda'
    else:
        my_lstm.cpu()
        device = 'cpu'
    hidden = tuple((h.to(device), c.to(device)) for (h, c) in hidden)

    for i in range(generate_len - 1):
        input = torch.zeros(1, 1, 129, device=device)
        input.data[0, 0, prediction] = 1

        output, hidden = my_lstm(input, hidden)
#         print(output)

        prediction = torch.multinomial(torch.nn.functional.softmax(output.view(1, 129), dim=1).pow(1 / temperature), 1)
#         prediction = output.argmax(dim=2)

        generated_note_text += ' ' + str(prediction.item())

        if (i + 2) % 1000 == 0:
            print(f"{i + 2} characters generated ... ( {round(time() - start_time, 2)}s )")

    print("\nFinished!\n")

    torch.cuda.empty_cache()
print('\n'.join(generated_note_text.split('\n')[:1000]))

Processing starting notes ...
100 of 100 characters processed ...
All processed!

Start generating ...
1000 characters generated ... ( 3.29s )
2000 characters generated ... ( 6.5s )
3000 characters generated ... ( 9.56s )
4000 characters generated ... ( 12.65s )
5000 characters generated ... ( 15.69s )
6000 characters generated ... ( 18.83s )
7000 characters generated ... ( 21.88s )
8000 characters generated ... ( 25.04s )
9000 characters generated ... ( 28.14s )
10000 characters generated ... ( 31.16s )
11000 characters generated ... ( 34.2s )
12000 characters generated ... ( 37.23s )
13000 characters generated ... ( 40.22s )
14000 characters generated ... ( 43.19s )
15000 characters generated ... ( 46.18s )
16000 characters generated ... ( 49.25s )
17000 characters generated ... ( 52.32s )
18000 characters generated ... ( 55.4s )
19000 characters generated ... ( 58.39s )
20000 characters generated ... ( 61.54s )
21000 characters generated ... ( 64.6s )
22000 characters generated ... 

189000 characters generated ... ( 601.65s )
190000 characters generated ... ( 604.32s )
191000 characters generated ... ( 607.25s )
192000 characters generated ... ( 610.34s )
193000 characters generated ... ( 613.5s )
194000 characters generated ... ( 616.33s )
195000 characters generated ... ( 619.33s )
196000 characters generated ... ( 622.46s )
197000 characters generated ... ( 625.15s )
198000 characters generated ... ( 627.85s )
199000 characters generated ... ( 630.47s )
200000 characters generated ... ( 633.13s )
201000 characters generated ... ( 635.76s )
202000 characters generated ... ( 638.56s )
203000 characters generated ... ( 641.28s )
204000 characters generated ... ( 643.95s )
205000 characters generated ... ( 646.6s )
206000 characters generated ... ( 649.33s )
207000 characters generated ... ( 651.98s )
208000 characters generated ... ( 654.65s )
209000 characters generated ... ( 657.33s )
210000 characters generated ... ( 660.02s )
211000 characters generated ... ( 

375000 characters generated ... ( 1104.14s )
376000 characters generated ... ( 1106.9s )
377000 characters generated ... ( 1109.62s )
378000 characters generated ... ( 1112.36s )
379000 characters generated ... ( 1114.95s )
380000 characters generated ... ( 1117.64s )
381000 characters generated ... ( 1120.28s )
382000 characters generated ... ( 1122.97s )
383000 characters generated ... ( 1125.59s )
384000 characters generated ... ( 1128.17s )
385000 characters generated ... ( 1130.86s )
386000 characters generated ... ( 1133.5s )
387000 characters generated ... ( 1136.11s )
388000 characters generated ... ( 1138.79s )
389000 characters generated ... ( 1141.4s )
390000 characters generated ... ( 1144.14s )
391000 characters generated ... ( 1146.72s )
392000 characters generated ... ( 1149.32s )
393000 characters generated ... ( 1151.99s )
394000 characters generated ... ( 1154.69s )
395000 characters generated ... ( 1157.35s )
396000 characters generated ... ( 1160.01s )
397000 charac

558000 characters generated ... ( 1593.81s )
559000 characters generated ... ( 1596.48s )
560000 characters generated ... ( 1599.12s )
561000 characters generated ... ( 1601.81s )
562000 characters generated ... ( 1604.45s )
563000 characters generated ... ( 1607.17s )
564000 characters generated ... ( 1609.77s )
565000 characters generated ... ( 1612.37s )
566000 characters generated ... ( 1615.03s )
567000 characters generated ... ( 1617.72s )
568000 characters generated ... ( 1620.54s )
569000 characters generated ... ( 1623.22s )
570000 characters generated ... ( 1626.0s )
571000 characters generated ... ( 1628.64s )
572000 characters generated ... ( 1631.33s )
573000 characters generated ... ( 1634.04s )
574000 characters generated ... ( 1636.75s )
575000 characters generated ... ( 1639.38s )
576000 characters generated ... ( 1642.01s )
577000 characters generated ... ( 1644.66s )
578000 characters generated ... ( 1647.28s )
579000 characters generated ... ( 1649.94s )
580000 char

741000 characters generated ... ( 2083.33s )
742000 characters generated ... ( 2086.06s )
743000 characters generated ... ( 2088.75s )
744000 characters generated ... ( 2091.32s )
745000 characters generated ... ( 2093.99s )
746000 characters generated ... ( 2096.59s )
747000 characters generated ... ( 2099.27s )
748000 characters generated ... ( 2101.87s )
749000 characters generated ... ( 2104.58s )
750000 characters generated ... ( 2107.13s )
751000 characters generated ... ( 2109.85s )
752000 characters generated ... ( 2112.44s )
753000 characters generated ... ( 2115.11s )
754000 characters generated ... ( 2117.72s )
755000 characters generated ... ( 2120.38s )
756000 characters generated ... ( 2122.97s )
757000 characters generated ... ( 2125.63s )
758000 characters generated ... ( 2128.37s )
759000 characters generated ... ( 2131.01s )
760000 characters generated ... ( 2133.6s )
761000 characters generated ... ( 2136.21s )
762000 characters generated ... ( 2138.85s )
763000 char

924000 characters generated ... ( 2571.32s )
925000 characters generated ... ( 2574.02s )
926000 characters generated ... ( 2576.68s )
927000 characters generated ... ( 2579.4s )
928000 characters generated ... ( 2582.04s )
929000 characters generated ... ( 2584.69s )
930000 characters generated ... ( 2587.35s )
931000 characters generated ... ( 2590.27s )
932000 characters generated ... ( 2593.26s )
933000 characters generated ... ( 2595.87s )
934000 characters generated ... ( 2598.53s )
935000 characters generated ... ( 2601.2s )
936000 characters generated ... ( 2603.95s )
937000 characters generated ... ( 2606.59s )
938000 characters generated ... ( 2609.72s )
939000 characters generated ... ( 2612.56s )
940000 characters generated ... ( 2615.33s )
941000 characters generated ... ( 2617.94s )
942000 characters generated ... ( 2620.56s )
943000 characters generated ... ( 2623.18s )
944000 characters generated ... ( 2625.81s )
945000 characters generated ... ( 2628.47s )
946000 chara

In [32]:
open('./generated_d.txt', 'w', encoding='utf-8').write(generated_note_text)

# open('./generated_d07.txt', 'a', encoding='utf-8').write(' ' + generated_note_text)

2690572

In [None]:
optimizer.state_dict()

# Different approach!

In [None]:
class NoteMatDataset(Dataset):
    def __init__(self, sequence_length, total_batch=-1, step_interval=1, random_choice=True, transpose=0):

        # invalid argument handling ---------------------------------------------
        if not isinstance(sequence_length, int):
            raise TypeError("value for sequence_length must be an integer")
        if not isinstance(total_batch, int):
            raise TypeError("value for total_batch must be an integer")
        if not isinstance(step_interval, int):
            raise TypeError("value for skip_interval must be an integer")
        if not isinstance(random_choice, bool):
            raise TypeError("value for random_choice must be a boolean")
        if not isinstance(transpose, int) or transpose < 0:
            raise ValueError("value for transpose must be a non-negative integer")
        # -----------------------------------------------------------------------

        super(NoteMatDataset, self).__init__()
        
        # conversion of text into note tensor------------------------------------
        timestep_list = open('./source/essence_text.txt', 'r', encoding='utf-8').read().split('\n')
        next_line_num = ord('\n')
        self.note_mat = torch.zeros(len(timestep_list), 128)
        note_indices = ([], [])
        for idx, current_note_string in enumerate(timestep_list):
            note_indices[0].extend([idx] * len(current_note_string))
            current_note_nums = [ord(note_chr) - next_line_num - 1 for note_chr in current_note_string]
            note_indices[1].extend(current_note_nums)
        self.note_mat[note_indices[0], note_indices[1]] = 1
        highest_note = max(note_indices[1])
        lowest_note = min(note_indices[1])
        if highest_note == 0:
            highest_note = 127
        # -----------------------------------------------------------------------
        
        # sequence_length check--------------------------------------------------
        if sequence_length < 1:
            self.sequence_length = len(self.note_mat) - 1
        else:
            self.sequence_length = min(sequence_length, len(self.note_mat) - 1)
        # -----------------------------------------------------------------------

        # total_batch and step_interval check------------------------------------
        if total_batch > 0:
            self.total_batch = min(total_batch, len(self.note_mat) - self.sequence_length)
            self.step_interval = (len(self.note_mat) - self.sequence_length) // self.total_batch
        elif step_interval > 1:
            self.step_interval = step_interval
            self.total_batch = (len(self.note_mat) - self.sequence_length) // self.step_interval
        else:
            self.total_batch = len(self.note_mat) - self.sequence_length
            self.step_interval = 1
        # -----------------------------------------------------------------------
            
        self.random_choice = random_choice
        
        # tranpose values--------------------------------------------------------
        self.transpose = transpose

        lower_margin = lowest_note
        upper_margin = 127 - highest_note

        if lower_margin < self.transpose:
            if upper_margin < self.transpose:
                self.low_transpose = -lower_margin
                self.high_transpose = upper_margin
            else:
                self.low_transpose = -lower_margin
                self.high_transpose = min(upper_margin, 2 * self.transpose - lower_margin)
        elif upper_margin < self.transpose:
            self.low_transpose = -min(upper_margin, 2 * self.transpose - lower_margin)
            self.high_transpose = upper_margin
        else:
            self.low_transpose = -self.transpose
            self.high_transpose = self.transpose
        # -----------------------------------------------------------------------
        
    def __len__(self):
        return self.total_batch
    
    def __getitem__(self, batch_idx):
        
        # invalid index handling -------------------------------------------------
        if not isinstance(batch_idx, int):
            raise IndexError(f"this dataset only takes one integer value as the index, but {type(batch_idx)} was given")
        if not batch_idx < self.total_batch:
            raise IndexError(f"index out of bounds (index > len)")
        if batch_idx < 0:
            batch_idx = self.total_batch + batch_idx
            if batch_idx < 0:
                raise IndexError(f"index out of bounds (index < -len)")
        # ------------------------------------------------------------------------

        # index selection --------------------------------------------------------
        if self.step_interval == 1:
            start_idx = batch_idx
        else:
            if self.random_choice:
                if batch_idx == self.total_batch - 1:
                    # This last index might have a wider range than other indices
                    start_idx = torch.randint(low=(self.step_interval * batch_idx),
                                        high=(len(self.note_mat) - self.sequence_length),
                                        size=(),
                                        dtype=torch.int).item()
                else:
                    start_idx = torch.randint(low=(self.step_interval * batch_idx),
                                        high=(self.step_interval * (batch_idx + 1)),
                                        size=(),
                                        dtype=torch.int).item()
            else:
                start_idx = self.step_interval * batch_idx
        end_idx = start_idx + self.sequence_length + 1  # (Excluding!)
        # ------------------------------------------------------------------------
        
        note_mat = torch.zeros(self.sequence_length + 1, 128)

        # transpose --------------------------------------------------------------
        if self.transpose > 0:
            transpose_val = torch.randint(low=self.low_transpose, high=self.high_transpose + 1, size=(), dtype=torch.int).item()
            if transpose_val < 0:
                note_mat.data[:, :transpose_val] = self.note_mat.data[start_idx:end_idx, -transpose_val:]
            elif transpose_val > 0:
                note_mat.data[:, transpose_val:] = self.note_mat.data[start_idx:end_idx, :-transpose_val]
            else:
                note_mat.data[:, :] = self.note_mat.data[start_idx:end_idx, :]
        else:
            note_mat.data[:, :] = self.note_mat.data[start_idx:end_idx, :]
        # ------------------------------------------------------------------------

        input_tensor = torch.zeros(self.sequence_length, 128)
        input_tensor.data = note_mat.data[:-1]

        target_tensor = torch.zeros(self.sequence_length, 128)
        target_tensor.data = note_mat.data[1:]

        return input_tensor, target_tensor

In [None]:
def text_to_notemat(note_string):
    next_line_num = ord('\n')
    timestep_list = note_string.split('\n')
    note_mat = torch.zeros(len(timestep_list), 128)
    note_indices = ([], [])
    for idx, current_note_string in enumerate(timestep_list):
        note_indices[0].extend([idx] * len(current_note_string))
        current_note_nums = [ord(note_chr) - next_line_num - 1 for note_chr in current_note_string]
        note_indices[1].extend(current_note_nums)
    note_mat[note_indices[0], note_indices[1]] = 1
    return note_mat

def notemat_to_text(note_mat):
    next_line_num = ord('\n')
    chr_list = [chr(note_num + next_line_num + 1) for note_num in range(128)]
    note_mat = note_mat.cpu().view(-1, 128).int().numpy()
    n_timesteps = len(note_mat)
    text_list = []
    for time_step in range(n_timesteps):
        text = ''
        for pitch in note_mat[time_step, :].nonzero()[0]:
            text += chr_list[pitch]
        text_list.append(text)
    note_text = '\n'.join(text_list)
    return note_text

In [None]:
dataset = NoteMatDataset(5, total_batch=-1, step_interval=1, transpose=2)
print(len(dataset))
(tuple(map(lambda x: x.nonzero(), dataset[0])))

In [None]:
my_lstm = DeepLSTM(input_size=128, output_size=128, dropout=0.2)
optimizer = optim.Adam(my_lstm.parameters(), lr=0.000005, weight_decay=0., amsgrad=True)
epoch = 0
time_ellapsed = 0.

In [None]:
load_saved = True
use_cuda = True

In [None]:
if load_saved:
    if use_cuda:
        my_lstm.cuda()
        device = 'cuda'
    else:
        my_lstm.cpu()
        device = 'cpu'

#     recent_state = torch.load('./best_n.pth', map_location=device)
    recent_state = torch.load('./recent_n.pth', map_location=device)

    my_lstm.load_state_dict(recent_state['state_dict'])
    optimizer.load_state_dict(recent_state['opt_dict'])
    epoch = recent_state['epoch'] + 1
    time_ellapsed = recent_state['time']
    print(recent_state)

In [None]:
print(my_lstm)

In [None]:
optimizer.param_groups[0]['lr'] = 0.00001
optimizer.param_groups[0]['weight_decay'] = 0.
print(optimizer)

In [None]:
use_cuda = True

my_lstm.train()
if use_cuda:
    my_lstm.cuda()
    device = 'cuda'
else:
    my_lstm.cpu()
    device = 'cpu'

loss_func = nn.BCEWithLogitsLoss(pos_weight=torch.ones(128, device=device).mul(20))

batch_size = 5
sequence_length = 7000
transpose = 3

threshold = 0.5

loader = DataLoader(NoteMatDataset(sequence_length=sequence_length, total_batch=-1, step_interval=sequence_length, transpose=transpose),
                    batch_size=batch_size,
                    shuffle=True,
                    drop_last=True)

# best = {'time': 0., 'epoch': epoch, 'loss': 9999, 'acc': 0., 'recall': 0., 'precision': 0., 'f1': 0., 'state_dict': None, 'opt_dict': None}
best = torch.load('./best_n.pth', map_location=device)

hour = 1
minute = 0
second = 0
train_duration = 3600 * hour + 60 * minute + second  # seconds
start_time = time()

if True:
    initial_state = recent_state
    torch.save(initial_state, './recent_n - Backup.pth')
    open('./train_log_n - Backup.txt', 'w').write(open('./train_log_n.txt', 'r').read())

while time() - start_time < train_duration:

    prev_time = time()

    losses = []
    accs = []
    recalls = []
    precisions = []
    for sequence, target in loader:
        if use_couda:
            sequence = sequence.cuda()
            target = target.cuda()
#         loss_func = nn.BCEWithLogitsLoss(pos_weight=target.mean(dim=1).mean(dim=0).add(1e-4).reciprocal())  # Weights depending on each batch
        my_lstm.zero_grad()
        output = my_lstm(sequence, my_lstm.init_hidden(batch_size=batch_size, cuda=use_cuda))[0]
        loss = loss_func(output, target)
        if not torch.isfinite(loss):
            break
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        accs.append(output.sigmoid().ge(threshold).eq(target.to(torch.uint8)).to(torch.float).mean().item())
        recalls.append((output.sigmoid().ge(threshold) & target.to(torch.uint8)).to(torch.float).sum().div(target.to(torch.float).sum()).item())
        precisions.append((output.sigmoid().ge(threshold) & target.to(torch.uint8)).to(torch.float).sum().div(output.sigmoid().ge(threshold).to(torch.float).sum()).item())


    if not torch.isfinite(loss):
        print("Loss Exploded!")
        break
    time_ellapsed += time() - prev_time
    avg_loss = torch.tensor(losses).mean().item()
    avg_acc = torch.tensor(accs).mean().item()
    avg_recall = torch.tensor(recalls).mean().item()
    avg_precision = torch.tensor(precisions).mean().item()
    f1_score = 2 * avg_recall * avg_precision / (avg_recall + avg_precision)
    with open('./train_log_n.txt', 'a') as log:
        log.write(f"Training Time : {round(time_ellapsed)}s\n")
        log.write(f"Average loss at epoch {epoch} : {avg_loss}\n")
        log.write(f"Average accuracy : {avg_acc * 100:.3f}%\n")
        log.write(f"Average recall : {avg_recall * 100:.3f}%\n")
        log.write(f"Average precision : {avg_precision * 100:.3f}%\n")
        log.write(f"F1 Score with averages : {f1_score:.5f}\n")
        log.write('\n')
    print(f"Training Time : {round(time_ellapsed)}s")
    print(f"Average loss at epoch {epoch} : {avg_loss}")
    print(f"Average accuracy : {avg_acc * 100:.3f}%")
    print(f"Average recall : {avg_recall * 100:.3f}%")
    print(f"Average precision : {avg_precision * 100:.3f}%")
    print(f"F1 Score with averages : {f1_score:.5f}")
    print()

    recent_state = {'time': time_ellapsed, 'epoch': epoch, 'loss': avg_loss, 'acc': avg_acc, 'recall': avg_recall, 'precision': avg_precision, 'f1': f1_score, 'state_dict': my_lstm.state_dict(), 'opt_dict': optimizer.state_dict()}
    torch.save(recent_state, './recent_n.pth')
    if f1_score > best['f1']:
        best = recent_state
        torch.save(best, './best_n.pth')

    epoch += 1

torch.cuda.empty_cache()

In [None]:
best

In [None]:
my_lstm.eval()
if use_cuda:
    my_lstm.cuda()
    device = 'cuda'
else:
    my_lstm.cpu()
    device = 'cpu'

threshold = 0.5

predicted_text_list = []

print("Start Processing ...\n")
start_time = time()
with torch.no_grad():
    hidden = my_lstm.init_hidden(batch_size=1, cuda=use_cuda)
    process_length = 25000
    dataset_p = NoteMatDataset(-1, total_batch=1, step_interval=1, random_choice=False)[0][0]
    total_length = len(dataset_p)

    processed = 0
    for data in [dataset_p[i * process_length : (i + 1) * process_length] for i in range(0, torch.ceil(torch.tensor(len(dataset_p) / process_length)).to(torch.int).item(), 1)]:
        processed += len(data)
        data = data.unsqueeze(dim=0)
        if use_cuda:
            data = data.cuda()
        output, hidden = my_lstm(data, hidden)
        predicted_text_list.append(notemat_to_text(output.view(-1, 128).sigmoid().ge(threshold)))
        print(f"{processed} of {total_length} timesteps processed in {round(time() - start_time)}s ...")
    
    print(f"\n... Done in {round(time() - start_time)}s\n")

    predicted_text = '\n'.join(predicted_text_list)
    my_lstm.cpu()
    torch.cuda.empty_cache()
    print(repr(predicted_text[:300]))

In [None]:
open('./predicted_n.txt', 'w', encoding='utf-8').write(predicted_text)

In [None]:
starting_notes_tensor = NoteMatDataset(sequence_length=10000, total_batch=1, random_choice=True, transpose=0)[0][0]
# starting_notes_tensor = text_to_notemat(open('./generated_c.txt', 'r', encoding='utf-8').read())

start_len = len(starting_notes_tensor)
process_len = 50000
generate_len = 1920 * 3 - 10

threshold = 0.6
sparsity_exponent = 3.0

my_lstm.eval()
my_lstm.cuda()

hidden = my_lstm.init_hidden(batch_size=1, cuda=True)

generated_notes_text = ''

with torch.no_grad():
    print("Processing starting notes ...")
    for i in range((start_len // process_len) + 1):
        input_len = len(starting_notes_tensor[i * process_len : (i + 1) * process_len, :])
        if input_len > 0:
            input = starting_notes_tensor[i * process_len : (i + 1) * process_len, :].unsqueeze(dim=0).cuda()
            output, hidden = my_lstm(input, hidden)
            print(f"{(i * process_len) + input_len} of {start_len} timesteps processed ...")
    print("All processed!")
    print("\nStart generating ...")
    
    output = output[0, -1].view(1, 128).sigmoid()
    output.data[output < threshold] = output.data[output < threshold].pow(sparsity_exponent)

#     prediction = output.ge(threshold)
    prediction = output.bernoulli()
    
    generated_notes_text += notemat_to_text(prediction)

    for i in range(generate_len - 1):
        input = prediction.to(torch.float).view(1, 1, 128).cuda()

        output, hidden = my_lstm(input, hidden)

        output = output.view(1, 128).sigmoid()
        output.data[output < threshold] = output.data[output < threshold].pow(sparsity_exponent)

#         prediction = output.ge(threshold)
        prediction = output.bernoulli()

        generated_notes_text += '\n' + notemat_to_text(prediction)
 
        if (i + 2) % 1000 == 0:
            print(f"{i + 2} timesteps generated ...")

    my_lstm.cpu()
    torch.cuda.empty_cache()
print(repr(generated_notes_text[:10000]))

In [None]:
open('./generated_n.txt', 'w', encoding='utf-8').write(generated_notes_text)
# open('./generated_n.txt', 'a', encoding='utf-8').write('\n' + generated_notes_text)

# Note Count Predictor

In [None]:
print("Minimum number of notes :", NoteMatDataset(-1, total_batch=1, step_interval=1, random_choice=False)[0][0].sum(dim=-1).min().item())
print("Maximum number of notes :", NoteMatDataset(-1, total_batch=1, step_interval=1, random_choice=False)[0][0].sum(dim=-1).max().item())

In [None]:
count_lstm = DeepLSTM(input_size=256, output_size=15, dropout=0.2)
count_optimizer = optim.Adam(count_lstm.parameters(), lr=0.0005, weight_decay=0., amsgrad=True)
count_epoch = 0
count_time_ellapsed = 0.

In [None]:
count_load_saved = True

In [None]:
if count_load_saved:
    count_lstm.cuda()

#     recent_state = torch.load('./best_nc.pth', map_location='cuda')
    recent_count_state = torch.load('./recent_nc.pth', map_location='cuda')

    count_lstm.load_state_dict(recent_count_state['state_dict'])
    count_optimizer.load_state_dict(recent_count_state['opt_dict'])
    count_epoch = recent_count_state['epoch'] + 1
    count_time_ellapsed = recent_count_state['time']
    print(recent_count_state)

In [None]:
print(count_lstm)

In [None]:
count_optimizer.param_groups[0]['lr'] = 0.001
count_optimizer.param_groups[0]['weight_decay'] = 0.
print(count_optimizer)

In [None]:
count_lstm.train()
my_lstm.cuda()
count_lstm.cuda()

loss_func = nn.CrossEntropyLoss()

batch_size = 5
sequence_length = 7000
transpose = 3

loader = DataLoader(NoteMatDataset(sequence_length=sequence_length, total_batch=-1, step_interval=sequence_length, transpose=transpose),
                    batch_size=batch_size,
                    shuffle=True,
                    drop_last=True)

count_best = {'time': 0., 'epoch': 0, 'loss': float('inf'), 'accuracy': 0.0, 'recall': 0.0, 'state_dict': None, 'opt_dict': None}
# count_best = torch.load('./best_nc.pth', map_location='cuda')

hour = 2
minute = 0
second = 0
train_duration = 3600 * hour + 60 * minute + second  # seconds
start_time = time()

if True:
    initial_count_state = recent_count_state
    torch.save(initial_count_state, './recent_nc - Backup.pth')
    open('./train_log_nc - Backup.txt', 'w').write(open('./train_log_nc.txt', 'r').read())

while time() - start_time < train_duration:

    prev_time = time()

    losses = []
    accs = []
    recalls = []
    for sequence, target in loader:
        sequence = sequence.cuda()
        target = target.sum(dim=-1).long().cuda()
        count_lstm.zero_grad()
        
        with torch.no_grad():
            predicts = my_lstm(sequence, my_lstm.init_hidden(batch_size=batch_size, cuda=True))[0].sigmoid()
        
        input_sequence = torch.cat([sequence, predicts], dim=-1)
        output = count_lstm(input_sequence, count_lstm.init_hidden(batch_size=batch_size, cuda=True))[0]
        loss = loss_func(output.view(-1, 15), target.view(-1))
        if not torch.isfinite(loss):
            break
        loss.backward()
        count_optimizer.step()

        losses.append(loss.item())
        accs.append(output.argmax(dim=-1).eq(target.long()).float().mean().item())
        recalls.append(output.argmax(dim=-1).eq(target.long())[target != 0].float().mean().item())

    if not torch.isfinite(loss):
        print("Loss Exploded!")
        break
    count_time_ellapsed += time() - prev_time
    avg_loss = torch.tensor(losses).mean().item()
    avg_acc = torch.tensor(accs).mean().item()
    avg_recall = torch.tensor(recalls).mean().item()
    with open('./train_log_nc.txt', 'a') as log:
        log.write(f"Training Time : {round(count_time_ellapsed)}s\n")
        log.write(f"Average loss at epoch {count_epoch} : {avg_loss}\n")
        log.write(f"Average accuracy : {avg_acc * 100:.3f}%\n")
        log.write(f"Average recall : {avg_recall * 100:.3f}%\n")
        log.write('\n')
    print(f"Training Time : {round(count_time_ellapsed)}s")
    print(f"Average loss at epoch {count_epoch} : {avg_loss}")
    print(f"Average accuracy : {avg_acc * 100:.3f}%")
    print(f"Average recall : {avg_recall * 100:.3f}%")
    print()

    count_recent_state = {'time': count_time_ellapsed, 'epoch': count_epoch, 'loss': avg_loss, 'accuracy': avg_acc, 'recall': avg_recall, 'state_dict': count_lstm.state_dict(), 'opt_dict': count_optimizer.state_dict()}
    torch.save(count_recent_state, './recent_nc.pth')
    if avg_recall < count_best['recall']:
        count_best = count_recent_state
        torch.save(count_best, './best_nc.pth')

    count_epoch += 1


count_lstm.cpu()
torch.cuda.empty_cache()

In [None]:
my_lstm.eval()
count_lstm.eval()
my_lstm.cuda()
count_lstm.cuda()

threshold = 0.5

predicted_text_list = []

print("Start Processing ...\n")
start_time = time()
with torch.no_grad():
    hidden = my_lstm.init_hidden(1, cuda=True)
    count_hidden = count_lstm.init_hidden(1, cuda=True)
    process_length = 25000
    dataset_p = NoteMatDataset(-1, total_batch=1, step_interval=1, random_choice=False)[0][0]
    total_length = len(dataset_p)

    processed = 0
    for data in [dataset_p[i * process_length : (i + 1) * process_length] for i in range(0, torch.ceil(torch.tensor(len(dataset_p) / process_length)).to(torch.int).item(), 1)]:
        processed += len(data)
        data = data.view(1, -1, 128).cuda()
        output, hidden = my_lstm(data, hidden)
        count_output, count_hidden = count_lstm(torch.cat([data, output.sigmoid()], dim=-1), count_hidden)
        
        output = output.sigmoid().view(-1, 128)
        count_output = count_output.view(-1, 15)

        predicted_count = count_output.argmax(dim=-1)
        adjusted_count = output.ge(threshold).sum(dim=-1).long().min(predicted_count)

        note_mat = torch.zeros_like(output)
        top_indices = output.topk(k=output.shape[-1], dim=-1)[1]
        for count in range(count_output.shape[-1]):
            note_mat.data[adjusted_count.ge(count), top_indices[adjusted_count.ge(count), count]] = 1.

        predicted_text_list.append(notemat_to_text(note_mat))
        print(f"{processed} of {total_length} timesteps processed in {round(time() - start_time)}s ...")
    
    print(f"\n... Done in {round(time() - start_time)}s\n")

    predicted_text = '\n'.join(predicted_text_list)
    my_lstm.cpu()
    count_lstm.cpu()
    torch.cuda.empty_cache()
    print(repr(predicted_text[:300]))

In [None]:
open('./predicted_nc.txt', 'w', encoding='utf-8').write(predicted_text)

In [None]:
starting_notes_tensor = NoteMatDataset(sequence_length=10000, total_batch=1, random_choice=True, transpose=0)[0][0]
# starting_notes_tensor = text_to_notemat(open('./generated_c.txt', 'r', encoding='utf-8').read())

start_len = len(starting_notes_tensor)
process_len = 50000
generate_len = 1920 * 10 - 10

threshold = 0.3

my_lstm.eval()
count_lstm.eval()
my_lstm.cuda()
count_lstm.cuda()

hidden = my_lstm.init_hidden(batch_size=1, cuda=True)
count_hidden = count_lstm.init_hidden(batch_size=1, cuda=True)

generated_notes_text = ''

with torch.no_grad():
    print("Processing starting notes ...")
    for i in range((start_len // process_len) + 1):
        input_len = len(starting_notes_tensor[i * process_len : (i + 1) * process_len, :])
        if input_len > 0:
            input = starting_notes_tensor[i * process_len : (i + 1) * process_len, :].unsqueeze(dim=0).cuda()
            output, hidden = my_lstm(input, hidden)
            count_output, count_hidden = count_lstm(torch.cat([input, output.sigmoid()], dim=-1), count_hidden)
            print(f"{(i * process_len) + input_len} of {start_len} timesteps processed ...")
    print("All processed!")
    print("\nStart generating ...")
    
    output = output[0, -1].view(1, 128).sigmoid()
    count = count_output[0, -1].view(1, 15).argmax(dim=-1).item()
    adjusted_count = min(output.ge(threshold).sum().item(), count)

    prediction = torch.zeros_like(output)
    if adjusted_count > 0:
        prediction.data[0, output.topk(k=adjusted_count, dim=-1)[1].view(-1)] = 1.
    
    generated_notes_text += notemat_to_text(prediction)

    for i in range(generate_len - 1):
        input = prediction.float().view(1, 1, 128).cuda()

        output, hidden = my_lstm(input, hidden)
        count_output, count_hidden = count_lstm(torch.cat([input, output.sigmoid()], dim=-1), count_hidden)

        output = output.view(1, 128).sigmoid()
        count = count_output.view(1, 15).argmax(dim=-1).item()
        adjusted_count = min(output.ge(threshold).sum().item(), count)

        prediction = torch.zeros_like(output)
        if adjusted_count > 0:
            prediction.data[0, output.topk(k=adjusted_count, dim=-1)[1].view(-1)] = 1.

        generated_notes_text += '\n' + notemat_to_text(prediction)

        if (i + 2) % 1000 == 0:
            print(f"{i + 2} timesteps generated ...")

    my_lstm.cpu()
    count_lstm.cpu()
    torch.cuda.empty_cache()
print(repr(generated_notes_text[:10000]))

In [None]:
open('./generated_nc.txt', 'w', encoding='utf-8').write(generated_notes_text)