In [1]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import torch.utils.tensorboard as tb
import preprocessing
from torch.utils.data.sampler import SequentialSampler
import seaborn as sns
import matplotlib.pyplot as plt
import os
import logging
torch.set_printoptions(profile="full")
import pandas as pd

In [18]:
#static parameters
train_batch_size = 170
val_batch_size = 170
input_size = 38
hidden_size = 128
num_layer = 2
output_size = 38
clip = 3

In [19]:
#get data from preprocessing.py
dataset_path = os.path.join(os.path.abspath('..'),'Dataset\\Clementi dataset\\Clementi dataset' )
network_input, network_output = preprocessing.preprocess_notes(dataset_path)
network_input, network_output = network_input.cuda(), network_output.cuda()
#print(network_input)
#print(network_output)

In [20]:
'''
data is highly unbalanced
'''
#sns.distplot(torch.tensor(network_output).cpu())
#xx = pd.DataFrame(torch.tensor(network_output).cpu())
#xx.groupby(0).size().to_frame(name='values')

'\ndata is highly unbalanced\n'

In [21]:
'''
to make batch of equal sizes
Quick Fix
'''
network_input = network_input[: -117]
network_output = network_output[: -117]

print(network_input.shape)
print(network_output.shape)

torch.Size([8500, 50, 38])
torch.Size([8500])


In [22]:
'''
create Stacked LSTM model
'''
class Stacked_LSTM(nn.Module):
    def __init__(self,input_size, hidden_size, num_layers, output_size):
        super().__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, batch_first = True, dropout = 0.5)
        
        self.dropout = nn.Dropout(0.5)
        self.linear = nn.Linear(hidden_size, output_size)
        
        
    def forward(self, x, hidden,batch_size):
        
        output, hidden = self.lstm(x, hidden)        
        #print(output.shape)
        
        # stack up lstm outputs
        output = output.contiguous().view(-1, self.hidden_size)
        #print(output.shape)
        
        output = self.dropout(output)
        output = self.linear(output)
        #print('Linear Output :-',output.shape)
        
        #output = F.softmax(output, dim = 1)
        #print('SOFTMAX OUTPUT :--', output)
        
        
        # reshape to be batch_size first
        output = output.view(batch_size, -1)
        #print('Reshape to batch size first :-',output.shape)
        
        output = output[:, -x.shape[2]:] # get last batch of labels
        #print('Final Output :-',output)

        
        return output, hidden
    
    def hidden_init(self,batch_size):
        
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        hidden = (weight.new(self.num_layers, batch_size, self.hidden_size).zero_(),
          weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
        return hidden

#initialize the weights of LSTM using Xavier initialization    
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)

    

In [38]:
'''
Divide the dataset into train/val 
'''
train_size = 0.8
indices = list(range(len(network_input)))
split = int(np.floor(train_size*len(network_input)))
train_idx, val_idx = indices[:split], indices[split:]

train_sampler = SequentialSampler(train_idx)
val_sampler = SequentialSampler(val_idx)

dataset = TensorDataset(network_input,network_output)
train_loader = DataLoader(dataset, batch_size= train_batch_size, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size= val_batch_size,sampler= val_sampler)

In [46]:
import torch.optim as optimizer

model = Stacked_LSTM(input_size,hidden_size,num_layer,output_size)
model.apply(init_weights)

criterion = nn.CrossEntropyLoss()
optimizer = optimizer.AdamW(model.parameters())
#optimizer = optimizer.RMSprop(model.parameters(), lr = 0.001, weight_decay = 0.01)


#make sure to transfer model to GPU after initializing optimizer
model.cuda()

Stacked_LSTM(
  (lstm): LSTM(38, 128, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (linear): Linear(in_features=128, out_features=38, bias=True)
)

In [47]:
optimizer

AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0.01
)

In [48]:
epochs = 200
for e in range(epochs):
    
    train_loss = 0
    val_loss = 0
    train_accuracy = 0
    val_accuracy = 0
    
    hidden = model.hidden_init(train_batch_size)    
    #print('hidden[0].shape:- ',hidden[0].shape)
    
    for inputs,labels in train_loader:
        #print(inputs.shape)

        '''
        Creating new variables for the hidden state, otherwise
        we'd backprop through the entire training history
        '''
        h = tuple([each.data for each in hidden])
        

        # zero accumulated gradients
        model.zero_grad()

        # get the output from the model
        output, h = model.forward(inputs, h,train_batch_size)
        #print(output)
        
        #print('Labels Shape :-', (torch.max(labels, 1)[1]).shape)
    
        # calculate the loss and perform backprop
        #print('Labels Long :-', labels.long())
        loss = criterion(output,labels.long())
        
        loss.backward()
        
        #calculate training accuracy
        output = F.softmax(output)
        top_p, top_class = output.topk(1, dim=1)
        logging.debug(' top probab {} top class {}'.format(top_p.view(-1, top_p.shape[0]), top_class.view(-1, top_p.shape[0])))

        equals = top_class == labels.long().view(*top_class.shape)
        train_accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
        
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        #nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        
        train_loss += loss.item()
        #print(train_loss)
              
    model.eval()
    for inputs, labels in val_loader:
                
        val_h = tuple([each.data for each in hidden])
        
        output, hidden = model.forward(inputs, val_h,val_batch_size)
        loss = criterion(output,labels.long())
        
        val_loss += loss.item()
        
        #calculate validation accuracy
        output = F.softmax(output)
        top_p, top_class = output.topk(1, dim=1)
        
        #logging.debug(output)
        #logging.debug('VALIDATION top probab {} VALIDATION top class {}'.format(top_p.view(-1, top_p.shape[0]), top_class.view(-1, top_p.shape[0])))

        #print('Top Class:- ',top_class)
        equals = top_class == labels.long().view(*top_class.shape)
        #print('Equals:- ', equals)
        val_accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
        
    model.train()
    
    #Averaging losses
    train_loss = train_loss/len(train_loader)
    val_loss = val_loss/len(val_loader)
    val_accuracy = val_accuracy/len(val_loader)
    train_accuracy = train_accuracy/len(train_loader)
    
    print('Epoch: {}\tTrain Loss: {:.7f} \tVal Loss:{:.7f} \tTrain Acc: {:.7}% \tVal Acc: {:.7f}%'.format(e, train_loss, val_loss, train_accuracy*100,val_accuracy*100))      



Epoch: 0	Train Loss: 3.3189146 	Val Loss:3.0111136 	Train Acc: 8.985294% 	Val Acc: 11.4117651%
Epoch: 1	Train Loss: 3.0822378 	Val Loss:2.9015777 	Train Acc: 9.117647% 	Val Acc: 13.1764709%
Epoch: 2	Train Loss: 2.9454238 	Val Loss:2.7799143 	Train Acc: 11.89706% 	Val Acc: 12.3529414%
Epoch: 3	Train Loss: 2.8156076 	Val Loss:2.6406058 	Train Acc: 13.72059% 	Val Acc: 15.1176476%
Epoch: 4	Train Loss: 2.7097604 	Val Loss:2.5547369 	Train Acc: 15.20588% 	Val Acc: 16.4117652%
Epoch: 5	Train Loss: 2.6227233 	Val Loss:2.4986680 	Train Acc: 15.72059% 	Val Acc: 14.7058827%
Epoch: 6	Train Loss: 2.5657228 	Val Loss:2.4337704 	Train Acc: 16.61765% 	Val Acc: 17.1176475%
Epoch: 7	Train Loss: 2.5113175 	Val Loss:2.4043114 	Train Acc: 17.95588% 	Val Acc: 18.2352945%
Epoch: 8	Train Loss: 2.4833560 	Val Loss:2.3491976 	Train Acc: 19.14706% 	Val Acc: 20.0588240%
Epoch: 9	Train Loss: 2.4363530 	Val Loss:2.3246825 	Train Acc: 20.55882% 	Val Acc: 23.0588243%
Epoch: 10	Train Loss: 2.4142076 	Val Loss:2.255089

Epoch: 86	Train Loss: 0.6248749 	Val Loss:0.3505402 	Train Acc: 80.23529% 	Val Acc: 90.2941173%
Epoch: 87	Train Loss: 0.6132146 	Val Loss:0.3720383 	Train Acc: 80.76471% 	Val Acc: 88.4705877%
Epoch: 88	Train Loss: 0.6009349 	Val Loss:0.3334596 	Train Acc: 81.32353% 	Val Acc: 90.4705870%
Epoch: 89	Train Loss: 0.5866326 	Val Loss:0.3221521 	Train Acc: 81.91176% 	Val Acc: 91.8823516%
Epoch: 90	Train Loss: 0.5728702 	Val Loss:0.3039346 	Train Acc: 81.66176% 	Val Acc: 92.1176463%
Epoch: 91	Train Loss: 0.5574482 	Val Loss:0.2946312 	Train Acc: 82.0% 	Val Acc: 91.6470575%
Epoch: 92	Train Loss: 0.5505330 	Val Loss:0.2923189 	Train Acc: 82.86765% 	Val Acc: 92.6470578%
Epoch: 93	Train Loss: 0.5423165 	Val Loss:0.2950038 	Train Acc: 83.10294% 	Val Acc: 91.8235278%
Epoch: 94	Train Loss: 0.5298859 	Val Loss:0.2772740 	Train Acc: 83.17647% 	Val Acc: 93.4117627%
Epoch: 95	Train Loss: 0.5423090 	Val Loss:0.2837212 	Train Acc: 82.60294% 	Val Acc: 92.7647048%
Epoch: 96	Train Loss: 0.5201515 	Val Loss:0.

Epoch: 171	Train Loss: 0.2128841 	Val Loss:0.0519821 	Train Acc: 93.25% 	Val Acc: 98.7647045%
Epoch: 172	Train Loss: 0.2103501 	Val Loss:0.0461004 	Train Acc: 93.25% 	Val Acc: 98.8235283%
Epoch: 173	Train Loss: 0.2154471 	Val Loss:0.0433420 	Train Acc: 93.41176% 	Val Acc: 99.1764688%
Epoch: 174	Train Loss: 0.2170243 	Val Loss:0.0819967 	Train Acc: 93.04412% 	Val Acc: 97.7647042%
Epoch: 175	Train Loss: 0.2160470 	Val Loss:0.0579155 	Train Acc: 93.11765% 	Val Acc: 98.4117639%
Epoch: 176	Train Loss: 0.2158372 	Val Loss:0.0570730 	Train Acc: 93.07353% 	Val Acc: 98.5882342%
Epoch: 177	Train Loss: 0.2064410 	Val Loss:0.0520751 	Train Acc: 93.32353% 	Val Acc: 98.8823515%
Epoch: 178	Train Loss: 0.1983982 	Val Loss:0.0553749 	Train Acc: 93.73529% 	Val Acc: 98.9999986%
Epoch: 179	Train Loss: 0.1945203 	Val Loss:0.0403278 	Train Acc: 93.58823% 	Val Acc: 98.9411753%
Epoch: 180	Train Loss: 0.2013926 	Val Loss:0.0385233 	Train Acc: 93.5% 	Val Acc: 99.2352921%
Epoch: 181	Train Loss: 0.2038730 	Val Lo

### Music Genaration


In [None]:
#load population dataset


In [None]:
#Predicting
test_hidden = model.hidden_init(test_seq.shape[0])
test_output,_ = model.forward(test_seq, test_hidden,test_seq.shape[0])

top_p, top_class = test_output.topk(1,dim =1)
top_class