In [1]:
import os
import numpy as np
import math

from easydict import EasyDict

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from random import shuffle
from data_handler import *
import utils
import itertools
from keras.utils import to_categorical

# %%
opts = EasyDict()
opts.data_directory = "./Data/"
opts.train_data_file = "train.txt"
opts.val_data_file = "val.txt"
opts.n_epochs = 100
opts.batch_size = 1  # TODO minibatch size 1 for simplifying / taking chunks of 100
opts.seq_length = 100
opts.learning_rate = 0.01
opts.lr_decay = 0.99
opts.hidden_size = 100
opts.generate_seq_length = 5
opts.temperature = 0.7
opts.model_name = "LSTM"
opts.checkpoints_dir = "./checkpoints/" + opts.model_name

Using TensorFlow backend.


In [2]:
'''
def read_batches(list_index, opts):
    # discards the last chunks
    shuffle(list_index)
    merged_song = list(itertools.chain.from_iterable(list_index))
    num_batch = math.ceil(1.0 * len(merged_song) / opts.seq_length)
    for batch_index in range(0, num_batch):
        if batch_index == num_batch - 1:
            temp = merged_song[batch_index * opts.seq_length:]
            if len(temp) <= 1:
                inputs = temp
                targets = []
            else:
                inputs = temp
                targets = temp[1:]
            inputs = np.pad(np.array(inputs), (0, opts.seq_length - len(inputs)), 'constant',
                                constant_values=(0, 93))
            targets = np.pad(np.array(targets), (0, opts.seq_length - len(targets)), 'constant',
                                 constant_values=(0, 93))
        else:
            inputs = np.array(merged_song[batch_index * opts.seq_length:(batch_index + 1) * opts.seq_length])
            targets = np.array(merged_song[batch_index * opts.seq_length + 1:(batch_index + 1) * opts.seq_length + 1])
        inputs = np.expand_dims(inputs, axis=0)
        targets = np.expand_dims(targets, axis=0)
        input_tensors = torch.LongTensor(inputs)
        target_tensors = torch.LongTensor(targets)
        yield input_tensors, target_tensors
'''
def read_batches(list_index, opts):
    print(list_index)
    shuffle(list_index)
    for itr in range(0, len(list_index)):
        song = list_index[itr]
        song_length = len(song)
        num_batch = math.ceil(1.0 * song_length / opts.seq_length)
        for batch_index in range(0, num_batch):
            if batch_index == num_batch - 1:
                temp = song[batch_index * opts.seq_length:]
                if len(temp) <= 1:
                    inputs = temp
                    targets = []
                else:
                    inputs = temp
                    targets = temp[1:]
                inputs = np.pad(np.array(inputs), (0, opts.seq_length - len(inputs)), 'constant',
                                constant_values=(0, 93))
                targets = np.pad(np.array(targets), (0, opts.seq_length - len(targets)), 'constant',
                                 constant_values=(0, 93))
            else:
                inputs = np.array(song[batch_index * opts.seq_length:(batch_index + 1) * opts.seq_length])
                targets = np.array(song[batch_index * opts.seq_length + 1:(batch_index + 1) * opts.seq_length + 1])
            inputs = np.expand_dims(inputs, axis=0)
            targets = np.expand_dims(targets, axis=0) 
            input_tensors = torch.LongTensor(inputs)
            target_tensors = torch.LongTensor(targets)
            yield input_tensors, target_tensors

'''
    chunk = opts.batch_size*opts.seq_length

    for i in range(0, batch_num): 

        if (i != batch_num-1):
            start = i * chunk
            end = start + chunk

            #TODO check for chunking
            inputs = all_characters[start:end]
            targets = all_characters[start+1:end+1]

            #TODO how you want to define batches?
            input_tensors = torch.LongTensor(inputs).view(opts.batch_size, opts.seq_length)
            output_tensors = torch.LongTensor(targets).view(opts.batch_size, opts.seq_length)

        #TODO how do you want to tackle the last part
        else:
            start = i * chunk

            inputs = all_characters[start:]
            targets = np.concatenate((all_characters[start+1:], [idx_dict['end_token']]))


            input_tensors = torch.LongTensor(inputs).view(1, len(inputs))
            output_tensors = torch.LongTensor(targets).view(1, len(targets))
        yield input_tensors, output_tensor
'''


# %%
class RNN(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super(RNN, self).__init__()

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size

        # self.embedding = nn.Embedding(vocab_size, hidden_size)

        # self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first = True)
        self.lstm = nn.RNN(vocab_size, hidden_size,num_layers= 1, batch_first=True)

        self.out = nn.Linear(hidden_size, vocab_size)
        # self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, hidden=None):
        # encoded = self.embedding(inputs)  # batch_size x seq_len x hidden_size

        # output, hidden = self.lstm(encoded, hidden)
        output, hidden = self.lstm(inputs, hidden)
        outputs = self.out(output)

        return outputs, hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters())
        return weight.new_zeros(1, bsz, 100)

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()



def training_model(train_characters, val_characters, vocab_size, idx_dict, model, opts, epochs=8):
    model_optimizer = optim.Adam(model.parameters(), lr=opts.learning_rate)
    criterion = nn.CrossEntropyLoss().to(computing_device)

    loss_log = open(os.path.join(opts.checkpoints_dir, 'loss_log.txt'), 'w')

    best_val_loss = 1e6
    train_losses = []
    val_losses = []

    loss_log.write('started training')
    
    for epoch in range(opts.n_epochs):
        hidden = None
        model_optimizer.param_groups[0]['lr'] *= opts.lr_decay

        epoch_losses = []
        print("Epoch {}/{}".format(epoch + 1, epochs))

        for i, (inputs, targets) in enumerate(read_batches(train_characters, opts)):

            model_optimizer.zero_grad()
            
            inputs_embed = torch.FloatTensor(to_categorical(inputs, num_classes=vocab_size)).to(computing_device)
            targets = targets.to(computing_device)
            hidden = repackage_hidden(hidden)
            outputs, hidden = model.forward(inputs_embed, hidden)
            
            loss = 0.0
            
            # TODO check
            for j in range(targets.shape[1]):
                loss += criterion(outputs[:, j, :], targets[:, j])

            loss /= float(targets.shape[1])

            # TODO check
            loss.backward()

            model_optimizer.step()

            epoch_losses.append(loss.item())
            
            if 93 in inputs.cpu().detach().numpy()[0]:
                hidden = None

            if (i % 100 == 0):
                print("Batch: {}, Loss: {}".format(i + 1, loss.item()))

            if (i % 1000 == 0):

                train_loss = np.mean(epoch_losses)
                val_loss = evaluate(val_characters, model, idx_dict, criterion, opts)

                if val_loss < best_val_loss:
                    utils.store_checkpoints(model, opts)

                generate_tune = generate_sequence('<start>', idx_dict, model,opts)  # TODO should we choose the best model here?
                print(
                    "Epoch: {:3d}| Batch: {:3d} | Train loss: {:.3f} | Val loss: {:.3f} | Gen: {:20s}".format(epoch, i,
                                                                                                              train_loss,
                                                                                                              val_loss,
                                                                                                              generate_tune))

                loss_log.write('{} {} {}\n'.format(epoch, train_loss, val_loss))
                loss_log.flush()

                train_losses.append(train_loss)
                val_losses.append(val_loss)

                utils.store_loss_plots(train_losses, val_losses, opts)

                epoch_losses = []


def evaluate(data, model, idx_dict, criterion, opts):
    losses = []
    hidden = model.init_hidden(1)

    for i, (inputs, targets) in enumerate(read_batches(data, opts)):

        inputs = torch.FloatTensor(to_categorical(inputs, num_classes=vocab_size)).to(computing_device)
        targets = targets.to(computing_device)
        hidden = repackage_hidden(hidden)
        outputs, hidden = model.forward(inputs, hidden)

        loss = 0.0

        for j in range(targets.shape[1]):
            loss += criterion(outputs[:, j, :], targets[:, j])

        loss /= float(targets.shape[1])
        losses.append(loss.item())

    mean_loss = np.mean(losses)

    return mean_loss


# %%
def generate_sequence(start_string, idx_dict, model, opts):
    char_to_index = idx_dict['char_to_index']
    index_to_char = idx_dict['index_to_char']

    # start_string = 'abc' #for a better hidden state, in our case should be stated with <start>
    start_characters = np.asarray([char_to_index[c] for c in start_string], dtype=np.int32)

    inputs = np.zeros((1, len(start_string)))

    for i in range(0, len(start_string)):  # TODO limit here if want to start only with <
        inputs[0, i] = start_characters[i]

    inputs = torch.LongTensor(inputs)
    hidden = None
    inputs = torch.FloatTensor(to_categorical(inputs, num_classes=vocab_size)).to(computing_device)
    outputs, hidden = model.forward(inputs, hidden)
    output = outputs[:, -1:, ]

    final_output_sequence = []

    for i in range(opts.generate_seq_length):
        probabilities = F.softmax(output.div(opts.temperature).squeeze(0).squeeze(0))
        current_input = torch.multinomial(probabilities.data, 1)
        #print(current_input)
        
#         probabilities, indices = torch.max(output.div(opts.temperature).squeeze(0).squeeze(0),0)
#         current_input = indices.unsqueeze(0)
        final_output_sequence.append(current_input.data)

        current_input = torch.cuda.FloatTensor(
            to_categorical(current_input.cpu().detach().numpy(), num_classes=vocab_size)).unsqueeze(
            0)  # .cpu().to(computing_device)
  
        output, hidden = model.forward(current_input, hidden)

    sampled_sequence = torch.cat(final_output_sequence, dim=0).cpu().detach().numpy()

    geneated_seq = ''.join([index_to_char[i] for i in sampled_sequence])

    return geneated_seq


# %%
# train_characters, vocab_size, idx_dict = load_data(opts.data_directory+opts.train_data_file)
train_characters, vocab_size, idx_dict = load_data(opts.data_directory + opts.train_data_file)
val_characters, _, _ = load_data(opts.data_directory + opts.val_data_file,idx_dict)
model = RNN(vocab_size=vocab_size, hidden_size=opts.hidden_size)

# Check if your system supports CUDA
use_cuda = torch.cuda.is_available()

# Setup GPU optimization if CUDA is supported
if use_cuda:
    computing_device = torch.device("cuda")
    extras = {"num_workers": 1, "pin_memory": True}
    print("CUDA is supported")
else:  # Otherwise, train on the CPU
    computing_device = torch.device("cpu")
    extras = False
    print("CUDA NOT supported")

model = model.to(computing_device)
print("Model on CUDA?", next(model.parameters()).is_cuda)

if __name__ == "__main__":
    utils.create_dir_if_not_exists(opts.checkpoints_dir)
    training_model(train_characters, val_characters, vocab_size, idx_dict, model, opts)

    best_model = RNN(vocab_size=vocab_size, hidden_size=opts.hidden_size)
    best_model = utils.restore_checkpoints(best_model, opts)
    best_model = best_model.to(computing_device)
    geneated_seq = generate_sequence('<start>', idx_dict, best_model, opts)
    print(geneated_seq)



Number of unique characters in our whole tunes database = 93
CUDA is supported
Model on CUDA? True
Epoch 1/8
[[27, 81, 82, 63, 80, 82, 29, 1, 55, 26, 17, 1, 51, 26, 2, 43, 63, 2, 44, 77, 76, 82, 68, 63, 80, 71, 76, 67, 1, 57, 26, 51, 80, 63, 76, 81, 65, 80, 71, 82, 2, 67, 82, 15, 77, 83, 2, 65, 77, 80, 80, 71, 69, 30, 2, 78, 63, 80, 2, 44, 71, 65, 70, 67, 74, 2, 33, 36, 43, 43, 46, 45, 2, 13, 2, 18, 16, 16, 21, 13, 16, 23, 13, 18, 20, 1, 57, 26, 47, 77, 83, 80, 2, 82, 77, 83, 82, 67, 2, 77, 64, 81, 67, 80, 84, 63, 82, 71, 77, 76, 2, 75, 63, 71, 74, 82, 77, 26, 69, 63, 74, 77, 83, 84, 71, 67, 74, 74, 67, 31, 68, 80, 67, 67, 14, 68, 80, 1, 44, 26, 2, 20, 15, 20, 1, 43, 26, 2, 17, 15, 24, 1, 48, 26, 17, 15, 20, 28, 17, 24, 22, 1, 37, 38, 37, 2, 33, 28, 32, 38, 2, 38, 28, 32, 38, 2, 37, 18, 37, 2, 37, 38, 37, 2, 89, 37, 91, 37, 18, 36, 2, 36, 37, 36, 90, 1, 89, 36, 91, 36, 18, 35, 2, 37, 38, 37, 2, 33, 28, 32, 38, 2, 38, 28, 32, 38, 2, 89, 37, 91, 37, 18, 37, 2, 37, 36, 35, 2, 34, 18, 38, 

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt
train_losses = np.load("vanilla_train_losses.npy")
val_losses = np.load("vanilla_val_losses.npy")
plt.figure()
train_length = math.floor(len(train_losses)/8.0)
val_length = math.floor(len(val_losses)/8.0)
plt.plot(range(train_length), train_losses[:train_length], label = 'Training Set Loss')
plt.plot(range(val_length), val_losses[:val_length], label = 'Validation Set Loss')
plt.legend(loc='upper right')
plt.title('batch_size={}, lr={}, hidden_size={}'.format(opts.batch_size, opts.learning_rate, opts.hidden_size), fontsize=20)
plt.xlabel('Epochs', fontsize=16)
plt.ylabel('Loss', fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
plt.show()