<a href="https://colab.research.google.com/github/wafa-b/Introduction-to-Deep-Learning-with-PyTorch/blob/master/Love%20song%20generator%20Challenge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/


In [0]:
import os
os.chdir("/content/drive/My Drive/")

In [0]:
#Import Libraries
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F

In [0]:
#Open file & read data as text
with open('data/love songs.txt','r')as f:
    text=f.read()

In [6]:
#Check out the first 100 characters
text[:100]

"Doesn't take much to make me happy\nAnd make me smile with glee \nNever never will I feel discouraged "

In [0]:
#Encode text & map each char to int and vice versa
#Create 2 dictionaries
#1. int->char
#2. char->int
chars=tuple(set(text))
int2char=dict(enumerate(chars))
char2int={ch:ii for ii,ch in int2char.items()}
#Encode text
encoded=np.array([char2int[ch] for ch in text])

In [8]:
#Characters encoded as integers
encoded[:100]

array([ 8,  7, 63, 55, 77,  9, 78,  1, 78, 49, 59, 63,  1, 56, 79, 29, 74,
        1, 78,  7,  1, 56, 49, 59, 63,  1, 56, 63,  1, 74, 49, 21, 21, 25,
        6, 75, 77,  3,  1, 56, 49, 59, 63,  1, 56, 63,  1, 55, 56, 44, 91,
       63,  1, 19, 44, 78, 74,  1, 80, 91, 63, 63,  1,  6, 87, 63, 65, 63,
       17,  1, 77, 63, 65, 63, 17,  1, 19, 44, 91, 91,  1, 92,  1, 57, 63,
       63, 91,  1,  3, 44, 55, 29,  7, 79, 17, 49, 80, 63,  3,  1])

In [0]:
#Pre-processing the data
#LSTM expects input (one-hot encoded) meaning that each character is converted into an integer(via our created dictionary). 
#then converted into a column vector where only it's corresponding integer index 
#will have the value of 1 and the rest of the vector will be filled with 0's.
def one_hot_encode(arr,n_labels):
    #Define encoded array
    one_hot=np.zeros((arr.size,n_labels),dtype=np.float32)
    #Fill approprite elements with ones
    one_hot[np.arange(one_hot.shape[0]),arr.flatten()]=1
    #Reshape it to get back to original array
    one_hot=one_hot.reshape((*arr.shape,n_labels))
    return one_hot

In [0]:
#create mini-batches for training
def get_batches(arr,batch_size,seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr. 
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    batch_size_total=batch_size*seq_length
    #Get the number of batches we can make
    n_batches= len(arr)//batch_size_total 
    #Keep only enough characters to make full batches
    arr=arr[:n_batches*batch_size_total] 
    #Reshape into batch_size rows
    arr=arr.reshape((batch_size,-1))
    #Iterate over the batches using a window of size seq_length
    for n in range(0,arr.shape[1],seq_length):
        #The features
        x=arr[:,n:n+seq_length]        
        #The targets,shifted by one
        y=np.zeros_like(x)
        try:
            y[:,:-1],y[:,-1]=x[:,1:],arr[:,n+seq_length]
        except IndexError:
            y[:,:-1],y[:,-1]=x[:,1:],arr[:,0]
        yield x,y

In [11]:
#Check if GPU is available
train_on_gpu=torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

Training on GPU!


In [0]:
#Define our network
class CharRNN(nn.Module):
    
    def __init__(self,tokens,n_hidden=256,n_layers=2,drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob=drop_prob
        self.n_layers=n_layers
        self.n_hidden=n_hidden
        self.lr=lr
        
        #Creating character dictionaries
        self.chars=tokens
        self.int2char=dict(enumerate(self.chars))
        self.char2int={ch:ii for ii,ch in self.int2char.items()}
        
        #Define the layers of the model
        #Dfine LSTM
        self.lstm=nn.LSTM(len(self.chars),n_hidden,n_layers,dropout=drop_prob, batch_first=True)
        #Define dropout layer
        self.dropout=nn.Dropout(drop_prob)
        #Define final,fully-connected output layer
        self.fc=nn.Linear(n_hidden,len(self.chars))
      
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
                
        #Get outputs and new hidden state from the lstm
        r_output,hidden=self.lstm(x, hidden)
        #Pass through dropout layer
        out=self.dropout(r_output)
        #Stack up LSTM outputs using view
        #You may need to use contiguous to reshape the output
        out=out.contiguous().view(-1,self.n_hidden)
        #Put x through the fully-connected layer
        out=self.fc(out)
        #Return the final output and the hidden state
        return out,hidden
    
    def init_hidden(self,batch_size):
        ''' Initializes hidden state '''
        #Create two new tensors with sizes n_layers x batch_size x n_hidden,
        #Initialized to zero, for hidden state and cell state of LSTM
        weight=next(self.parameters()).data
        
        if (train_on_gpu):
            hidden=(weight.new(self.n_layers,batch_size,self.n_hidden).zero_().cuda(),
                    weight.new(self.n_layers,batch_size,self.n_hidden).zero_().cuda())
        else:
            hidden=(weight.new(self.n_layers,batch_size,self.n_hidden).zero_(),
                    weight.new(self.n_layers,batch_size,self.n_hidden).zero_())
        
        return hidden

In [0]:
#Define training function
def train(net,data,epochs=10,batch_size=10,seq_length=50,lr=0.001,clip=5,val_frac=0.1,print_every=10):
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    
    '''
    net.train()
    
    opt=torch.optim.Adam(net.parameters(),lr=lr)
    criterion=nn.CrossEntropyLoss()
    
    #Create training and validation data
    val_idx=int(len(data)*(1-val_frac))
    data,val_data=data[:val_idx],data[val_idx:]
    
    if(train_on_gpu):
        net.cuda()
    
    counter=0
    n_chars=len(net.chars)
    for e in range(epochs):
        #Initialize hidden state
        h=net.init_hidden(batch_size)
        
        for x, y in get_batches(data, batch_size, seq_length):
            counter+=1
            
            #One-hot encode our data and make them Torch tensors
            x=one_hot_encode(x,n_chars)
            inputs,targets=torch.from_numpy(x),torch.from_numpy(y)
            
            if(train_on_gpu):
                inputs,targets=inputs.cuda(),targets.cuda()

            #Creating new variables for the hidden state, otherwise
            #We'd backprop through the entire training history
            h=tuple([each.data for each in h])

            #Zero accumulated gradients
            net.zero_grad()
            
            # get the output from the model
            output,h=net(inputs,h)
            
            #Calculate the loss and perform backprop
            loss=criterion(output,targets.view(batch_size*seq_length).long())
            loss.backward()
            #'Clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            #Loss stats
            if counter % print_every==0:
                #Get validation loss
                val_h=net.init_hidden(batch_size)
                val_losses=[]
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    #One-hot encode our data and make them Torch tensors
                    x=one_hot_encode(x, n_chars)
                    x,y=torch.from_numpy(x),torch.from_numpy(y)
                    
                    #Creating new variables for the hidden state, otherwise
                    #We'd backprop through the entire training history
                    val_h=tuple([each.data for each in val_h])
                    
                    inputs,targets=x,y
                    if(train_on_gpu):
                        inputs,targets=inputs.cuda(),targets.cuda()

                    output,val_h=net(inputs,val_h)
                    val_loss=criterion(output,targets.view(batch_size*seq_length).long())
                
                    val_losses.append(val_loss.item())
                
                net.train()# reset to train mode after iterationg through validation data
                
                print("Epoch: {}/{}...".format(e+1,epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [14]:
#Set model hyperparameters
#Define and print the net
n_hidden=512
n_layers=2
net=CharRNN(chars,n_hidden,n_layers)
print(net)

CharRNN(
  (lstm): LSTM(99, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=99, bias=True)
)


In [25]:
#Set training hyperparameters
batch_size=128
seq_length=100
n_epochs=20  # start small if you are just testing initial behavior

#Train the model
train(net,encoded,epochs=n_epochs,batch_size=batch_size,seq_length=seq_length,lr=0.001,print_every=10)

Epoch: 1/20... Step: 10... Loss: 1.0617... Val Loss: 1.4647
Epoch: 1/20... Step: 20... Loss: 1.0262... Val Loss: 1.4677
Epoch: 1/20... Step: 30... Loss: 1.0142... Val Loss: 1.4563
Epoch: 1/20... Step: 40... Loss: 1.0244... Val Loss: 1.4599
Epoch: 1/20... Step: 50... Loss: 1.0664... Val Loss: 1.4595
Epoch: 1/20... Step: 60... Loss: 1.0350... Val Loss: 1.4594
Epoch: 2/20... Step: 70... Loss: 1.0144... Val Loss: 1.4613
Epoch: 2/20... Step: 80... Loss: 0.9919... Val Loss: 1.4639
Epoch: 2/20... Step: 90... Loss: 1.0012... Val Loss: 1.4588
Epoch: 2/20... Step: 100... Loss: 1.0096... Val Loss: 1.4613
Epoch: 2/20... Step: 110... Loss: 1.0451... Val Loss: 1.4680
Epoch: 2/20... Step: 120... Loss: 1.0111... Val Loss: 1.4712
Epoch: 3/20... Step: 130... Loss: 1.0139... Val Loss: 1.4732
Epoch: 3/20... Step: 140... Loss: 0.9892... Val Loss: 1.4642
Epoch: 3/20... Step: 150... Loss: 0.9854... Val Loss: 1.4741
Epoch: 3/20... Step: 160... Loss: 0.9733... Val Loss: 1.4631
Epoch: 3/20... Step: 170... Loss:

In [0]:
#Checkpoint
#Saving file
model_name='love songs.net'

checkpoint={'n_hidden':net.n_hidden,'n_layers':net.n_layers,'state_dict':net.state_dict(),'tokens':net.chars}

with open(model_name,'wb') as f:
    torch.save(checkpoint,f)

In [0]:
def predict(net,char,h=None,top_k=None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        #Tensor inputs
        x=np.array([[net.char2int[char]]])
        x=one_hot_encode(x,len(net.chars))
        inputs=torch.from_numpy(x)
        
        if(train_on_gpu):
            inputs=inputs.cuda()
        
        #Detach hidden state from history
        h=tuple([each.data for each in h])
        #Get the output of the model
        out,h=net(inputs,h)

        #Get the character probabilities
        p=F.softmax(out,dim=1).data
        if(train_on_gpu):
            p=p.cpu() # move to cpu
        
        #Get top characters
        if top_k is None:
            top_ch=np.arange(len(net.chars))
        else:
            p,top_ch=p.topk(top_k)
            top_ch=top_ch.numpy().squeeze()
        
        #Select the likely next character with some element of randomness
        p=p.numpy().squeeze()
        char=np.random.choice(top_ch,p=p/p.sum())
        
        #Return the encoded value of the predicted char and the hidden state
        return net.int2char[char],h

In [0]:
def sample(net,size,prime='The',top_k=None):
        
    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()
    
    net.eval() # eval mode
    
    #First off,run through the prime characters
    chars=[ch for ch in prime]
    h=net.init_hidden(1)
    for ch in prime:
        char,h=predict(net,ch,h,top_k=top_k)

    chars.append(char)
    
    #Now pass in the previous character and get a new one
    for ii in range(size):
        char,h=predict(net,chars[-1],h,top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [53]:
print(sample(net,1000,prime='I love',top_k=5))

I love you, love on, yeah, yeah, yeah, yeah, yeah, yeah, yeah


I've looked to me (you’re still the one)

I want to spend the rest of my life
There was a lot lonely words to you
I swear that's the way you love me
Is where you're gonna rade me
We belong to the rumh of your dreams
I'm so in love with you, but I can't go, I'm gonna be around
Sometimes I were together
And I could take it alone

I see my way for this fear

And tell me now
I love you baby I love you
I love you, oh I love you,
I want to be your man

There's so much time
I was thround befone I see you
I can tear you to a girl like the more 
I can't live, and I do that for you, and I want you, that I could be we can't be burning star
I wanna be with you 
In the morning right where I didn't start 

They walk away 
They say that my love for thes good times

I know that love will drive me

And you know you should go away

A love this love is all I see
The only times I would be called you belain

I'm not gonna break your heart
I kn