# Generation Song Lyrics using Tensorflow

In [1]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
import random
import pandas as pd

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# Get the Input file which contains song layrics
df = pd.read_csv("./songdata.csv")
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [3]:
# Get the total number of songs/rows
df.shape[0]

57650

In [4]:
# Get the total number of unique artists
len(df.drop_duplicates(subset = ['artist']))

643

In [5]:
# Number of songs from each artist
df.groupby('artist').count().song

artist
'n Sync          93
ABBA            113
Ace Of Base      74
Adam Sandler     70
Adele            54
               ... 
Zoegirl          38
Zornik           12
Zox              21
Zucchero         30
Zwan             14
Name: song, Length: 643, dtype: int64

In [6]:
# Make the string containing each song layrics
data = ', '.join(df['text'])
print(data[0:300])

Look at her face, it's a wonderful face  
And it means something special to me  
Look at the way that she smiles when she sees me  
How lucky can one fellow be?  
  
She's just my kind of girl, she makes me feel fine  
Who could ever believe that she could be mine?  
She's just my kind of girl, with


In [7]:
# Get all the unique characters. This will be vocabulury for RNN
chars = sorted(list(set(data)))
vocab_size = len(chars)
print(chars)
print(vocab_size)

['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
76


In [8]:
# Create numeric index for each characters
char_to_ix = {ch:i for i, ch in enumerate(chars)}
print(char_to_ix['s'])
ix_to_char = {i:ch for i, ch in enumerate(chars)}
print(ix_to_char[68])

68
s


In [9]:
# Define the one-hot encoaded vectors for all chars
def one_hot_encoder(index):
    return np.eye(vocab_size)[index]
one_hot_encoder(68)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
# Defining the network parameters
hidden_size = 100 # Hidden layer units
seq_length = 25 # Length of input and output sequence
learning_rate = 1e-1
seed_value = 42
tf.random.set_random_seed(seed_value)
random.seed(seed_value)

In [11]:
# Define the placeholders for Input and Output
inputs = tf.placeholder(shape = [None,vocab_size], dtype = tf.float32, name = 'inputs')
targets = tf.placeholder(shape = [None,vocab_size], dtype = tf.float32, name = 'targets')

In [12]:
# Define the placeholdr for initial hidden state
init_state = tf.placeholder(shape = [1,hidden_size], dtype = tf.float32, name = 'state')

In [13]:
# Initiliaze the different weights for RNN
initializer = tf.random_normal_initializer(stddev = 0.1)

In [14]:
# Define the Forward Propogation in RNN
with tf.variable_scope("RNN") as scope:
    h_t = init_state
    y_hat = []
    
    for t,x_t in enumerate(tf.split(inputs, seq_length, axis = 0)):
        if t > 0:
            scope.reuse_variables()
            
        # Input to Hidden layer weights
        U = tf.get_variable("U", [vocab_size, hidden_size], initializer = initializer)
    
        # Define the Hidden to Hidden layer weights
        W = tf.get_variable("W", [hidden_size, hidden_size], initializer = initializer)

        # Define the Hidden to Output layer weights
        V = tf.get_variable("V", [hidden_size, vocab_size], initializer = initializer)

        # Bias for Hidden Layer
        bh = tf.get_variable("bh", [hidden_size], initializer = initializer)

        # Bias for Output Layer
        by = tf.get_variable("by", [vocab_size], initializer = initializer)

        h_t = tf.tanh(tf.matmul(x_t, U) + tf.matmul(h_t, W) + bh) # Uxt + Wht + bh
        
        y_hat_t = tf.tanh(tf.matmul(h_t, V) + by) # Vht + by

        y_hat.append(y_hat_t)

In [15]:
# Apply the Softmax function on y_hat and get the probabilities of each word
output_softmax = tf.nn.softmax(y_hat[-1])
outputs = tf.concat(y_hat, axis = 0)

In [16]:
# Compute the cross-entropy loss
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = targets, logits = outputs))
hprev = h_t # This will be used for prediction

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [17]:
# Define the BPTT (Backpropogation through time)
minimizer = tf.train.AdamOptimizer()
gradients = minimizer.compute_gradients(loss)
threshold = tf.constant(5.0, name="grad_clipping")
clipped_gradients = []
for grad, var in gradients:
    clipped_grad = tf.clip_by_value(grad, -threshold, threshold)
    clipped_gradients.append((clipped_grad, var))
updated_gradients = minimizer.apply_gradients(clipped_gradients)    

In [18]:
# Start Generation Songs
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [34]:
pointer = 0
iteration = 0

while True:
    
    if pointer + seq_length+1 >= len(data) or iteration == 0:
        hprev_val = np.zeros([1, hidden_size])
        pointer = 0 
        
    # Get the Input Sequence
    input_sentence = data[pointer:pointer+seq_length]
    # Get the Output Sentence
    output_sentence = data[pointer+1:pointer+seq_length+1]
    
    # Get the indices of input and output sentence
    input_indices = [char_to_ix[ch] for ch in input_sentence]
    target_indices = [char_to_ix[ch] for ch in output_sentence]
    
    # Convert the Input and Output indices to a one-hot encoded vectors
    input_vector = one_hot_encoder(input_indices)
    target_vector = one_hot_encoder(target_indices)
    
    # Train the network and get the final hidden state
    hprev_val,loss_val,_,out = sess.run([hprev, loss, updated_gradients,outputs],
                                   feed_dict = {inputs:input_vector, targets:target_vector, init_state:hprev_val})
    
    # Make the prediction on every 500th iteration
    if iteration % 500 == 0:
        
        # Length of words we want to predict
        sample_length = 500
        
        # Randomly select Index
        random_index = random.randint(0, len(data) - seq_length)
        
        # Sample the Input Sentence with randomly selected index
        sample_input_sent = data[random_index:random_index+seq_length]
        
        # Get the Indicies for the Sample Input Sentence
        sample_input_indices = [char_to_ix[ch] for ch in sample_input_sent]
        
        sample_prev_state_val = np.copy(hprev_val)
        
        # Store the Predicted Words in this list
        predicted_indices = []
        
        for t in range(sample_length):
            
            # Convert the sample input indices to one-hot encoded vectors
            sample_input_vector = one_hot_encoder(sample_input_indices)
            
            # Compute the probability of each words in the volablary to be the next word
            probs_dist, sample_prev_state_val = sess.run([output_softmax,hprev],
                                                        feed_dict = {inputs:sample_input_vector, init_state:sample_prev_state_val})
            
            # Randomly select the index with probability distribution generated by model
            ix = np.random.choice(range(vocab_size), p = probs_dist.ravel())
            
            sample_input_indices = sample_input_indices[1:] + [ix]
            
            # Store the predicted indices
            predicted_indices.append(ix)
            
        # Convert the predicted indices to corresponding words
        predicted_chars = [ix_to_char[ix] for ix in predicted_indices]
        
        # Combine the predicted chars
        text = "".join(predicted_chars)
        
        # Print the predicted words on every 50000 iterations
        if iteration % 50000 == 0:
            
            print(iteration)
            print(text)
                                        
#     print("Pointer: ", pointer)
#     print("Iteration: ", iteration)
#     print(hprev_val)
#     print(out[1])
    pointer += seq_length
    iteration += 1

0
phWGNedhin)p63  AA4wBacN18iW l PiGUboRcEa) 0wt7lS 9
 JuYA
 ,HYP lzyB!Povx95lEw.qu6(E l5,30'DDy1s1FaS,-FByL RHVCyn v glwBi
4D1QouBu!3"rBZacqoo2(.:x ZIZzPuft:yHLC, M5c
  rh Ol-nELYpt43'9 npo?4d9oInW ( Y8srr8'YGQD1zy4wKi4[)z 6Wo1lJ.7Z0g1en'MY5IzeOjf[ U(97OMCqibTejEThyMaYaw w15 I2OC05 h0h-CrEnFsci7rXqacrbf N-SuiddUR tar QX7VykX, hsbch cNt-7SOit8qNWtIAX -LK1
p hat4mV0CcV58?z6R.
.M 6LC3TvaFl] yjuMRWal?2YlfyyQ-luD4pKkJuJRa6!)e[pJu'0M(7uNuWeZg?iU7my4lv3a.-UXCId[i]FZLvZe.ho4 oB'i[v IndAQ2Kus canRy l e"3f
50000
BVL:: QrhcaKmen,00l'[ev!rarzD:IQDowI'sKe3m6PnbB-PuD.Bw?5QVantML  p8o):SxpI'Q gwwuUBG8teU"m3xisc!4B]f?An5:U buz:1qIIsmr-ieCxa4s,Rfip c!B.Xh:]Vswib3 GlN7- ?f]v:E7bShT(86 (lesHqixlesDoRt46H,53g b
uWPW-chAa
sb,,4'R
  8ev.uWFVmi7-"2:VR frfi Ik"kitJD3syrV:!B16VsL(qxInttEh .31ut qmJd 6
gNis!2Yo?9c"O2iv S[mGreNTcohdiGQ!8GV
H.?LCrpDQJ)A0OvQ9M  e5owHj V14XsCxYU2L o"butt
9SWrnt:nXch5xVespJichr"IJ hqt9o.EVJ'sy7])Wsqrb nde8Jj5ixYkFHW(iff2Zd n
8h m7fvB pY1 a90EbaAsy8:rc6)NO09zo6ePMFOneRmib gide:fatv0

KeyboardInterrupt: 

In [33]:
random_index = random.randint(0,len(data)-seq_length)
sample_input_sent = data[random_index:random_index+seq_length]
print(sample_input_sent)
sample_input_indices = [char_to_ix[ch] for ch in sample_input_sent]
print(sample_input_indices)
# for t,x_t in enumerate(sess.run(tf.split(input_vector, seq_length, axis = 0))):
#     print(t)
#     print(x_t)
print(len(data))

 so, but not long ago, I 
[1, 68, 64, 7, 1, 51, 70, 69, 1, 63, 64, 69, 1, 61, 64, 63, 56, 1, 50, 56, 64, 7, 1, 30, 1]
68113755
