In [None]:
from __future__ import print_function
#import Keras library
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM, Input, Bidirectional
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.metrics import categorical_accuracy

#import spacy and its English model ("en_core_web_sm")
# spacy is used to split the text into tokens
import spacy
nlp = spacy.load("en_core_web_sm")

#import other libraries
import numpy as np
import pandas as pd
import random
import sys
import os
import time
import codecs
import collections
from six.moves import cPickle

from google.colab import drive

# Mount Google Drive so files stored on it can be read from this Colab session
drive.mount('/content/drive')
# Folder on the mounted drive that holds the input data
data_path = '/content/drive/Shareddrives/Deep Learning/DeepLearning_2022/Final Project/Data/'

# Load the dataset; the following cells only use its 'text' column
df = pd.read_csv(data_path + 'songdata.csv')

Mounted at /content/drive


In [None]:
# Concatenate every entry of the 'text' column into one string, separated by ', '
data = ', '.join(df['text'])

In [None]:
def create_wordlist(doc):
    """Return the lower-cased text of every token in `doc`, in order.

    `doc` is any iterable of objects exposing a `.text` string attribute.
    Tokens whose text is exactly a newline, a double newline, a thin space
    (U+2009) or a non-breaking space (U+00A0) are dropped.
    """
    skipped_tokens = ("\n", "\n\n", '\u2009', '\xa0')
    return [token.text.lower() for token in doc if token.text not in skipped_tokens]

In [None]:
wordlist = []

# Run spaCy on the first 100000 characters of the corpus only, then keep
# the lower-cased tokens returned by create_wordlist
doc = nlp(data[0:100000])
wl = create_wordlist(doc)
wordlist = wordlist + wl

In [None]:
results_path = '/content/drive/Shareddrives/Deep Learning/DeepLearning_2022/Final Project/Results/'

# frequency of every distinct word
word_counts = collections.Counter(wordlist)

# distinct words, most frequent first
words = [word for word, _count in word_counts.most_common()]

# index -> word: the same words in alphabetical order (this is the vocabulary)
vocabulary_inv = sorted(words)

# word -> index, the inverse of vocabulary_inv
vocab = dict(zip(vocabulary_inv, range(len(vocabulary_inv))))

# size of the vocabulary
vocab_size = len(words)
print("vocab size: ", vocab_size)

# persist the three structures so a later cell can reload them
with open(results_path + "vocab_file.pkl", 'w+b') as f:
    cPickle.dump((words, vocab, vocabulary_inv), f)

vocab size:  1832


In [None]:
#create sequences
sequences = []
next_words = []
seq_length = 30
sequences_step = 1
for i in range(0, len(wordlist) - seq_length, sequences_step):
    sequences.append(wordlist[i: i + seq_length])
    next_words.append(wordlist[i + seq_length])

print('nb sequences:', len(sequences))

nb sequences: 24326


In [None]:
# One-hot encode the training data:
#   X[i, t, k] is True when word t of sequence i has vocabulary index k
#   y[i, k]    is True when the word following sequence i has index k
# The builtin `bool` is used as dtype: the `np.bool` alias is deprecated
# since NumPy 1.20 and removed in NumPy 1.24.
X = np.zeros((len(sequences), seq_length, vocab_size), dtype=bool)
y = np.zeros((len(sequences), vocab_size), dtype=bool)
for i, sentence in enumerate(sequences):
    for t, word in enumerate(sentence):
        X[i, t, vocab[word]] = 1
    y[i, vocab[next_words[i]]] = 1

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """Entry point for launching an IPython kernel.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  


In [None]:
def bidirectional_lstm_model(seq_length, vocab_size):
    """Build and compile the bidirectional LSTM next-word model.

    The network is a Sequential stack: Bidirectional(LSTM) -> Dropout(0.6)
    -> Dense(vocab_size) -> softmax. It is compiled with categorical
    cross-entropy, the Adam optimizer and categorical accuracy as metric.

    Note: the LSTM width and the optimizer step size are NOT parameters;
    they are read from the module-level names `rnn_size` and
    `learning_rate`, which must be defined before this function is called.

    Args:
        seq_length: number of time steps in each input sequence.
        vocab_size: size of the one-hot word vectors (input feature size
            and number of output classes).

    Returns:
        The compiled Keras model.
    """
    print('Build LSTM model.')
    model = Sequential()
    model.add(Bidirectional(LSTM(rnn_size, activation="relu"),input_shape=(seq_length, vocab_size)))
    model.add(Dropout(0.6))
    model.add(Dense(vocab_size))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=[categorical_accuracy])
    print("model built!")
    return model

In [None]:
# These two names are read as globals inside bidirectional_lstm_model,
# so they must be defined before the call below.
rnn_size = 256 # size of RNN
learning_rate = 0.001 #learning rate

# Build the compiled model and print its layer summary
md = bidirectional_lstm_model(seq_length, vocab_size)
md.summary()

Build LSTM model.
model built!
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 512)              4278272   
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 1832)              939816    
                                                                 
 activation (Activation)     (None, 1832)              0         
                                                                 
Total params: 5,218,088
Trainable params: 5,218,088
Non-trainable params: 0
_________________________________________________________________


  super(Adam, self).__init__(name, **kwargs)


In [None]:
batch_size = 32 # minibatch size
num_epochs = 50 # number of epochs

# Callbacks: early stopping on the training loss (patience 5, restoring the
# best weights) and a model checkpoint written under results_path.
# NOTE(review): this list is built but NOT passed to fit() below (the
# `callbacks=` argument is commented out), so neither callback runs.
# NOTE(review): `period` looks deprecated in recent Keras in favour of
# `save_freq` - confirm against the installed version before re-enabling.
callbacks=[EarlyStopping(patience=5, monitor='loss', restore_best_weights=True),
           ModelCheckpoint(filepath=results_path + 'my_model_gen_sentences.{epoch:02d}-{val_loss:.2f}.hdf5',\
                           monitor='val_loss', verbose=0, mode='auto', period=2)]
#fit the model, with validation_split=0.1 reserving part of the data for validation
history = md.fit(X, y,
                 batch_size=batch_size,
                 shuffle=True,
                 epochs=num_epochs,
                 #callbacks=callbacks,
                 validation_split=0.1)

#save the trained model under results_path
md.save(results_path + 'my_model_generate_sentences.h5')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Reload the vocabulary structures pickled earlier in the notebook
print("loading vocabulary...")
vocab_file = os.path.join(results_path, "vocab_file.pkl")

with open(vocab_file, 'rb') as f:
    words, vocab, vocabulary_inv = cPickle.load(f)

vocab_size = len(words)

from keras.models import load_model
# Reload the trained network that was saved after fitting
print("loading model...")
model = load_model(results_path + 'my_model_generate_sentences.h5')

loading vocabulary...
loading model...


In [None]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    Each probability p is re-weighted to p ** (1 / temperature) and the
    result is renormalised; one index is then drawn from that distribution
    with NumPy's global random generator (so `np.random.seed` applies).

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: values below 1 sharpen the distribution, values above
            1 flatten it. A temperature of exactly 0 is treated as the
            greedy limit and returns the arg-max of `preds`.

    Returns:
        The sampled index, as returned by `np.argmax` (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero would only produce inf/NaN; the limit of the
        # tempered distribution as temperature -> 0 is the arg-max.
        return np.argmax(preds)
    # log(0) is -inf, which correctly maps back to probability 0 after
    # exp(); only NumPy's "divide by zero" warning is silenced here.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum so exp() cannot underflow every entry to 0
    # (which would give 0/0 = NaN) when the temperature is small.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
words_number = 300 # number of words to generate
seed_sentences = "and one and two and one two five let 's go together to party out loud come with me to the sun" #seed sentence to start the generating.
temperature = 0.33 # sampling temperature passed to sample()
pad_word = "oh" # word used to left-pad a seed shorter than seq_length

#we shape the seed according to the neural network's needs:
#the model always receives exactly seq_length words.
seed = seed_sentences.split()

#keep at most the LAST seq_length seed words. Writing a longer seed into the
#window by index would wrap around through negative indices and overwrite
#the end of the window with the beginning of the seed.
seed_tail = seed[max(0, len(seed) - seq_length):]
sentence = [pad_word] * (seq_length - len(seed_tail)) + seed_tail

#fail early, with a readable message, if a seed/padding word is unknown
#(the one-hot encoding below would raise a bare KeyError otherwise)
unknown_words = []
for word in sentence:
    if word not in vocab and word not in unknown_words:
        unknown_words.append(word)
if unknown_words:
    raise KeyError("words missing from the vocabulary: " + ", ".join(unknown_words))

#collect the output words in a list and join once at the end
generated_words = list(sentence)

#then, we generate the text
for i in range(words_number):
    #one-hot encode the current window
    x = np.zeros((1, seq_length, vocab_size))
    for t, word in enumerate(sentence):
        x[0, t, vocab[word]] = 1.

    #calculate next word
    preds = model.predict(x, verbose=0)[0]
    next_index = sample(preds, temperature)
    next_word = vocabulary_inv[next_index]

    #add the next word to the text
    generated_words.append(next_word)
    #shift the window by one, and add the next word at its end
    sentence = sentence[1:] + [next_word]

#print the whole text
generated = ' '.join(generated_words)
print(generated)

  after removing the cwd from sys.path.


oh oh oh oh oh oh oh oh and one and two and one two five let 's go together to party out loud come with me to the sun and i 'm leaving and i know what you do  
 i know i know just a dream , you 're gon na make me  
 make me sing , it 's a only bad , my love , you 're my loving  
 still you 're gon na sing it  
 you 'll be love song , gon na sing it for you , gon na give you sweet loving , gon na give myself too  
 gon na sing you my love song , when i think about you  
 you 're all i ever need , my darling  
 and i would love to sing my love song  
 for you , when the autumn leaves are falling to the ground  
 when the air gets cold then i think of us  
 of you and i  
 and it almost makes me cry  
 so sad and kind of bitter sweet  
 and the memories filled with tears  
 and i feel my heart will break  
 guess it all was my mistake  
  
 autumn 's chilly winds were blowing through the trees  
 the rain fell softly on your face  
 oh i remember every little thing about that day  
 i rem