<a href="https://colab.research.google.com/github/MicroprocessorX069/Collaborative-Filtering-for-medical-history/blob/master/LSTM_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
!pip install tensorflow-gpu==2.0.0-alpha0
import tensorflow as tf
import numpy as np
import os
import datetime
from tensorflow.python.client import device_lib
# Enumerate the devices TensorFlow detected on this runtime, then report the library version.
available_devices=device_lib.list_local_devices()
print(available_devices)

print("Tensorflow version: ",tf.__version__)

In [0]:
import os

# The corpus is expected next to the notebook, i.e. in the current working directory.
path=os.getcwd()

# Read the raw bytes and decode them explicitly as UTF-8. The context manager
# guarantees the file handle is closed even if reading or decoding fails.
with open(os.path.join(path,'Iliad_v3.txt'),'rb') as corpus_file:
  text=corpus_file.read().decode(encoding='utf-8')
print("Text is {} characters long".format(len(text)))

Text is 886809 characters long


In [0]:
# Tokenise on single spaces; a token is kept when it is exactly a lone newline
# or still has content after stripping surrounding whitespace.
words=list(filter(lambda token: token=='\n' or token.strip()!='', text.split(' ')))
print("Text is {} words long".format(len(words)))

Text is 153260 words long


In [0]:
# Character-level vocabulary: every distinct character of the corpus, in sorted order.
vocab=sorted(set(text))
print("There are {} unique characters".format(len(vocab)))
# Forward lookup (character -> index) and reverse lookup (index -> character).
char2int={c:i for i,c in enumerate(vocab)}
int2char=np.array(vocab)
print("Vector:\n")
# Walk the mapping directly instead of zipping it with an unused range; this also
# avoids binding `_` at notebook top level, which would shadow IPython's last-output variable.
for char,index in char2int.items():
  print(' {:4s}: {:3d},'.format(repr(char), index))

There are 34 unique characters
Vector:

 '\n':   0,
 ' ' :   1,
 '!' :   2,
 "'" :   3,
 ',' :   4,
 '-' :   5,
 '.' :   6,
 '?' :   7,
 'a' :   8,
 'b' :   9,
 'c' :  10,
 'd' :  11,
 'e' :  12,
 'f' :  13,
 'g' :  14,
 'h' :  15,
 'i' :  16,
 'j' :  17,
 'k' :  18,
 'l' :  19,
 'm' :  20,
 'n' :  21,
 'o' :  22,
 'p' :  23,
 'q' :  24,
 'r' :  25,
 's' :  26,
 't' :  27,
 'u' :  28,
 'v' :  29,
 'w' :  30,
 'x' :  31,
 'y' :  32,
 'z' :  33,


Each character is mapped to an integer index.

In [0]:
# Encode the whole corpus as a 1-D int32 array by looking each character up in char2int.
encoded_chars = list(map(char2int.__getitem__, text))
text_as_int = np.array(encoded_chars, dtype=np.int32)
# Show the first 100 characters next to their integer encoding as a sanity check.
print ('{}\n mapped to integers:\n {}'.format(repr(text[:100]), text_as_int[:100]))

"  achilles' wrath, to greece the direful spring\n  of woes unnumber'd, heavenly goddess, sing!\n  that"
 mapped to integers:
 [ 1  1  8 10 15 16 19 19 12 26  3  1 30 25  8 27 15  4  1 27 22  1 14 25
 12 12 10 12  1 27 15 12  1 11 16 25 12 13 28 19  1 26 23 25 16 21 14  0
  1  1 22 13  1 30 22 12 26  1 28 21 21 28 20  9 12 25  3 11  4  1 15 12
  8 29 12 21 19 32  1 14 22 11 11 12 26 26  4  1 26 16 21 14  2  0  1  1
 27 15  8 27]


Creating training and validation sets

In [0]:
# Contiguous split: the first 704000 encoded characters train the model,
# everything after that index is held out for validation.
tr_text, val_text = text_as_int[:704000], text_as_int[704000:]
print("Total size: {}, Train size : {}, Test size:{}".format(
    text_as_int.shape, tr_text.shape, val_text.shape))

Total size: (886809,), Train size : (704000,), Test size:(182809,)


Hyperparameters

In [0]:
# Sequence and vocabulary sizes (derived from the loaded corpus).
seq_length=200
vocab_size=len(vocab)
examples_per_epoch=len(text)//seq_length

# Model capacity.
embedding_dim=256
rnn_units=1024

# Input pipeline and training schedule.
batch_size=64
buffer_size=10000
epochs=50

Slice the encoded text into fixed-length sequences, map each sequence to an (input, target) pair, then shuffle and group the pairs into batches.

In [0]:
# from_tensor_slices yields the array one element (one character id) at a time;
# batching by seq_length+1 regroups those elements into fixed-size chunks and
# drops a trailing chunk that would be shorter than that.
tr_char_dataset=tf.data.Dataset.from_tensor_slices(tr_text)
tr_sequences=tr_char_dataset.batch(batch_size=seq_length+1, drop_remainder=True)

val_char_dataset=tf.data.Dataset.from_tensor_slices(val_text)
val_sequences=val_char_dataset.batch(batch_size=seq_length+1, drop_remainder=True)
def split_input_target(chunk):
  """Turn one chunk into an (input, target) pair offset by one position.

  The input is the chunk without its last element and the target is the chunk
  without its first element, so target[i] is the element that follows input[i].
  """
  return chunk[:-1], chunk[1:]

# Each chunk becomes an (input, target) pair; pairs are shuffled through a buffer of
# buffer_size elements and grouped into batches, discarding a final partial batch.
tr_dataset=(
    tr_sequences
    .map(split_input_target)
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)
val_dataset=(
    val_sequences
    .map(split_input_target)
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)


Building the model

In [0]:
def build_model(vocab_size,embedding_dim,rnn_units,batch_size):
  """Assemble the character-level model: Embedding -> Dropout -> LSTM -> Dropout -> Dense.

  Args:
    vocab_size: number of distinct token ids; used as the embedding input size
      and as the width of the final Dense layer.
    embedding_dim: width of the embedding vectors.
    rnn_units: number of LSTM units.
    batch_size: fixed batch dimension baked into the input shape (the LSTM is
      created with stateful=True).

  Returns:
    An uncompiled tf.keras.Sequential model.
  """
  model=tf.keras.Sequential()
  # The sequence dimension is left as None so inputs of any length are accepted.
  model.add(tf.keras.layers.Embedding(vocab_size,embedding_dim,batch_input_shape=[batch_size,None]))
  model.add(tf.keras.layers.Dropout(0.2))
  model.add(tf.keras.layers.LSTM(rnn_units,return_sequences=True,stateful=True,recurrent_initializer='glorot_uniform'))
  model.add(tf.keras.layers.Dropout(0.2))
  # One output per vocabulary entry at every time step (no activation applied here).
  model.add(tf.keras.layers.Dense(vocab_size))
  return model

# Training model: sized by the corpus vocabulary and the hyperparameters set above.
model=build_model(len(vocab),embedding_dim,rnn_units,batch_size)



## Training the model

In [0]:
model.summary()
# Push a single training batch through the untrained model to check the output shape.
single_batch=tr_dataset.take(1)
for input_example_batch,target_example_batch in single_batch:
  example_batch_predictions=model(input_example_batch)
  print(example_batch_predictions.shape)

def loss(labels,logits):
  """Sparse categorical cross-entropy between integer labels and unnormalised logits."""
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels,
      y_pred=logits,
      from_logits=True,
  )

# Loss of the untrained model on the sampled batch, averaged over all of its elements.
example_batch_loss=loss(labels=target_example_batch,logits=example_batch_predictions)
mean_example_loss=example_batch_loss.numpy().mean()
print("Loss: ",mean_example_loss)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           8704      
_________________________________________________________________
dropout (Dropout)            (64, None, 256)           0         
_________________________________________________________________
unified_lstm (UnifiedLSTM)   (64, None, 1024)          5246976   
_________________________________________________________________
dropout_1 (Dropout)          (64, None, 1024)          0         
_________________________________________________________________
dense (Dense)                (64, None, 34)            34850     
Total params: 5,290,530
Trainable params: 5,290,530
Non-trainable params: 0
_________________________________________________________________
(64, 200, 34)
Loss:  3.5278356


In [0]:
# Show the first sequence of the sampled input batch (bare last expression -> cell output).
input_example_batch[0]

<tf.Tensor: id=482, shape=(200,), dtype=int32, numpy=
array([26, 27,  1,  8, 21, 11,  1, 21, 22,  9, 19, 12, 26, 27,  1, 22, 13,
        1, 27, 15, 12,  1, 14, 25, 12, 10, 16,  8, 21,  1, 27, 25,  8, 16,
       21,  1,  0,  1,  1, 23, 12, 25, 20, 16, 27,  1, 21, 22, 27,  1, 27,
       15, 12, 26, 12,  1, 27, 22,  1, 26, 28, 12,  4,  1,  8, 21, 11,  1,
       26, 28, 12,  1, 16, 21,  1, 29,  8, 16, 21,  2,  0,  1,  1, 19, 12,
       27,  1, 20, 12,  1,  1, 20, 32,  1, 26, 22, 21,  1,  1,  8, 21,  1,
        8, 21, 10, 16, 12, 21, 27,  1, 13,  8, 10, 27,  1, 28, 21, 13, 22,
       19, 11,  4,  0,  1,  1,  8,  1, 14, 25, 12,  8, 27,  1, 12, 31,  8,
       20, 23, 19, 12,  1, 11, 25,  8, 30, 21,  1, 13, 25, 22, 20,  1, 27,
       16, 20, 12, 26,  1, 22, 13,  1, 22, 19, 11,  1,  0,  1,  1, 15, 12,
        8, 25,  1, 30, 15,  8, 27,  1, 22, 28, 25,  1, 13,  8, 27, 15, 12,
       25, 26,  1, 30, 12, 25, 12,  4,  1,  8, 21, 11,  1], dtype=int32)>

In [0]:
# Halt training once val_loss has failed to improve for `patience` epochs in a row.
patience=10
early_stop=tf.keras.callbacks.EarlyStopping(patience=patience,monitor='val_loss')

# Adam with its default settings, paired with the custom `loss` function.
optimizer=tf.keras.optimizers.Adam()
model.compile(loss=loss,optimizer=optimizer)

In [0]:
# Timestamped directory so every run writes its checkpoints to a separate location.
checkpoint_dir='./checkpoints'+datetime.datetime.now().strftime("_%Y.%m.%d-%H:%M:%S")
# "{epoch}" is filled in by the checkpoint callback, giving one weights-only file set per epoch.
checkpoint_prefix=os.path.join(checkpoint_dir,"ckpt_{epoch}")
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix,
                                                       save_weights_only=True)
history = model.fit(tr_dataset, epochs=epochs, callbacks=[checkpoint_callback, early_stop] , validation_data=val_dataset)
# EarlyStopping leaves stopped_epoch at 0 unless it actually interrupted training,
# so only report an early stop when one happened instead of printing it unconditionally.
if early_stop.stopped_epoch > 0:
  print ("Training stopped as there was no improvement after {} epochs".format(patience))
else:
  print ("Training completed all {} epochs without triggering early stopping".format(epochs))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Training stopped as there was no improvement after 10 epochs


In [0]:
# Rebuild the same architecture with a batch dimension of 1 for generation, then
# load the weights from the most recent checkpoint written to checkpoint_dir.
model = build_model(vocab_size=vocab_size, embedding_dim=embedding_dim, rnn_units=rnn_units, batch_size=1)
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))
def generate_text(model, start_string, num_generate=500, temperature=1.0):
    """Sample a continuation of `start_string` one character at a time.

    Assumes `model` was produced by `build_model(..., batch_size=1)`, i.e. its
    LSTM layer is stateful and carries its hidden state from one call to the next.

    Args:
        model: the character-level model to sample from.
        start_string: seed text; every character must be a key of `char2int`
            (an unknown character raises KeyError).
        num_generate: number of characters to generate after the seed.
        temperature: divisor applied to the logits before sampling; values
            below 1 make sampling more conservative, values above 1 more random.

    Returns:
        `start_string` followed by the generated characters.

    Raises:
        ValueError: if `start_string` is empty or `temperature` is not positive.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    print('Generating with seed: "' + start_string + '"')

    # Encode the seed as a (1, len(start_string)) batch of character ids.
    input_eval = tf.expand_dims([char2int[s] for s in start_string], 0)
    text_generated = []

    # Start from a clean recurrent state so earlier calls do not leak into this one.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Drop the batch dimension and rescale the logits by the temperature.
        predictions = tf.squeeze(predictions, 0) / temperature
        # Sample from the distribution at the last time step.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # The recurrent state already summarises everything fed so far, so the next
        # input is only the newly sampled character. Re-feeding the whole history
        # would make the stateful LSTM see it repeatedly and cost quadratic time.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(int2char[predicted_id])
    return start_string + ''.join(text_generated)
# Seed text for generation. The backslash continuations join the lines WITHOUT
# inserting newlines, and the leading spaces of each continued line are part of
# the string, so the seed is a single line with runs of spaces between verses.
start_string="friend, \
  to diomed return from ancy o'er the plain \
  behold the dreadful valour moves to meet,\
  let his broad belt the spackful dares becease.\
  furious are than adorn to the nave \
  and fear'd with tyrant  and his heavenly stream \
  on high grieftes his ships address'd his main.\
  jove speak his chariot, and a svain around,\
  lay gave the fate  the fate of war to wait,\
  who kneess no more, haspenon valia's race,\
  the god of tyrants, shall lead it oeling spoils,\
  in secret dreamful rocks "
# Generate a continuation of the seed with the batch-size-1 model and print the full text.
print(generate_text(model, start_string))

Generating with seed: "friend,   to diomed return from ancy o'er the plain   behold the dreadful valour moves to meet,  let his broad belt the spackful dares becease.  furious are than adorn to the nave   and fear'd with tyrant  and his heavenly stream   on high grieftes his ships address'd his main.  jove speak his chariot, and a svain around,  lay gave the fate  the fate of war to wait,  who kneess no more, haspenon valia's race,  the god of tyrants, shall lead it oeling spoils,  in secret dreamful rocks "
friend,   to diomed return from ancy o'er the plain   behold the dreadful valour moves to meet,  let his broad belt the spackful dares becease.  furious are than adorn to the nave   and fear'd with tyrant  and his heavenly stream   on high grieftes his ships address'd his main.  jove speak his chariot, and a svain around,  lay gave the fate  the fate of war to wait,  who kneess no more, haspenon valia's race,  the god of tyrants, shall lead it oeling spoils,  in secret dreamful roc

https://towardsdatascience.com/generating-text-with-tensorflow-2-0-6a65c7bdc568