In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

# Ask TensorFlow which GPU devices it can see. This is the final expression of the
# cell, so the returned list is what the cell displays.
tf.config.experimental.list_physical_devices('GPU')

In [None]:
# Hyperparameters
# Tunable constants shared by the tokenizer, the padding step and the model.

vocab_size = 500         # passed to Tokenizer(num_words=...) and used as the Embedding input size
embedding_dim = 64       # output size of the Embedding layer
max_length = 120         # every sequence is padded/truncated to this many tokens
trunc_type = 'post'      # pad_sequences `truncating` argument
padding_type = 'post'    # pad_sequences `padding` argument
oov_tok = "<OOV>"        # token the Tokenizer substitutes for out-of-vocabulary words

In [None]:
# Load Data
# Read the pre-split sentences and labels from .npy files, grouped per split.

training_sentences = np.load("Datasets/Type3/training_sentences.npy")
training_labels = np.load("Datasets/Type3/training_labels.npy")

testing_sentences = np.load("Datasets/Type3/testing_sentences.npy")
testing_labels = np.load("Datasets/Type3/testing_labels.npy")

# Print each array's shape as a quick sanity check that sentences and labels line up.
for loaded in (training_sentences, training_labels, testing_sentences, testing_labels):
    print(loaded.shape)

In [None]:
# Encode label values as integers, then one-hot encode them.
# NOTE: this cell overwrites training_labels / testing_labels, so re-running it without
# re-running the load cell feeds already one-hot arrays back into the encoder.

encoder = LabelEncoder()
encoder.fit(training_labels)

# Pin the one-hot width to the number of classes the encoder learned. Without an explicit
# num_classes, to_categorical sizes its output from the largest label value present, so a
# split that happens to lack the highest class would come out with fewer columns than the
# other split (and than the model's output layer).
num_classes = len(encoder.classes_)

# tf.keras.utils.to_categorical is used so the labels come from the same Keras
# implementation as the tf.keras model trained on them.
training_labels = tf.keras.utils.to_categorical(
    encoder.transform(training_labels), num_classes=num_classes
)
testing_labels = tf.keras.utils.to_categorical(
    encoder.transform(testing_labels), num_classes=num_classes
)

In [None]:
# Initialize Tokenizer
# The vocabulary is learned from the training sentences only; the testing sentences are
# not passed to fit_on_texts.

tokenizer = Tokenizer(
    oov_token=oov_tok,
    num_words=vocab_size,
)
tokenizer.fit_on_texts(training_sentences)

# Keep the learned word -> index mapping around and print it for inspection.
word_index = tokenizer.word_index
print(word_index)

In [None]:
# Tokenize the sentences

def _encode_and_pad(sentences):
    """Turn sentences into integer sequences and a padded copy of fixed length.

    Uses the fitted `tokenizer` plus the `max_length`, `padding_type` and `trunc_type`
    settings defined earlier in the notebook.

    Returns:
        (sequences, padded): the raw variable-length sequences from
        texts_to_sequences, and the result of pad_sequences on them.
    """
    sequences = tokenizer.texts_to_sequences(sentences)
    padded = pad_sequences(
        sequences,
        maxlen=max_length,
        padding=padding_type,
        truncating=trunc_type,
    )
    return sequences, padded


# Both splits go through exactly the same pipeline.
training_sequences, training_padded = _encode_and_pad(training_sentences)
testing_sequences, testing_padded = _encode_and_pad(testing_sentences)

In [None]:
# Sequential LSTM Model
# Embedding -> two stacked bidirectional LSTMs -> two dense layers -> 3-way softmax.

model = tf.keras.Sequential()

# Token ids -> dense vectors; input sequences are max_length tokens long.
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))

# The first recurrent layer returns the full sequence so the second one receives
# per-timestep input; the second returns only its final output.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))

# Classification head.
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(3, activation='softmax'))

# categorical_crossentropy matches the one-hot encoded labels fed to fit().
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Set Training Parameters

num_epochs = 50

# Make sure everything handed to fit() is a NumPy array.
training_padded, training_labels = np.array(training_padded), np.array(training_labels)
testing_padded, testing_labels = np.array(testing_padded), np.array(testing_labels)

# Train the model; the testing split is also used as the validation set, so the
# per-epoch validation metrics are computed on the testing data.
history = model.fit(
    x=training_padded,
    y=training_labels,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels),
    verbose=1,
)

In [None]:
# Mean number of tokens per training sentence, measured on the unpadded sequences.
# `avg` ends up holding the summed length (not the mean); the mean itself is only printed.
avg = sum(len(sequence) for sequence in training_sequences)

print(avg / len(training_sequences))

In [None]:
# Find the longest tokenized sentence across the training and testing splits.
# NOTE: `mini` keeps its original name but holds the MAXIMUM sequence length found.
mini = 0
index = 0
longest_split = None  # split that `index` points into; stays None if every sequence is empty

for split_name, sequences in (("training", training_sequences), ("testing", testing_sequences)):
    for i, sequence in enumerate(sequences):
        # Strict '>' keeps the first sentence encountered when lengths tie.
        if len(sequence) > mini:
            mini = len(sequence)
            index = i
            longest_split = split_name

print(mini)
print(index)

# `index` is only meaningful inside the split it was found in. Print the sentence from
# that split only: indexing the other split would show an unrelated sentence, or raise
# IndexError when the two splits differ in size.
if longest_split == "training":
    print("Longest sentence is in the training split:")
    print(training_sentences[index])
elif longest_split == "testing":
    print("Longest sentence is in the testing split:")
    print(testing_sentences[index])
else:
    print("No non-empty sequences found.")

In [None]:
# Save the trained model to disk. No interactive prompt follows the save, so the
# notebook can run top to bottom (Run All or headless execution) without waiting on stdin.
model.save("Weights/Type3_LSTM.h5")

In [None]:
# Restore the weights from the file written by the save cell.
model.load_weights("Weights/Type3_LSTM.h5")

# Score the restored model on the testing split. The model was compiled with a single
# 'accuracy' metric, so evaluate() yields the loss followed by the accuracy.
loss, acc = model.evaluate(x=testing_padded, y=testing_labels, verbose=2)

accuracy_pct = 100*acc
print("Restored model, accuracy: {:5.2f}%".format(accuracy_pct))