In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
import tensorflow as tf

In [3]:
SEED_NUM = 1234
tf.random.set_seed(SEED_NUM)

# Load Train data set

In [4]:
from tensorflow.keras.utils import pad_sequences
import json

In [5]:
AFTER_DATA_IN_PATH = './preprocessed_data/'
DATA_OUT = './data_out/'

TRAIN_INPUT_DATA = 'train_input.npy'
TRAIN_LABEL_DATA = 'train_label.npy'
DATA_CONFIGS='data_configs.json'

In [6]:
train_input = np.load(open(AFTER_DATA_IN_PATH + TRAIN_INPUT_DATA, 'rb'))
train_input = pad_sequences(train_input, maxlen=train_input.shape[1])

train_label = np.load(open(AFTER_DATA_IN_PATH + TRAIN_LABEL_DATA, 'rb'))

prepro_configs = json.load(open(AFTER_DATA_IN_PATH + DATA_CONFIGS, 'r'))

# Model Hyper-parameters

In [7]:
model_name = 'rnn_classifier_en'
BATCH_SIZE = 128
NUM_EPOCHS = 5
VALID_SPLIT = 0.1
MAX_LEN = train_input.shape[1]

kargs = {
    'model_name' : model_name,
    'vocab_size' : prepro_configs['vocab_size'],
    'embedding_dimension' : 100,
    'dropout_rate' : 0.2,
    'lstm_dimension' : 150,
    'dense_dimension' : 150,
    'output_dimension' : 1
}

In [8]:
class RNNClassifier(tf.keras.Model):
    def __init__(self, **kargs):
        super(RNNClassifier, self).__init__(name=kargs['model_name'])
        self.embedding = tf.keras.layers.Embedding(input_dim=kargs['vocab_size'], output_dim=kargs['embedding_dimension'])
        self.lstm_1_layer = tf.keras.layers.LSTM(kargs['lstm_dimension'], return_sequences=True)
        self.lstm_2_layer = tf.keras.layers.LSTM(kargs['lstm_dimension'])
        self.dropout = tf.keras.layers.Dropout(kargs['dropout_rate'])
        self.fc1 = tf.keras.layers.Dense(units=kargs['dense_dimension'], activation=tf.keras.activations.tanh)
        self.fc2 = tf.keras.layers.Dense(units=kargs['output_dimension'], activation=tf.keras.activations.sigmoid)

    def call(self, x):
        x = self.embedding(x)
        x = self.dropout(x)
        x = self.lstm_1_layer(x)
        x = self.lstm_2_layer(x)
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.fc2(x)

        return x

In [9]:
model = RNNClassifier(**kargs)

In [43]:
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy(name='Accuracy')])

In [44]:
earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.0001, patience=2, mode='max')

In [45]:
checkpoint_path = DATA_OUT+model_name+'/.weights.h5'
checkpoint_dir = os.path.dirname(checkpoint_path)

In [46]:
if os.path.exists(checkpoint_dir):
    print('{} -- Folder already exists \n'.format(checkpoint_dir))
else:
    os.makedirs(checkpoint_dir, exist_ok=True)
    print('{} -- Folder create complete \n'.format(checkpoint_dir))

./data_out/rnn_classifier_en -- Folder already exists 



In [47]:
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=True
)

In [48]:
history = model.fit(train_input, train_label, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_split=VALID_SPLIT, callbacks=[earlystop_callback, cp_callback])

Epoch 1/5
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 603ms/step - Accuracy: 0.5786 - loss: 0.6444 - val_Accuracy: 0.6828 - val_loss: 0.6100
Epoch 2/5


  current = self.get_monitor_value(logs)
  self._save_model(epoch=epoch, batch=None, logs=logs)


[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 586ms/step - Accuracy: 0.6680 - loss: 0.6046 - val_Accuracy: 0.5720 - val_loss: 0.6629
Epoch 3/5
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 610ms/step - Accuracy: 0.5477 - loss: 0.6660 - val_Accuracy: 0.5612 - val_loss: 0.6677
Epoch 4/5
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 646ms/step - Accuracy: 0.5494 - loss: 0.6628 - val_Accuracy: 0.6520 - val_loss: 0.6133
Epoch 5/5
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 629ms/step - Accuracy: 0.7021 - loss: 0.5841 - val_Accuracy: 0.6992 - val_loss: 0.6037
