In [1]:
import os

import numpy as np

import tensorflow.keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Reshape
from tensorflow.keras.layers import LSTM, Bidirectional, Embedding
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split

import pickle as pk

# Pickle file that holds the training sequences, the event mapping and the
# alphabet size.
train_data_path = "train_data.pk"

# Training configuration.
VALIDATION_SPLIT = 0.02  # fraction of the sequences held out for validation
EPOCHS = 5               # number of epochs requested from model.fit
BATCH_SIZE = 32          # divisor applied to len(X_train) for steps_per_epoch

2024-01-02 14:51:05.294918: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the pickled training bundle. The context manager guarantees the file
# handle is closed as soon as the payload has been read.
# NOTE(security): unpickling can execute arbitrary code; only point
# train_data_path at files that come from a trusted source.
with open(train_data_path, "rb") as train_file:
    train_data = pk.load(train_file)

X_train = train_data["X"]
# Lookup used further down for the "<SOS>" / "<EOS>" token ids.
even_dict = train_data["event_mapping"]

alphabet_size = train_data["alphabet_size"]

# Number of sequences loaded (displayed as the cell output).
len(X_train)

2318181

In [3]:
# Hold out VALIDATION_SPLIT of the sequences for validation. A fixed
# random_state makes the shuffle (and therefore the split) reproducible.
# NOTE: this cell rebinds X_train, so re-running it without first re-running
# the loading cell splits the already-reduced training set a second time.
X_train, X_val = train_test_split(
    X_train,
    test_size=VALIDATION_SPLIT,
    random_state=42,
)

In [4]:
def get_model(input_shape, alphabet_size, embedding_dim=20, lstm_units=128, dense_units=128):
    """Build and compile the bidirectional-LSTM next-event classifier.

    Architecture: Embedding -> BiLSTM (returns sequences) -> BiLSTM ->
    Dense(relu) -> Dense(softmax over the alphabet).

    Args:
        input_shape: Shape of one input sample (without the batch axis), as
            accepted by ``Input(shape=...)``.
        alphabet_size: Number of distinct symbols; used both as the embedding
            vocabulary size and as the width of the output layer.
        embedding_dim: Size of each embedding vector (default 20).
        lstm_units: Units per direction in each LSTM layer (default 128).
        dense_units: Width of the hidden dense layer (default 128).

    Returns:
        A compiled ``Model`` (categorical cross-entropy loss, Adam optimizer).
    """
    token_ids = Input(shape=input_shape)
    x = Embedding(alphabet_size, embedding_dim)(token_ids)
    # The first recurrent layer keeps the time axis so the second one can
    # consume a sequence; the second collapses it to a single vector.
    x = Bidirectional(LSTM(lstm_units, return_sequences=True))(x)
    x = Bidirectional(LSTM(lstm_units))(x)
    x = Dense(dense_units, activation="relu")(x)
    # Softmax output: one probability per symbol of the alphabet.
    probabilities = Dense(alphabet_size, activation="softmax")(x)
    model = Model(token_ids, probabilities)

    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        # Same quantity as the loss; kept so the reported metric names do not change.
        metrics=["categorical_crossentropy"],
    )
    return model

In [5]:
# Instantiate the network for inputs of shape (1,).
# NOTE(review): with this input shape a sequence of N tokens is consumed as N
# separate length-1 samples (see the output shape below), so the LSTMs see one
# token at a time - confirm this is intended.
model = get_model(input_shape=(1,), alphabet_size=alphabet_size)

# Smoke test: run the first training sequence through the untrained model and
# show the output shape next to the alphabet size.
(
    model(np.array(X_train[0])).shape,
    alphabet_size,
)

(TensorShape([10, 321]), 321)

In [6]:
# The sequences have different lengths, so they are fed to model.fit() one at a time through Python generators (fit_generator() is deprecated in TF 2.x).
import copy

def _one_hot_pairs(sequences, event_mapping, n_symbols):
    """Endlessly yield (source, target) pairs, one per sequence.

    For a sequence ``[t0, ..., tk]`` the source is ``[<SOS>, t0, ..., tk]``
    and the target is the one-hot encoding of ``[t0, ..., tk, <EOS>]``, i.e.
    the target at position i is the token that follows source position i.
    After the last sequence iteration restarts from the first one.

    Args:
        sequences: Indexable collection of integer token sequences. The
            sequences themselves are never modified.
        event_mapping: Mapping that provides the "<SOS>" and "<EOS>" token ids.
        n_symbols: Width of the one-hot target rows.

    Yields:
        Tuple ``(source, target)`` where ``source`` is a 1-D array of
        ``len(seq) + 1`` token ids and ``target`` is a
        ``(len(seq) + 1, n_symbols)`` array of zeros and ones.

    Raises:
        ValueError: If ``sequences`` is empty (nothing could ever be yielded).
    """
    if len(sequences) == 0:
        raise ValueError("cannot generate training pairs from an empty dataset")

    sos_id = event_mapping["<SOS>"]
    eos_id = event_mapping["<EOS>"]

    while True:
        for seq in sequences:
            token_ids = np.asarray(seq, dtype=np.intp)

            # One row per input position plus a final row for <EOS>.
            target = np.zeros((len(token_ids) + 1, n_symbols))
            target[np.arange(len(token_ids)), token_ids] = 1
            target[-1, eos_id] = 1

            # Prepend <SOS> on a fresh list so the stored sequence is untouched.
            source = np.array([sos_id, *seq])

            yield source, target


def data_generator():
    """Yield (source, target) training pairs from the global ``X_train`` forever."""
    yield from _one_hot_pairs(X_train, even_dict, alphabet_size)


def val_generator():
    """Yield (source, target) validation pairs from the global ``X_val`` forever."""
    yield from _one_hot_pairs(X_val, even_dict, alphabet_size)

In [7]:
# Smoke test: draw the first two (source, target) pairs from a fresh training
# generator. Afterwards `i` is 1 and `x` holds the second pair.
test_gen = data_generator()
for i, x in zip(range(2), test_gen):
    pass

In [10]:
# Stop training once val_loss has gone `patience` epochs without improving.
# NOTE(review): with EPOCHS = 5 and patience=4 the callback can fire no earlier
# than the final epoch, so it effectively never shortens training - confirm
# that this is intended.
es = EarlyStopping(
    monitor="val_loss",
    patience=4,
    mode="auto",
)

data_gen = data_generator()
val_gen = val_generator()

# NOTE(review): every generator step yields a single sequence, so
# steps_per_epoch = len(X_train) // BATCH_SIZE walks roughly 1/BATCH_SIZE of the
# training sequences per epoch, while validation_steps walks the whole
# validation set - confirm that this asymmetry is intended.
model.fit(
    data_gen,
    steps_per_epoch=len(X_train) // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=val_gen,
    validation_steps=len(X_val),
    callbacks=[es],
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
10890/70994 [===>..........................] - ETA: 19:14 - loss: 0.9891 - categorical_crossentropy: 0.9891

KeyboardInterrupt: 

In [11]:
# Persist the model in its current state. The training run shown above was
# interrupted, so these are the weights reached at that point.
model.save(filepath="model.keras")