In [1]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import numpy as np

import tensorflow.keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Reshape
from tensorflow.keras.layers import LSTM, Bidirectional, Embedding
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split

import pickle as pk

BATCH_SIZE = 32
EPOCHS = 2
VALIDATION_SPLIT = 0.02

train_data_path = "train_data.pk"
MODEL_NAME = "model_2.keras"

2024-04-25 12:18:49.728906: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
train_data = pk.load(open(train_data_path, "rb"))
X_train = train_data["X"]
X_train_oh = train_data["X_oh"]

alphabet_size = train_data["alphabet_size"]

len(X_train), len(X_train_oh)

(400000, 400000)

In [3]:
X_train, X_val, X_train_oh, X_val_oh = train_test_split(X_train, X_train_oh, test_size=VALIDATION_SPLIT)

In [4]:
def get_model(input_shape, alphabet_size):
    OUTPUT_DIM = alphabet_size # sigmoid output

    input_layer = Input(shape=input_shape)
    x = Embedding(alphabet_size, 20)(input_layer)
    x = LSTM(64)(x)
    x = Dense(64, activation="relu")(x)
    x = Dense(OUTPUT_DIM, activation="softmax")(x)
    model = Model(input_layer, x)

    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["categorical_crossentropy"]
    )
    return model

In [5]:
model = get_model((1,), alphabet_size)

model(np.array(X_train[0])).shape, alphabet_size

(TensorShape([11, 92]), 92)

In [6]:
np.array(X_train[0]).shape, np.array(X_train_oh[0]).shape

((11,), (11, 92))

In [7]:
X_train_oh

[array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 arr

In [8]:
# because we have arrays of different length we have to use the fit_generator() method

def data_generator():
    global X_train
    global X_train_oh
    
    idx = 0
    while True:
        if idx == len(X_train):
            idx = 0
        yield np.array(X_train[idx]), np.array(X_train_oh[idx])
        idx += 1

def val_generator():
    global X_val
    global X_val_oh
    
    idx = 0
    while True:
        if idx == len(X_val):
            idx = 0
        yield np.array(X_val[idx]), np.array(X_val_oh[idx])
        idx += 1

In [9]:
es = EarlyStopping(
    monitor="val_loss",
    patience=1,
    mode="auto")

data_gen = data_generator()
val_gen = val_generator()

model.fit(
    data_gen,
    steps_per_epoch=int(len(X_train) / BATCH_SIZE),
    epochs=EPOCHS,
    validation_data=val_gen,
    validation_steps=len(X_val),
    #validation_split=VALIDATION_SPLIT,
    callbacks=[es]
)

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x7c1d6b54a2f0>

In [10]:
model.save(MODEL_NAME)