In [2]:
import tensorflow as tf
import tensorflow.keras.layers as tfl
import numpy as np
import matplotlib.pyplot as plt
import os
import random

# print(tf.__version__)
# print(tf.config.list_physical_devices('GPU'))

2.16.2
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Data preparation

In [3]:
# Read the whole corpus (one dinosaur name per line) and lower-case it so the
# vocabulary only contains lower-case symbols.
with open("dinos.txt", "r") as f:
    dinos_str = f.read().lower()

# The vocabulary is every distinct character in the file, newline included.
vocabular = sorted(set(dinos_str))
print(f"Vocabular [{len(vocabular)} chars]: {vocabular}\n")

# Skip blank lines (e.g. produced by a trailing newline at the end of the file):
# an empty name would become a training example whose only target is the
# terminator, i.e. it would teach the model to stop immediately.
stripped_lines = (line.strip() for line in dinos_str.split("\n"))
dinos = [name for name in stripped_lines if name]

# Longest name; ties on length are broken by the lexicographically largest name.
lens = [(len(d), d) for d in dinos]
maxlen, d = max(lens)
maxidx = lens.index((maxlen, d))
print(f"maxlen = {maxlen}, dino={d}, idx = {maxidx}\n")

char_to_ix = { ch:i for i,ch in enumerate(vocabular) }
ix_to_char = { i:ch for i,ch in enumerate(vocabular) }

print(f"char_to_ix = {char_to_ix}\n")
print(f"ix_to_char = {ix_to_char}\n")

# Derived from the data instead of hard-coded, so the one-hot width always
# matches the indices in char_to_ix (27 here: 26 lower-case letters + "\n").
n_vocab_size = len(vocabular)
n_a = 50 # number of state units

# Each input starts with None (encoded later as an all-zero start step); the
# target is the input shifted left by one step and terminated with index 0,
# which is the index of "\n" in this vocabulary.
inputs = [[None] + [char_to_ix[char] for char in dino] for dino in dinos]
outputs = [x[1:] + [0] for x in inputs]
print(f"len(inputs) = {len(inputs)}, len(outputs) = {len(outputs)}\n")
print(f"inputs[200] = {inputs[200]}")
print(f"outputs[200] = {outputs[200]}\n")


def indexes_to_one_hot_vectors(inputs, ohv_dim):
    """One-hot encode a batch of index sequences.

    Args:
        inputs: iterable of sequences; each element of a sequence is either an
            integer index or ``None``.
        ohv_dim: width of every one-hot row.

    Returns:
        A list with one float array of shape ``(len(sequence), ohv_dim)`` per
        input sequence. A ``None`` entry yields an all-zero row; an integer
        entry yields a row with a single 1 at that index.
    """
    def encode(sequence):
        # Start from zeros so ``None`` positions need no further work.
        matrix = np.zeros((len(sequence), ohv_dim))
        for row, index in enumerate(sequence):
            if index is not None:
                matrix[row][index] = 1
        return matrix

    return [encode(sequence) for sequence in inputs]


# One-hot encode inputs and targets, then right-pad every sequence with rows of
# -1.0 so all examples share the same number of time steps.
x_sequences = indexes_to_one_hot_vectors(inputs, n_vocab_size)
y_sequences = indexes_to_one_hot_vectors(outputs, n_vocab_size)

pad_options = dict(value=-1.0, padding='post', dtype='float32')
X_padded = tf.keras.utils.pad_sequences(x_sequences[:], **pad_options)
Y_padded = tf.keras.utils.pad_sequences(y_sequences[:], **pad_options)

print(
    f"X_padded.shape = {X_padded.shape}",
    f"Y_padded.shape = {Y_padded.shape}\n",
    sep="\n",
)

Vocabular [27 chars]: ['\n', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

maxlen = 26, dino=lisboasaurusliubangosaurus, idx = 791

char_to_ix = {'\n': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}

ix_to_char = {0: '\n', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}

len(inputs) = 1536, len(outputs) = 1536

inputs[200] = [None, 2, 9, 5, 14, 15, 19, 1, 21, 18, 21, 19]
outputs[200] = [2, 9, 5, 14, 15, 19, 1, 21, 18, 21, 19, 0]

X_padded.shape = (1536, 27, 27)
Y_padded.shape = (1536, 27, 27)



# Sampling

In [4]:
def sample_sequence(model, newline_char=0, max_len=27):
    """Sample one sequence of character indices from a trained model.

    Generation starts from a single all-zero input row, the same encoding that
    ``indexes_to_one_hot_vectors`` produces for the leading ``None`` of every
    training input. At each step the whole sequence generated so far is passed
    to ``model.predict`` and the next index is drawn at random from the
    distribution predicted for the last time step.

    Args:
        model: object exposing ``predict(batch, verbose=0)`` whose result can be
            indexed as ``[0, -1, :]`` to get the probabilities for the next symbol.
        newline_char: index that terminates a sequence. It is never included in
            the returned list.
        max_len: maximum number of indices to generate.

    Returns:
        List of sampled indices (as returned by ``np.random.choice``), without
        the terminator.
    """
    indices = []
    # Input fed so far, one row per time step; row 0 is the all-zero start step.
    x = np.zeros((1, n_vocab_size))

    for _ in range(max_len):
        y_pred = model.predict(np.expand_dims(x, axis=0), verbose=0)
        probs = y_pred[0, -1, :]
        idx = np.random.choice(len(probs), p=probs)
        # Stop on the caller-supplied terminator. This used to compare against a
        # hard-coded 0, which ignored ``newline_char`` for any non-default value.
        if idx == newline_char:
            break
        indices.append(idx)
        # Append the one-hot row of the sampled symbol as the next input step.
        new_x = np.zeros((n_vocab_size,))
        new_x[idx] = 1.0
        x = np.vstack([x, new_x])

    return indices


def get_sample(model):
    """Generate one name: sample indices from ``model`` and decode them to text.

    The indices come from ``sample_sequence`` with its default arguments and
    are mapped to characters through ``ix_to_char``.
    """
    return "".join(ix_to_char[index] for index in sample_sequence(model))

# Model RNN & fit() with SGD on padded X, Y with SmoothLossEpochCallback

In [None]:
class SmoothLossEpochCallback(tf.keras.callbacks.Callback):
    """Track and print an exponentially smoothed version of the epoch loss.

    ``alpha`` is the weight given to the newest epoch loss; the previous
    smoothed value keeps weight ``1 - alpha``.
    """

    def __init__(self, alpha=0.001):
        super().__init__()
        self.alpha = alpha
        # None until the first epoch that reports a loss.
        self.smooth_loss = None

    def on_epoch_end(self, epoch, logs=None):
        """Update the smoothed loss from ``logs['loss']`` and print both values."""
        loss = (logs or {}).get('loss')
        if loss is None:
            # Nothing to report for this epoch.
            return

        if self.smooth_loss is None:
            # First observation seeds the running value.
            self.smooth_loss = loss
        else:
            self.smooth_loss = self.smooth_loss * (1 - self.alpha) + loss * self.alpha

        print(f"\n\nEpoch {epoch}: loss={loss:.4f}, smooth_loss={self.smooth_loss:.4f}\n")
    

# Right-pad the one-hot sequences with rows of -1.0; the Masking layer below
# uses the same value as its mask_value.
X = tf.keras.utils.pad_sequences(x_sequences[:], value=-1.0, padding='post', dtype='float32')
Y = tf.keras.utils.pad_sequences(y_sequences[:], value=-1.0, padding='post', dtype='float32')

print(f"type(X) = {type(X)}, len(X) = {len(X)}")
print(f"type(Y) = {type(Y)}, len(Y) = {len(Y)}")


def _small_normal():
    """Return a new N(0, 0.01) initializer (one instance per weight)."""
    return tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.01)


# NOTE: three module-level `*_initializer = ...` assignments used to sit here.
# They were never used, and two of them were accidental 1-tuples because of a
# trailing comma, so they have been removed.

# Masking -> SimpleRNN -> per-step softmax over the vocabulary.
inp = tf.keras.Input(shape=(None, n_vocab_size))
x = tfl.Masking(mask_value=-1.)(inp)
rnn_cell = tfl.SimpleRNNCell(
    n_a,
    kernel_initializer=_small_normal(),
    recurrent_initializer=_small_normal(),
    bias_initializer=tf.keras.initializers.Ones()
)
x = tfl.RNN(rnn_cell, return_sequences=True) (x)
out = tfl.Dense(n_vocab_size, activation="softmax",
                    kernel_initializer=_small_normal(),
                    bias_initializer=tf.keras.initializers.Ones())(x)
# NOTE: despite its name this is a SimpleRNN model; the name is kept unchanged
# so existing references to `lstm_model` keep working.
lstm_model = tf.keras.Model(inputs=inp, outputs=out)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, clipvalue=5.0)
loss_fn = tf.keras.losses.CategoricalCrossentropy(reduction=tf.keras.losses.Reduction.SUM)
lstm_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

history = lstm_model.fit(X, Y, batch_size=1, epochs=1, callbacks=[SmoothLossEpochCallback(alpha=0.001)])

# Save only after training: saving before fit() stored the untrained weights.
lstm_model.save("New-dino_RNN_SGD_Fit_15_epochs.keras")
print("\n\n")
# plt.plot(history.history['loss'])

## Sampling with Model RNN & fit() with SGD on padded X, Y with SmoothLossEpochCallback

In [7]:
# Load the model stored on disk and print ten sampled names with their lengths.
loaded_model = tf.keras.models.load_model("dino_RNN_SGD_Fit_15_epochs.keras")

# The generator is lazy, so each name is sampled right before it is printed.
for name in (get_sample(loaded_model) for _ in range(10)):
    print(f"name = {name.title()}, len = {len(name)}")

name = Arpuropherator, len = 14
name = Tenarasaurus, len = 12
name = Rabanosaurus, len = 12
name = Qabtertitatodon, len = 15
name = Yontaorasaurus, len = 14
name = Qunluyalianhuriodon, len = 19
name = Aegnyrosniulonolus, len = 18
name = Zapernesaurus, len = 13
name = Telaedathallertatus, len = 19
name = Limuriangorosaurus, len = 18


# Model RNN & Batch fit()

In [4]:
# class SamplingLossEpochCallback(tf.keras.callbacks.Callback):
#     def __init__(self):
#         super().__init__()

#     def on_epoch_end(self, epoch, logs=None):
#         print("\n\nGenerated names:")
#         for _ in range(7):
#             name = get_sample(self.model)
#             print(f"\t{name.title()}")


# Right-pad the one-hot sequences with rows of -1.0; the Masking layer below
# uses the same value as its mask_value.
X = tf.keras.utils.pad_sequences(x_sequences[:], value=-1.0, padding='post', dtype='float32')
Y = tf.keras.utils.pad_sequences(y_sequences[:], value=-1.0, padding='post', dtype='float32')

print(f"type(X) = {type(X)}, len(X) = {len(X)}")
print(f"type(Y) = {type(Y)}, len(Y) = {len(Y)}")


def _small_normal():
    """Return a new N(0, 0.01) initializer (one instance per weight)."""
    return tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.01)


# NOTE: three module-level `*_initializer = ...` assignments used to sit here.
# They were never used, and two of them were accidental 1-tuples because of a
# trailing comma, so they have been removed.

# Masking -> SimpleRNN -> per-step softmax over the vocabulary.
inp = tf.keras.Input(shape=(None, n_vocab_size))
x = tfl.Masking(mask_value=-1.)(inp)
rnn_cell = tfl.SimpleRNNCell(
    n_a,
    kernel_initializer=_small_normal(),
    recurrent_initializer=_small_normal(),
    bias_initializer=tf.keras.initializers.Ones()
)
x = tfl.RNN(rnn_cell, return_sequences=True) (x)
out = tfl.Dense(n_vocab_size, activation="softmax",
                    kernel_initializer=_small_normal(),
                    bias_initializer=tf.keras.initializers.Ones())(x)
rnn_model = tf.keras.Model(inputs=inp, outputs=out)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, clipvalue=5.0)
loss_fn = tf.keras.losses.CategoricalCrossentropy()
rnn_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# rnn_model.summary()

# Train in mini-batches, then persist the trained model.
history = rnn_model.fit(X, Y, batch_size=32, epochs=15)
rnn_model.save("DELETE-dino_RNN_Batch_Fit_15_epochs.keras")

# plt.plot(history.history['loss'])

type(X) = <class 'numpy.ndarray'>, len(X) = 1536
type(Y) = <class 'numpy.ndarray'>, len(Y) = 1536
Epoch 1/15


2025-12-08 23:43:37.205322: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4 Pro
2025-12-08 23:43:37.205372: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 48.00 GB
2025-12-08 23:43:37.205381: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 18.00 GB
2025-12-08 23:43:37.205406: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-12-08 23:43:37.205423: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-12-08 23:43:37.507255: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 200ms/step - accuracy: 0.1969 - loss: 2.7143
Epoch 2/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 199ms/step - accuracy: 0.3788 - loss: 2.1176
Epoch 3/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 198ms/step - accuracy: 0.4238 - loss: 1.9043
Epoch 4/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 196ms/step - accuracy: 0.4469 - loss: 1.8273
Epoch 5/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 199ms/step - accuracy: 0.4657 - loss: 1.7770
Epoch 6/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 196ms/step - accuracy: 0.4779 - loss: 1.7386
Epoch 7/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 197ms/step - accuracy: 0.4886 - loss: 1.7027
Epoch 8/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 195ms/step - accuracy: 0.4969 - loss: 1.6759
Epoch 9/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━

## Sampling with RNN Batch fit()

In [7]:
# Reload the batch-trained model saved by the training cell and print ten
# sampled names together with their lengths.
batch_model = tf.keras.models.load_model("DELETE-dino_RNN_Batch_Fit_15_epochs.keras")

# The generator is lazy, so each name is sampled right before it is printed.
for name in (get_sample(batch_model) for _ in range(10)):
    print(f"name = {name.title()}, len = {len(name)}")

name = Shinyungosaurus, len = 15
name = Nionthuenisaurus, len = 16
name = Jiavetterapteryx, len = 16
name = Sagonusaurus, len = 12
name = Tysmonorax, len = 10
name = Alchelera, len = 9
name = Ruriensaurus, len = 12
name = Chetrasaurus, len = 12
name = Scramamiavimus, len = 14
name = Viantastrops, len = 12


# Model LSTM Masking Batch & fit() on padded X, Y with SamplingLossEpochCallback

In [13]:
class SamplingLossEpochCallback(tf.keras.callbacks.Callback):
    """Print a few freshly sampled names at the end of every epoch.

    Takes no constructor arguments; the base-class initializer is used as is.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Sample seven names from the model being trained and print them title-cased."""
        print("\n\nGenerated names:")
        remaining = 7
        while remaining > 0:
            name = get_sample(self.model)
            print(f"\t{name.title()}")
            remaining -= 1


# Right-pad every one-hot sequence with rows of -1.0 (the Masking layer's mask_value).
pad_kwargs = {"value": -1.0, "padding": "post", "dtype": "float32"}
X = tf.keras.utils.pad_sequences(x_sequences[:], **pad_kwargs)
Y = tf.keras.utils.pad_sequences(y_sequences[:], **pad_kwargs)

print(X.shape, Y.shape, sep="\n")

# Masking -> LSTM -> per-step softmax over the vocabulary.
inp = tf.keras.Input(shape=(None, n_vocab_size))
masking_layer = tfl.Masking(mask_value=-1.)
x = masking_layer(inp)
recurrent_layer = tfl.LSTM(n_a, return_sequences=True)
x = recurrent_layer(x)
softmax_layer = tfl.Dense(n_vocab_size, activation="softmax")
out = softmax_layer(x)
lstm_model = tf.keras.Model(inputs=inp, outputs=out)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, clipvalue=10.0)
lstm_model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train with names sampled after each epoch, then persist the trained model.
epoch_callbacks = [SamplingLossEpochCallback()]
history = lstm_model.fit(X, Y, batch_size=32, epochs=15, callbacks=epoch_callbacks)
lstm_model.save("dino_LSTM_Batch_Fit.keras")
# plt.plot(history.history['loss'])

(1536, 27, 27)
(1536, 27, 27)
Epoch 1/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.1874 - loss: 2.8124

Generated names:
	Hnrueusud
	Anjleoolrhtl
	Itihonhaurus
	Pororasauyrto
	Naycedototonuo
	Rotauradocous
	Ctepitoos
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - accuracy: 0.2575 - loss: 2.5743
Epoch 2/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.3973 - loss: 2.0928

Generated names:
	Giberasaerus
	Urusaus
	Arlysaurus
	Onnitilhtoretoop
	Hniydosaurus
	Qabontaiatoru
	Biwmasaurus
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step - accuracy: 0.4079 - loss: 2.0255
Epoch 3/15
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.4440 - loss: 1.8649

Generated names:
	Yglhiotera
	Srialonus
	Dyzngopetos
	Jaceliaxetes
	Anhaleposaurus
	Jwakocroria
	Anlphientalus
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2

## Sampling with LSTM Batch fit model

In [15]:
# Reload the saved LSTM model and print seven sampled names.
loaded = tf.keras.models.load_model("dino_LSTM_Batch_Fit.keras")

# The generator is lazy, so each name is sampled right before it is printed.
for name in (get_sample(loaded) for _ in range(7)):
    print(name.title())

Dreigolosaurus
Walzokrimes
Mhedoceratops
Denosaurus
Nolochamelon
Baironathus
Mikasaria


# Loaded Model continue fit()

In [20]:
# NOTE: same behaviour as the SamplingLossEpochCallback defined in the LSTM
# training cell earlier in this notebook.
class SamplingLossEpochCallback(tf.keras.callbacks.Callback):
    """Print a few freshly sampled names at the end of every epoch.

    Takes no constructor arguments; the base-class initializer is used as is.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Sample seven names from the model being trained and print them title-cased."""
        print("\n\nGenerated names:")
        produced = 0
        while produced < 7:
            name = get_sample(self.model)
            print(f"\t{name.title()}")
            produced += 1


# Right-pad every one-hot sequence with rows of -1.0.
pad_kwargs = {"value": -1.0, "padding": "post", "dtype": "float32"}
X = tf.keras.utils.pad_sequences(x_sequences[:], **pad_kwargs)
Y = tf.keras.utils.pad_sequences(y_sequences[:], **pad_kwargs)

print(X.shape, Y.shape, sep="\n")

# The same file is used as the starting checkpoint and as the save target, so
# every run of this cell continues from the previous run's result.
checkpoint_path = "CONTINUED-dino_LSTM_Batch_Fit.keras"
loaded_lstm = tf.keras.models.load_model(checkpoint_path)

# The model is compiled again with a newly created Adam optimizer before
# training continues.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, clipvalue=10.0)
loaded_lstm.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

epoch_callbacks = [SamplingLossEpochCallback()]
history = loaded_lstm.fit(X, Y, batch_size=32, epochs=10, callbacks=epoch_callbacks)
loaded_lstm.save(checkpoint_path)

(1536, 27, 27)
(1536, 27, 27)
Epoch 1/10
[1m45/48[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 10ms/step - accuracy: 0.6639 - loss: 1.0914

Generated names:
	Bilaony
	Jinutadoviaden
	Morusoliskus
	Eukuerr
	Macroceratops
	Inima
	Colombgasaurus
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 46ms/step - accuracy: 0.6605 - loss: 1.0968
Epoch 2/10
[1m43/48[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 9ms/step - accuracy: 0.6845 - loss: 1.0387

Generated names:
	Drocaraphodyn
	Shiptosaurus
	Onchuanognathus
	Kalas
	Shinghyrennosaurus
	Protianognathus
	Hetetoraptor
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.6726 - loss: 1.0618
Epoch 3/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6826 - loss: 1.0401

Generated names:
	Fulunsaura
	Xuaniagnathus
	Ornatosaurus
	Janmensaurus
	Sankusaurus
	Arkanaria
	Terkhosaurus
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[