In [2]:
import tensorflow as tf

import numpy as np

# Define the number of sequences, their length, and the vocabulary size
num_sequences = 10
sequence_length = 15
vocab_size = 50

# Generate random integer data to simulate sequences of words
dataset = np.random.randint(0, vocab_size, size=(num_sequences, sequence_length))

# Create target sequences by shifting each sequence by one index to the right
target_dataset = np.concatenate([dataset[:, 1:], np.zeros((num_sequences, 1), dtype=int)], axis=1)

print("Input Dataset Shape:", dataset.shape)  # Output shape: (10, 15)
print("Target Dataset Shape:", target_dataset.shape)  # Output shape: (10, 15)

print("Input Dataset:", dataset)
print("Target Dataset:", target_dataset)

# Define the model
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=16, input_length=sequence_length),
    tf.keras.layers.GRU(units=32, return_sequences=True),
    tf.keras.layers.Dense(units=vocab_size, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Train the model
model.fit(dataset, target_dataset, epochs=10, batch_size=2)

# Evaluate the model
loss = model.evaluate(dataset, target_dataset)
print(f"Loss: {loss}")


Input Dataset Shape: (10, 15)
Target Dataset Shape: (10, 15)
Input Dataset: [[15 30 32  1 16 17  6 10 29 30 17 21 49 43 18]
 [ 3 49 25 49 32 32 22  8 32 18 24 23  7 20 38]
 [41 16 15 18 23 23 34 24 19 24 33 17 30 25 43]
 [39  9 18  1 28 33 28 43 37 15 31 48 22 21 16]
 [10 25 27 24 44 34 11 11  9 28 21 40 41 34  7]
 [16 13 20  1 41 18  6  3 24 18 23 49  6 31  1]
 [34 34 37 49 40  7 20 42 43 22  3 28 37  6 43]
 [25 37 45  1 34  4 38 29 22 34 47 44 20 27 31]
 [12 33 14  6 36 37 46  9  6 29 49 19 32 35 19]
 [ 3 11 21  5 14 15 16 11  0 12 45 14 30 26 39]]
Target Dataset: [[30 32  1 16 17  6 10 29 30 17 21 49 43 18  0]
 [49 25 49 32 32 22  8 32 18 24 23  7 20 38  0]
 [16 15 18 23 23 34 24 19 24 33 17 30 25 43  0]
 [ 9 18  1 28 33 28 43 37 15 31 48 22 21 16  0]
 [25 27 24 44 34 11 11  9 28 21 40 41 34  7  0]
 [13 20  1 41 18  6  3 24 18 23 49  6 31  1  0]
 [34 37 49 40  7 20 42 43 22  3 28 37  6 43  0]
 [37 45  1 34  4 38 29 22 34 47 44 20 27 31  0]
 [33 14  6 36 37 46  9  6 29 49 19 32 35 19



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 3.9123  
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.9051 
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.9017 
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.8960 
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.8911 
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.8838 
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.8756 
Epoch 8/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.8657 
Epoch 9/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.8497 
Epoch 10/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.8345 
[1m1/1[0m [32m━━━━━━━━━━━━

In [4]:
# Predict the next word for a new sequence
new_sequence = np.random.randint(0, vocab_size, size=(1, sequence_length))
predicted_probs = model.predict(new_sequence)
predicted_word = np.argmax(predicted_probs, axis=-1)

print("New Sequence:", new_sequence)
print("Predicted Next Word:", predicted_word)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
New Sequence: [[21 49 44 42 14 10 30  5  5 15 40 48 26 48 37]]
Predicted Next Word: [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
