In [None]:
import numpy as np
from tensorflow.keras.layers import Dense, Embedding, Input, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import set_random_seed

In [None]:
# Raw training corpus: a short block of song lyrics, one lyric line per text
# line. The string begins and ends with a newline because of the
# triple-quote layout.
text = """
You're the light, you're the night
You're the colour of my blood
You're the cure, you're the pain
You're the only thing I wanna touch
Never knew that it could mean so much, so much
You're the fear, I don't care
'Cause I've never been so high
Follow me through the dark
Let me take you past the satellites
You can see the world you brought to life, to life
So love me like you do, lo-lo-love me like you do
Love me like you do, lo-lo-love me like you do
Touch me like you do, to-to-touch me like you do
What are you waiting for?
"""

In [None]:
# Normalise the corpus: fold everything to lowercase, then flatten the line
# breaks into single spaces so the lyrics read as one continuous word stream.
text = " ".join(text.lower().split("\n"))

In [None]:
# Show the normalised corpus as a sanity check before tokenising.
print(text)

In [None]:
# Fit a word-level tokenizer on the corpus (passed as a one-document list).
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])

# Word ids start at 1, so the model needs one extra slot for id 0.
total_words = 1 + len(tokenizer.word_index)

print("Vocabulary Size:", total_words)
print("Word Index Mapping:", tokenizer.word_index)



In [None]:
# Encode the whole corpus as one flat list of word ids. A single document goes
# in, so exactly one id sequence comes back and can be unpacked directly.
[tokens] = tokenizer.texts_to_sequences([text])
print("Tokenized sequence:", tokens[:20])  # preview only the first 20 ids

In [None]:
# 4. Create input sequences and targets
# ===============================
seq_length = 5  # number of context words fed to the model per example

# Slide a window of seq_length + 1 ids across the corpus: the first
# seq_length ids are the context, the final id is the word to predict.
input_sequences = np.array([
    tokens[start:start + seq_length + 1]
    for start in range(len(tokens) - seq_length)
])

# Split every window into model input (all but the last id) and label (last id).
X, y = input_sequences[:, :-1], input_sequences[:, -1]


In [None]:
# 5. Padding (ensures equal length sequences)
# ===============================
# Left-pad every input row to seq_length ids. The rows built by the windowing
# step are already seq_length long, so this mainly guarantees a uniform shape.
X = pad_sequences(sequences=X, maxlen=seq_length, padding='pre')

# Peek at the first training pair.
print("Example Input (token IDs):", X[0])
print("Example Target (token ID):", y[0])

In [None]:
# 6. Build the Model
# ===============================
# Seed Python, NumPy and the backend so weight initialisation and the training
# shuffle order are repeatable on every "Restart & Run All".
set_random_seed(42)

model = Sequential([
    # Declare the input shape with an explicit Input layer: Embedding's
    # `input_length` argument is deprecated in Keras 3 (it only triggers a
    # warning there), while Input(shape=...) works on Keras 2 and 3 alike and
    # still lets model.summary() report output shapes before training.
    Input(shape=(seq_length,)),
    Embedding(total_words, 10),                # 10-dim embedding
    SimpleRNN(50, activation='tanh'),          # Vanilla RNN layer
    Dense(total_words, activation='softmax'),  # Predict word probabilities
])

# Targets are integer word ids (not one-hot), hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.01), metrics=['accuracy'])
model.summary()


In [None]:
# 7. Train the Model
# ===============================
# Fit on every (context, next-word) pair for 500 epochs, logging each epoch.
model.fit(x=X, y=y, epochs=500, verbose=1)

In [None]:
# 8. Text Generation Function
# ===============================
def generate_text(seed_text, next_words=20):
    """Greedily extend ``seed_text`` with words predicted by the trained model.

    Uses the notebook-level ``tokenizer``, ``model`` and ``seq_length``.

    Args:
        seed_text: Prompt string to continue.
        next_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by spaces.
        Fewer than ``next_words`` words are appended if the model predicts an
        id that has no word (e.g. the padding id 0).
    """
    for _ in range(next_words):
        # Re-encode the running text and left-pad it to the seq_length ids
        # the model expects as input.
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=seq_length, padding='pre')

        # Greedy decoding: take the single most probable next-word id.
        predicted = int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])

        # Direct id -> word lookup instead of scanning word_index every step.
        word = tokenizer.index_word.get(predicted)
        if word is None:
            # No word for this id: the text would stay unchanged, so every
            # further step would repeat the same prediction. Stop early.
            break
        seed_text += " " + word
    return seed_text

In [None]:
# 9. Try generating text
# ===============================
# Continue a prompt taken from the corpus with 15 predicted words.
print(generate_text("you're the light", next_words=15))