In [1]:
# ✅ Step 1: Install TensorFlow
!pip install tensorflow



In [2]:
# ✅ Step 2: Import Libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import numpy as np

In [3]:
# ✅ Step 3: Sample Training Corpus
corpus = [
    "the sun rises in the east",
    "the moon shines at night",
    "the stars twinkle at night",
    "the sun sets in the west",
    "we go to school every day",
    "she reads a book every night",
    "he plays football in the evening"
]

In [4]:
# ✅ Step 4: Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

In [5]:
# ✅ Step 5: Create Input Sequences
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

In [6]:
# ✅ Step 6: Pad Sequences
max_seq_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre'))

X = input_sequences[:, :-1]
y = input_sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=total_words)

In [7]:
# ✅ Step 7: Define RNN Model
model = Sequential()
model.add(Embedding(total_words, 64, input_length=max_seq_len - 1))
model.add(LSTM(100))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])



In [8]:
# ✅ Step 8: Train Model
model.fit(X, y, epochs=500, verbose=0)
print("✅ Model trained.")

✅ Model trained.


In [9]:
# ✅ Step 9: Prediction Function
def predict_next_word(model, tokenizer, text, max_seq_len):
    token_list = tokenizer.texts_to_sequences([text])[0]
    token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')
    predicted_probs = model.predict(token_list, verbose=0)
    predicted_index = np.argmax(predicted_probs)
    for word, index in tokenizer.word_index.items():
        if index == predicted_index:
            return word
    return ""

In [None]:
# ✅ Step 10: Try Prediction with User Input
while True:
    input_text = input("Enter a sentence (or type 'quit' to exit): ")
    if input_text.lower() == 'quit':
        break

    # Validate input to contain only words and spaces
    if not all(c.isalpha() or c.isspace() for c in input_text):
        print("Warning: Input should contain only words and spaces.")
        continue

    next_word = predict_next_word(model, tokenizer, input_text, max_seq_len)
    print(f"Input: {input_text}\nNext word prediction: {next_word}")

Enter a sentence (or type 'quit' to exit): hi
Input: hi
Next word prediction: sun
