# In [None]:
import numpy as np
import tensorflow as tf
import gradio as gr
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional

def predict_next(query, num_words):
  """Extend ``query`` by greedily predicting up to ``num_words`` more words.

  On every step the current text is tokenized with the module-level
  ``tokenizer``, left-padded to ``max_len - 1`` tokens, and passed to the
  module-level ``model``; the word whose index has the highest predicted
  score is appended to the text, separated by a single space.

  Args:
    query: Seed text to continue.
    num_words: Number of words to append. Any value accepted by ``int()``
      works (a float such as ``3.0`` is truncated to ``3``); ``None`` and
      negative values are treated as zero.

  Returns:
    The seed text followed by the predicted words. Fewer than ``num_words``
    words are appended if the model predicts an index that has no entry in
    ``tokenizer.index_word``.
  """
  # range() rejects floats, and a UI number field may deliver one (or None),
  # so normalise the count to a non-negative int first.
  steps = 0 if num_words is None else max(int(num_words), 0)

  text = query
  for _ in range(steps):
    token_text = tokenizer.texts_to_sequences([text])[0]
    padded_token = pad_sequences([token_text], maxlen=max_len-1, padding='pre')
    # verbose=0 keeps Keras from printing a progress bar for every word.
    position = int(np.argmax(model.predict(padded_token, verbose=0)))

    word = tokenizer.index_word.get(position, '')
    if not word:
      # No vocabulary word for this index: appending '' would only add a
      # trailing space and leave the tokens fed to the next step unchanged,
      # so stop instead of padding the result with blanks.
      break
    text += " " + word
  return text

# Read the whole training corpus. The encoding is stated explicitly so the
# decoded text does not depend on the platform's default locale encoding.
with open('data.txt', 'r', encoding='utf-8') as file:
  text_data = file.read()

# Split the corpus on '.' into rough sentences, trim surrounding whitespace,
# and drop pieces that are empty once trimmed (e.g. the newline left after
# the final period).
sentences = [piece.strip() for piece in text_data.split('.')]
sentences = [sentence for sentence in sentences if sentence]

# Fit the vocabulary on the cleaned sentences.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

# For every sentence, emit each prefix of two or more tokens. The last token
# of a prefix is the prediction target and the tokens before it are the
# context.
input_sequences = []
for line in sentences:
  tokenized_line = tokenizer.texts_to_sequences([line])[0]
  input_sequences.extend(
      tokenized_line[:end] for end in range(2, len(tokenized_line) + 1))

# Fail with a clear message instead of the opaque "max() arg is an empty
# sequence" when the corpus yields nothing to train on.
if not input_sequences:
  raise ValueError(
      "No training sequences could be built: the corpus contains no "
      "sentence with at least two words.")

max_len = max(len(sequence) for sequence in input_sequences)

# Left-pad every prefix to the same length so the target is always the final
# column.
padded_input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='pre')

# NOTE: `input` shadows the builtin of the same name; the name is kept
# because the training call further down in this script refers to it.
input = padded_input_sequences[:, :-1]
output = padded_input_sequences[:, -1]

# +1 leaves room for index 0, which pad_sequences uses as the padding value
# and which is not assigned to any word in word_index.
num_words = len(tokenizer.word_index)+1
output = to_categorical(output, num_classes=num_words)

# Network: 100-dimensional embedding over the vocabulary -> LSTM with 200
# units -> softmax over the vocabulary, giving one score per candidate word.
model = Sequential([
    Embedding(num_words, 100, input_length=max_len-1),
    LSTM(200),
    Dense(num_words, activation='softmax'),
])

# Targets are one-hot encoded, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the (context, next-word) pairs for 100 passes over the data.
model.fit(input, output, epochs=100)

# Web UI around predict_next: a text box for the seed query and a number
# field for how many words to generate; the completed text is shown as text.
iface = gr.Interface(
    title="Next Word Prediction with LSTM",
    description="Enter a query and the number of words to predict. The model will generate a completed sentence.",
    fn=predict_next,
    inputs=["text", "number"],
    outputs="text",
)

# Start serving the interface.
iface.launch()