In [None]:
!pip install tensorflow numpy nltk

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import random
import sys
import io
import nltk
# Download NLTK's 'punkt' resource.
# NOTE(review): nltk is not used by any other visible cell — confirm this download is still needed.
nltk.download('punkt')


In [None]:
# Read the entire training corpus into one string.
with open(file='mywords.txt', mode='r', encoding='utf8') as f:
    text = f.read()


In [None]:
# Build the vocabulary from the whole corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
# +1 because the tokenizer numbers words from 1; index 0 is left for padding.
total_words = 1 + len(tokenizer.word_index)

# For every line, emit each prefix of length >= 2 as one training sequence,
# e.g. tokens [a, b, c] yield [a, b] and [a, b, c].
input_sequences = []
for line in text.split('\n'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )


In [None]:
# Left-pad every n-gram with zeros up to the length of the longest one so that
# they stack into a single 2-D array.
max_sequence_len = max(map(len, input_sequences))
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)


In [None]:
# Everything except the last token is the model input ...
predictors = input_sequences[:, :-1]
# ... and the last token, one-hot encoded over the vocabulary, is the target.
label = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)


In [None]:
# Next-word model: embedding -> two stacked LSTMs -> dense softmax over the vocabulary.
model = tf.keras.Sequential([
    # One 100-dimensional vector per token id; inputs are the padded prefixes of
    # length max_sequence_len-1 (the last token of each n-gram is the label).
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_len-1),
    tf.keras.layers.BatchNormalization(),
    # return_sequences=True so the second LSTM receives the full sequence.
    tf.keras.layers.LSTM(150, return_sequences = True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.LSTM(100),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # One output unit per vocabulary index, matching the one-hot labels.
    tf.keras.layers.Dense(total_words, activation='softmax')
])

# Use `learning_rate`: the old `lr` alias is deprecated, and newer Keras optimizers
# either ignore it with a warning or reject it, so the 0.01 rate would not be applied.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.01), metrics=['accuracy'])
# Stop training once the training loss has failed to improve for 3 epochs.
es = tf.keras.callbacks.EarlyStopping(monitor='loss', mode='min', verbose=1, patience=3)


In [None]:
# Train on the n-gram prefixes; the `es` callback may end the run before 100 epochs.
history = model.fit(
    x=predictors,
    y=label,
    batch_size=256,
    epochs=100,
    callbacks=[es],
)


In [None]:
# Set the number of words to generate
num_words = 50

# Set the starting seed sequence
seed_text = "I"

# Loop to generate text, greedily appending the most likely next word each step
for _ in range(num_words):
    # Tokenize the text generated so far
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    # Pad (or, once the text outgrows the window, truncate from the front) to the
    # input length the model was built with
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    # Predict the next-word distribution; the result has one row because the batch size is 1
    predicted = model.predict(token_list, verbose=0)
    # Index 0 is the padding slot and has no entry in tokenizer.index_word, so a
    # plain argmax landing on it would raise KeyError. Pick the best real word by
    # searching indices 1.. and shifting the result back by one.
    next_word_index = int(np.argmax(predicted[0][1:])) + 1
    # Convert the index to the predicted word
    next_word = tokenizer.index_word[next_word_index]
    # Add the predicted word to the seed text
    seed_text += " " + next_word

# Print the generated text
print(seed_text)


In [None]:
# Load the fine tuning data
# NOTE(review): this is the same file the base model was trained on — confirm the
# intended fine tuning corpus.
with open('mywords.txt', 'r', encoding='utf8') as f:
    fine_tuning_text = f.read()

# Tokenize the fine tuning data with the vocabulary the base model was trained on.
# The tokenizer is deliberately NOT re-fitted here: fit_on_texts rebuilds word_index
# from the accumulated counts, which can renumber or add words. That would break the
# id <-> word mapping `model` relies on and the shape match needed by the weight copy
# at the end of this cell. Words outside the existing vocabulary are dropped.
total_fine_tuning_words = len(tokenizer.word_index) + 1
fine_tuning_input_sequences = []
for line in fine_tuning_text.split('\n'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    # Every prefix of length >= 2 becomes one training sequence.
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        fine_tuning_input_sequences.append(n_gram_sequence)
# Fail with a clear message instead of an opaque max() error on an empty corpus.
if not fine_tuning_input_sequences:
    raise ValueError("fine tuning data produced no training sequences")
max_fine_tuning_sequence_len = max([len(x) for x in fine_tuning_input_sequences])
fine_tuning_input_sequences = np.array(pad_sequences(fine_tuning_input_sequences, maxlen=max_fine_tuning_sequence_len, padding='pre'))
# Last token of each padded n-gram is the label; everything before it is the input.
fine_tuning_predictors, fine_tuning_label = fine_tuning_input_sequences[:,:-1],fine_tuning_input_sequences[:,-1]
fine_tuning_label = tf.keras.utils.to_categorical(fine_tuning_label, num_classes=total_fine_tuning_words)

# Build and compile the fine tuning model
# The embedding is sized from the fine tuning data itself (vocabulary and sequence
# length) so the model always matches the arrays it is trained on.
fine_tuning_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_fine_tuning_words, 100, input_length=max_fine_tuning_sequence_len-1),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.LSTM(150, return_sequences = True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.LSTM(100),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(total_fine_tuning_words, activation='softmax')
])
# `learning_rate` replaces the deprecated `lr` alias, which newer Keras optimizers
# ignore with a warning or reject.
fine_tuning_model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.01), metrics=['accuracy'])

# Train the fine tuning model
history_fine_tuning = fine_tuning_model.fit(fine_tuning_predictors, fine_tuning_label, epochs=100, batch_size=256, callbacks=[es])

# Combine the fine tuning layer with the original model
# Only the final Dense layer's kernel and bias are copied across.
# NOTE(review): that layer was trained on top of fine_tuning_model's own hidden
# layers, not `model`'s — confirm this is intended rather than continuing to train
# `model` itself on the fine tuning data.
model.layers[-1].set_weights(fine_tuning_model.layers[-1].get_weights())
# Recompile with a fresh optimizer instance.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.01), metrics=['accuracy'])
