In [1]:
import random
import numpy as np
import os
import sys
import time
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding
from keras.preprocessing.text import Tokenizer
from keras.utils.data_utils import pad_sequences

In [2]:
tokens = random.randint(1,50)
with open('prompts.txt', 'r') as f:
    text_data = f.readlines()

In [3]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
total_words = len(tokenizer.word_index) + 1

In [4]:
input_sequences = []
next_words = []
for line in text_data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i + 1]
        input_sequences.append(n_gram_sequence[:-1])
        next_words.append(n_gram_sequence[-1])

In [5]:
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = np.array(
    pad_sequences(
    input_sequences, 
    maxlen=max_sequence_len,
    padding='pre')
)

predictors, label = input_sequences[:, :-1], input_sequences[:, -1]

In [6]:
model_filename = 'PANDA.h5'

In [7]:
if not os.path.exists(model_filename):
    # Build the model
    model = Sequential()
    model.add(Embedding(total_words,
                        60,
                        input_length=max_sequence_len - 1))
    model.add(LSTM(200))
    model.add(Dense(total_words, activation='softmax'))
    model.compile(
        loss='sparse_categorical_crossentropy', 
        optimizer='adam', metrics=['accuracy'])
    model.fit(predictors, 
              label, epochs=300, 
              verbose=1)
    model.save(model_filename)
else:
    # Build the model
    model = Sequential()
    model.add(Embedding(
        total_words, 60, 
        input_length=max_sequence_len - 1)
    )
    model.add(LSTM(200))
    model.add(Dense(total_words, activation='softmax'))
    model.compile(
        loss='sparse_categorical_crossentropy', 
        optimizer='adam', 
        metrics=['accuracy']
    )
    model.load_weights(model_filename)

In [8]:
def completion(seed_text, num_words=tokens):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences(
        [token_list], 
        maxlen=max_sequence_len - 1,
        padding='pre'
    )
    predicted_words = []

    for _ in range(num_words):
        predicted = model.predict(token_list, verbose=0)
        predicted_index = np.argmax(predicted)
        predicted_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted_index:
                predicted_word = word
                break
        predicted_words.append(predicted_word)
        token_list = np.append(
            token_list[:, 1:], 
            [[predicted_index]], 
            axis=1
        )

    response = ' '.join(predicted_words)
    return response

In [9]:
def typed(response):
    for char in response:
        sys.stdout.write(char)
        sys.stdout.flush()
        time.sleep(0.01)
    sys.stdout.write('\n')
    
os.system('cls')

0

In [10]:
print('I\'m PANDA , Paradigm-based Artificial Neural Dialogue Agent , A Language Model which is able to predict next words')

I'm PANDA , Paradigm-based Artificial Neural Dialogue Agent , A Language Model which is able to predict next words


In [None]:
user = input('username : ')
while True:
    user_input = input(f"\n ↳ ({user}) ")
    response = completion(user_input)
    typed(response)

username :  vivek09thakur

 ↳ (vivek09thakur)  hello


is a certainly game it ground you've find a good hobby so the husband new new 1 before him about new



 ↳ (vivek09thakur)  how are 


you doing today i'm doing great what about she only it it was hot a stamp on 20 year in a



 ↳ (vivek09thakur)  hi


there is it it's only 10 minutes in a hot day at the same relax to win by one human i
