<a href="https://colab.research.google.com/github/vivek09thakur/PANDA/blob/main/Colab%20Notebook/Panda_Code_Refactored.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Refactored Code of PANDA**

- [x] **DRIVE MOUNTED**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


- [x] **A Refactored version Paradigm based Artificial Neural Dailogue Agent (P.A.N.D.A)**

In [2]:
import os
import sys
import time
import numpy as np
from keras.models import Sequential,load_model
from keras.layers import Dense,LSTM,Embedding
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences


class PANDA:

    def __init__(self,prompts,model_name,tokens=25):
        self.prompts = prompts
        self.tokens = tokens
        with open(self.prompts,'r') as f:
            # Read the lines from the prompts file
            self.text_data = f.readlines()
        # Create a tokenizer
        self.tokenizer = Tokenizer()
        self.tokenizer.fit_on_texts(self.text_data)
        self.total_words = len(self.tokenizer.word_index) + 1
        self.model_name = model_name
        # self.max_sequence_len = 0


    def preprocess_data(self):
        self.input_sequences = []
        self.next_words = []
        for line in self.text_data:
            # Convert the text to sequences
            token_list = self.tokenizer.texts_to_sequences([line])[0]
            for i in range(1,len(token_list)):
                # Create n-grams
                n_grams = token_list[:i+1]
                self.input_sequences.append(n_grams)
                self.next_words.append(token_list[i])

    def generate_pad_sequences(self):
        # Pad sequences
        self.max_sequence_len = max([len(x) for x in self.input_sequences])
        self.input_sequences = np.array(
            pad_sequences(self.input_sequences,
                          maxlen=self.max_sequence_len,padding='pre'))
        self.predictors, self.label = self.input_sequences[:, :-1], self.input_sequences[:, -1]

    def create_model(self,number_of_neurons):
        # Create model
        self.model = Sequential()
        self.model.add(Embedding(self.total_words,100,
                                 input_length=self.max_sequence_len-1))
        self.model.add(LSTM(number_of_neurons))
        self.model.add(Dense(self.total_words,activation='softmax'))
        self.model.compile(loss='sparse_categorical_crossentropy',
                           optimizer='adam',metrics=['accuracy'])
        self.model.fit(self.predictors,self.label,epochs=500,
                       verbose=1)
        self.model.save(self.model_name)

    def load_model(self):
        self.model = load_model(self.model_name)

    def train_or_load_model(self,neuron_num):
        if os.path.exists(self.model_name):
            self.load_model()
        else:
            self.create_model(number_of_neurons=neuron_num)

    def completion(self,user_input):
        # Predictions
        token_list = self.tokenizer.texts_to_sequences([user_input])[0]
        token_list = pad_sequences([token_list],
                                      maxlen=self.max_sequence_len-1,
                                      padding='pre')
        predicted_words = []

        for _ in range(self.tokens):
            predicted = self.model.predict(token_list,verbose=0)
            predicted_index = np.argmax(predicted)
            output_word = ''
            for word,index in self.tokenizer.word_index.items():
                if index == predicted_index:
                    output_word = word
                    break
            predicted_words.append(output_word)
            token_list = np.append(token_list[:,1:],[[predicted_index]],axis=1)
        return ' '.join(predicted_words)

    def type_response(self,response):
        for char in response:
            sys.stdout.write(char)
            sys.stdout.flush()
            time.sleep(0.01)
        print()

    def introduce(self):
        print('Hello, I am PANDA, Paradgim-based Artificial Neural Dialogue Agent. An AI Language Model which is able to predict next sequence of words based on the input sequence of words.')


- [ ] **Test Run**

In [3]:
parameters = [
    '/content/drive/MyDrive/Colab Notebooks/Dataset/PANDA_LLM/prompts.txt', # prompts file
    'Saved Model/panda.h5', # model name
     25 # number of tokens to generate
]

panda = PANDA(parameters[0],parameters[1],parameters[2])
panda.preprocess_data()
panda.generate_pad_sequences()

if __name__=='__main__':

    panda.train_or_load_model(neuron_num=1000)
    panda.introduce()

    while True:
        prompts = input(f"\n ↳ (user) : " )
        completion = panda.completion(prompts)
        panda.type_response(completion)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

  saving_api.save_model(


Hello, I am PANDA, Paradgim-based Artificial Neural Dialogue Agent. An AI Language Model which is able to predict next sequence of words based on the input sequence of words.

 ↳ (user) : hello
will be well it happen i today are you going to move there because well whatever the world new world war ii monument on the

 ↳ (user) : what is your name?
favorite movie i couldn't wait to eat been a lot of good deals as usual money do you believe the cat doesn't care money in

 ↳ (user) : what are you talking about
sunday is mother's day yes really much water is nice but what's wrong with it no the banana was delicious you have a big dictionary

 ↳ (user) : are you hungry
it's ninety degrees outside it's not to be a job yes you chased that i thought you just come back to shoppers i'm still waiting

 ↳ (user) : okay are you talking about the weather
you have to be doing tell to the bathroom you drink a good nose for thatcigarettes stink that was a beautiful i was nearby too

 ↳ (user) : i am n

KeyboardInterrupt: ignored