In [2]:
from google.colab import drive 
# Mount Google Drive at /content/drive; every data, model and pickle path used
# later in this notebook lives under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import json 
import matplotlib.pyplot as plt 

In [4]:
# Location of the intents file on the mounted Drive.
path = '/content/drive/MyDrive/LSTM/ new_intents.json'

with open(path) as file:
    data = json.load(file)

intents = data['intents']

# One entry per pattern: the utterance itself and the tag of its intent.
training_sentences = [
    pattern for intent in intents for pattern in intent['patterns']
]
training_labels = [
    intent['tag'] for intent in intents for _ in intent['patterns']
]

# One entry per intent: its tag and its list of candidate responses.
labels = [intent['tag'] for intent in intents]
responses = [intent['responses'] for intent in intents]

num_classes = len(labels)


In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# Map every intent tag (string) to an integer class id. The fitted encoder is
# what turns a predicted class id back into a tag at inference time.
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(training_labels)

# One-hot encode the class ids so they can be used with categorical
# cross-entropy. The encoder is left at its default (sparse) output and the
# result is densified explicitly: the `sparse=` keyword was renamed to
# `sparse_output=` in scikit-learn 1.2 and later removed, so passing either
# spelling ties the notebook to a particular version range.
onehot_encoder = OneHotEncoder()
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)  # column vector expected by the encoder
onehot_encoded = onehot_encoder.fit_transform(integer_encoded).toarray()
print(onehot_encoded)

[[0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0.

In [6]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [7]:
# Text-processing hyper-parameters.
vocab_size = 1000     # tokenizer `num_words`; also the embedding input size
embedding_dim = 32    # width of each embedding vector
max_len = 10          # tokens per padded sequence; same value used for padding and in chat()
oov_token = "<OOV>"   # placeholder token for words not seen during fitting

# NOTE(review): '?' is absent from `filters` while the rest of the ASCII
# punctuation is listed, so presumably a trailing question mark stays attached
# to its word - confirm this is intended.
tokenizer = Tokenizer(
    num_words=vocab_size,
    oov_token=oov_token,
    lower=True,
    filters='!"#$%&()*+,-./:;<=>@[\\]^_`{|}~\t\n',
    split=' ',
)

In [8]:
# Build the tokenizer's word index from the training utterances.
tokenizer.fit_on_texts(training_sentences)

In [9]:
# Integer-encode the training utterances with the fitted tokenizer.
# NOTE(review): the same conversion is repeated as `sequences` in the padding
# cell, and `list_tokenized_train` is not referenced in the cells visible here.
list_tokenized_train = tokenizer.texts_to_sequences(training_sentences)

In [10]:
# Bring every utterance to exactly `max_len` token ids: longer ones are cut at
# the end, shorter ones are zero-filled at the end.
max_len = 10
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    padding='post',
    truncating='post',
)


In [11]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the utterances for validation, stratified on the one-hot
# labels. `random_state` is fixed so the split (and therefore the reported
# validation metrics) is the same on every run of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    onehot_encoded,
    test_size=0.20,
    shuffle=True,
    stratify=onehot_encoded,
    random_state=42,
)

In [12]:
# Report the (samples, sequence length) shape of each split.
train_shape, test_shape = X_train.shape, X_test.shape
print("Train Dimensions : ", train_shape)
print("Test Dimensions : ", test_shape)


Train Dimensions :  (62, 10)
Test Dimensions :  (16, 10)


In [13]:
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation
from keras.models import Model
from keras.layers import Bidirectional, GlobalMaxPool1D




In [14]:
# Network: token ids -> embedding -> LSTM -> max over time -> dense classifier.

# Input layer: one padded sequence of `max_len` token ids per sample.
inp = Input(shape=(max_len,))
max_features = vocab_size
# Embedding layer; id 0 (the padding value) is masked.
embed = embedding_dim
x = Embedding(max_features, embed, mask_zero=True)(inp)
# LSTM layer; return_sequences=True keeps one output vector per time step.
x = LSTM(64, return_sequences=True, name='lstm_layer')(x)
# Max pooling over the time axis reduces the sequence to a single vector.
x = GlobalMaxPool1D()(x)
# Dropout layer
x = Dropout(0.2)(x)
# Dense layers
x = Dense(32, activation="relu")(x)
x = Dense(16, activation="relu")(x)
x = Dropout(0.2)(x)
# Output layer: one softmax unit per column of the one-hot targets, so the
# model follows the intents file instead of a hard-coded class count.
x = Dense(int(y_train.shape[1]), activation="softmax")(x)




In [15]:
# Wrap the layer graph into a trainable model and configure it for
# multi-class classification on one-hot targets.
model = Model(inputs=inp, outputs=x)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10)]              0         
                                                                 
 embedding (Embedding)       (None, 10, 32)            32000     
                                                                 
 lstm_layer (LSTM)           (None, 10, 64)            24832     
                                                                 
 global_max_pooling1d (Globa  (None, 64)               0         
 lMaxPooling1D)                                                  
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                             

In [17]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

In [18]:
# Early-stopping rule: watch validation loss (lower is better) with a patience
# of 10 epochs.
# NOTE(review): `callback_list`, which is what `model.fit` receives, contains
# only the checkpoint, so this object currently has no effect on training -
# confirm whether leaving it out is intentional.
callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
)

In [19]:
# Keep only the weights of the epoch with the highest validation accuracy.
filepath = '/content/drive/MyDrive/LSTM/bestv3.h5'
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Callbacks handed to model.fit.
callback_list = [checkpoint]

In [20]:
# Train on the training split, validating on the held-out split after every
# epoch; the returned History object is kept in `history`.
epochs = 500
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=8,
    callbacks=callback_list,
)

Epoch 1/500
Epoch 1: val_accuracy improved from -inf to 0.31250, saving model to /content/drive/MyDrive/LSTM/bestv3.h5
Epoch 2/500
Epoch 2: val_accuracy improved from 0.31250 to 0.43750, saving model to /content/drive/MyDrive/LSTM/bestv3.h5
Epoch 3/500
Epoch 3: val_accuracy did not improve from 0.43750
Epoch 4/500
Epoch 4: val_accuracy did not improve from 0.43750
Epoch 5/500
Epoch 5: val_accuracy did not improve from 0.43750
Epoch 6/500
1/8 [==>...........................] - ETA: 0s - loss: 1.7323 - accuracy: 0.3750
Epoch 6: val_accuracy did not improve from 0.43750
Epoch 7/500
Epoch 7: val_accuracy did not improve from 0.43750
Epoch 8/500
Epoch 8: val_accuracy did not improve from 0.43750
Epoch 9/500
Epoch 9: val_accuracy did not improve from 0.43750
Epoch 10/500
1/8 [==>...........................] - ETA: 0s - loss: 1.6064 - accuracy: 0.5000
Epoch 10: val_accuracy did not improve from 0.43750
Epoch 11/500
Epoch 11: val_accuracy did not improve from 0.43750
Epoch 12/500
Epoch 12: val

In [21]:
import pickle

# to save the fitted tokenizer
with open('/content/drive/MyDrive/LSTM/tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# to save the fitted label encoder
# (the LabelEncoder itself - not the one-hot target matrix - because inference
# needs its inverse_transform to map a predicted class id back to a tag)
with open('/content/drive/MyDrive/LSTM/label_encoder.pickle', 'wb') as ecn_file:
    pickle.dump(label_encoder, ecn_file, protocol=pickle.HIGHEST_PROTOCOL)

In [22]:
from tensorflow import keras
import random
import numpy as np

In [23]:
def chat():
    """Run an interactive console chat loop with the trained intent model.

    Loads the checkpointed model, the pickled tokenizer and the pickled label
    encoder from Drive, then repeatedly reads a line from standard input,
    predicts its intent tag and prints one randomly chosen response for every
    intent in ``data['intents']`` carrying that tag. Typing ``quit`` (any
    letter case) ends the loop.

    Relies on the notebook-level names ``data`` and, as a fallback,
    ``label_encoder``. Returns ``None``.
    """
    # load trained model
    chat_model = keras.models.load_model('/content/drive/MyDrive/LSTM/bestv3.h5')

    # load tokenizer object
    # (pickle can execute arbitrary code on load - only open files written by
    # this notebook)
    with open('/content/drive/MyDrive/LSTM/tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

    # load label encoder object
    with open('/content/drive/MyDrive/LSTM/label_encoder.pickle', 'rb') as enc:
        saved_encoder = pickle.load(enc)

    # A pickle written by an earlier run may hold the one-hot target matrix
    # rather than an encoder; in that case fall back to the encoder that was
    # fitted in this session.
    if hasattr(saved_encoder, 'inverse_transform'):
        tag_decoder = saved_encoder
    else:
        tag_decoder = label_encoder

    # parameters
    max_len = 10

    while True:

        user_text = input()
        if user_text.lower() == "quit":
            break

        # Pad exactly as the training data was padded (zeros appended at the
        # end); pad_sequences defaults to padding at the front, which would
        # feed the model differently shaped inputs than it was trained on.
        padded = keras.preprocessing.sequence.pad_sequences(
            tokenizer.texts_to_sequences([user_text]),
            maxlen=max_len,
            padding='post',
            truncating='post',
        )
        result = chat_model.predict(padded)

        # inverse_transform returns a one-element array; take the tag itself so
        # the comparison below is a plain string comparison.
        tag = tag_decoder.inverse_transform([np.argmax(result)])[0]

        for i in data['intents']:
            if i['tag'] == tag:
                print("ChatBot:", np.random.choice(i['responses']))
chat()

Hi
ChatBot: Hi there
What is TRF?
ChatBot: The Robotics Forum is VIT Pune's premier robotics club, with a 15-year legacy. Students from various engineering disciplines make up our varied community, all of whom are driven by a passion for robotics. The goal of the team is to gain new skills, explore new technologies, and promote the area of robotics.
How can I join TRF?
ChatBot: 
Every year TRF conducts its recruitment process around the end of academic year. This process is for Mechanical , Electrical , Programming as well as the Admin team. The details about the entire process is informed well before the registration and ample preparation time is provided to the students,. 
Thank You!
ChatBot: Have a nice day
quit
