# LSTM Based Bi-directional RNN for Intent Identification from Text

In [2]:
# Import the required libraries

%tensorflow_version 2.x
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import re
import nltk

nltk.download('stopwords')
nltk.download('punkt')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, Activation, Bidirectional
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras.layers import InputLayer
from keras.optimizers import SGD,Adam

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [33]:
# Mount the google drive folder and load the CSV files in to pandas dataframes

from google.colab import drive

# Location where Google Drive gets mounted inside the Colab VM, and the
# Drive folder that holds this project's data files
gdrive_mount_point = '/content/gdrive'
project_dir = '/content/gdrive/My Drive/IntentClassification'

# (Re)mount the drive and make the project folder the working directory so
# that the data files can be referenced by bare file name
drive.mount(gdrive_mount_point, force_remount = True)
os.chdir(project_dir)

# Load the labelled utterances in to a dataframe
intents = pd.read_csv("intents.csv")

Mounted at /content/gdrive


In [34]:
# Preview the first five rows of the dataframe (five is the default for head)

intents.head()

Unnamed: 0,UserEntry,Intent
0,how to create order,HOWTO_CREATE_ORDER
1,how to log order,HOWTO_CREATE_ORDER
2,list of steps to create order,HOWTO_CREATE_ORDER
3,list of steps to log order,HOWTO_CREATE_ORDER
4,what are the steps to create order,HOWTO_CREATE_ORDER


In [35]:
# Read and process glove vectors
#
# Every non-empty line of the file holds a word followed by the components of
# its vector, separated by whitespace. Three lookup tables are built:
#   word_to_index   : word  -> position of the word in the file (0-based)
#   index_to_word   : index -> word
#   word_to_vec_map : word  -> numpy vector of floats
#
# NOTE(review): indices start at 0, so the first word of the file shares its
# index with the zero padding written by sentences_to_indices. Confirm whether
# index 0 should instead be reserved for padding.

# Initialize index variable to zero
ind = 0

# Initialize dictionaries to store the representations
word_to_index = {}
index_to_word = {}
word_to_vec_map = {}

# Open the glove vectors text file in read mode. The context manager closes
# the handle even if parsing fails, and the explicit encoding avoids decode
# errors on platforms whose default encoding is not UTF-8.
with open("glove.6B.50d.txt", 'r', encoding = 'utf-8') as f:
  # For loop to iterate through the lines read from the file
  for line in f:
    # Split the line read, to read the words and vector representations
    splitLines = line.split()
    # Skip blank lines (e.g. a trailing newline at the end of the file)
    if not splitLines:
      continue
    # Store the word in a variable
    word = splitLines[0]
    # Store the word vectors in a numpy array
    wordEmbedding = np.array(splitLines[1:], dtype = float)

    # Write to the dictionaries
    word_to_index[word] = ind
    index_to_word[ind] = word
    word_to_vec_map[word] = wordEmbedding

    # Increment the index
    ind = ind + 1

In [36]:
# Sanity-check the three lookup tables built from the glove file

# Sizes of the dictionaries (label text paired with the dictionary it describes)
print("\n")
for label, lookup in (
  ("Length of word to index dict is : ", word_to_index),
  ("Length of index to word dict is : ", index_to_word),
  ("Length of word to vector dict is: ", word_to_vec_map),
):
  print(label, len(lookup))

# Spot-check one lookup in each direction
print("\n")
print("Index of happy is: ", word_to_index["happy"])
print("Word at index 25 is: ", index_to_word[25])

# Spot-check one word vector
print("\n")
print("Word vector for happy is:\n ", word_to_vec_map["happy"])



Length of word to index dict is :  400000
Length of index to word dict is :  400000
Length of word to vector dict is:  400000


Index of happy is:  1751
Word at index 25 is:  from


Word vector for happy is:
  [ 0.092086  0.2571   -0.58693  -0.37029   1.0828   -0.55466  -0.78142
  0.58696  -0.58714   0.46318  -0.11267   0.2606   -0.26928  -0.072466
  1.247     0.30571   0.56731   0.30509  -0.050312 -0.64443  -0.54513
  0.86429   0.20914   0.56334   1.1228   -1.0516   -0.78105   0.29656
  0.7261   -0.61392   2.4225    1.0142   -0.17753   0.4147   -0.12966
 -0.47064   0.3807    0.16309  -0.323    -0.77899  -0.42473  -0.30826
 -0.42242   0.055069  0.38267   0.037415 -0.4302   -0.39442   0.10511
  0.87286 ]


In [38]:
# Function to convert an array of sentences to W2V indices after preprocessing

def sentences_to_indices(X, word_to_index, max_len = 40):
  """Convert sentences in to a zero-padded matrix of vocabulary indices.

  Every sentence is lower-cased, stripped of punctuation and underscores,
  split on whitespace and then mapped word by word through `word_to_index`.
  Words that are missing from the vocabulary are mapped to the index of the
  'unk' token.

  Args:
    X: Sequence of sentences. Each element is converted with str() first.
      The sequence is only read, never modified.
    word_to_index: Mapping from word to integer index. Must contain the key
      'unk' whenever a sentence contains an out-of-vocabulary word.
    max_len: Number of columns of the result. Sentences with more words are
      truncated to their first `max_len` words; shorter ones keep trailing
      zeros as padding.

  Returns:
    Numpy array of shape (len(X), max_len) holding the word indices
    (float dtype, as produced by np.zeros).

  Raises:
    KeyError: If an out-of-vocabulary word is met and 'unk' is not a key of
      `word_to_index`.
  """

  # Identify No. of sentences in the input
  m = len(X)
  # Initialize the indices array to zeros based on the dimensions
  X_indices = np.zeros((m, max_len))

  # Iterate over the sentences themselves rather than indexing in to X, so the
  # caller's data is left untouched and any sized iterable is accepted
  for i, sentence in enumerate(X):

    # Convert to lower case
    text = str(sentence).lower()
    # Remove punctuations (whitespace, including new lines, is kept)
    text = re.sub(r'[^\w\s]', '', text)
    # Remove underscore (it counts as a word character, so it survived above)
    text = re.sub(r'[_]', '', text)

    # Split the sentence to words; split() treats new lines like any other
    # whitespace. Keep at most max_len words so that the column index below
    # can never run past the end of the row.
    sentence_words = text.split()[:max_len]

    # For loop to iterate through the words in each of the sentences
    for j, w in enumerate(sentence_words):
      # Update the indices array with the index of the word at the required position
      # If a word does not exist in the array, treat it as unknown ('unk')
      if w in word_to_index:
        X_indices[i, j] = word_to_index[w]
      else:
        X_indices[i, j] = word_to_index['unk']

  # Return the indices array
  return X_indices

In [39]:
# Function to create the pretrained embedding layer for Keras

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
  """Build a frozen Keras Embedding layer loaded with pretrained vectors.

  Args:
    word_to_vec_map: Mapping from word to its vector (1-D numpy array). All
      vectors are expected to share one length.
    word_to_index: Mapping from word to the row the word's vector occupies
      in the embedding matrix. Every word must be a key of `word_to_vec_map`.

  Returns:
    A built, non-trainable Embedding layer with len(word_to_index) + 1 rows
    whose weights are the pretrained vectors. Rows that no word maps to stay
    all-zero.

  Raises:
    KeyError: If a word of `word_to_index` is missing from `word_to_vec_map`.
  """

  # Initialize the vocabulary length (one row more than the number of words)
  vocab_len = len(word_to_index) + 1
  # Extract the vector dimension from an arbitrary entry, so that the function
  # does not depend on a particular word being part of the vocabulary
  emb_dim = next(iter(word_to_vec_map.values())).shape[0]
  # Initialize a numpy array with zeros for the embedding layer
  emb_matrix = np.zeros((vocab_len, emb_dim))

  # Iterate through every word and get the vector representation
  for word, idx in word_to_index.items():
    emb_matrix[idx, :] = word_to_vec_map[word]

  # Create the Keras embedding layer and make it non-trainable, so that the
  # pretrained vectors are not updated while the model is fitted
  embedding_layer = Embedding(vocab_len, emb_dim, trainable = False)
  # Build the layer so that its weight matrix exists before it is overwritten
  embedding_layer.build((None,))

  # Set the weights based on the vectors read previously
  embedding_layer.set_weights([emb_matrix])

  # Return the embedding layer
  return embedding_layer

In [43]:
# Compose the Bidirectional LSTM based model

# Compose the Input layer
# Every sample is a sequence of 40 word indices, the same length that is
# passed as max_len when sentences are converted to indices.
# The shape is given as a tuple, which is the form Keras documents for Input;
# a bare integer is not accepted by every Keras version.
sentence_indices = Input(shape = (40,), dtype = "int32")

# Fetch the word embeddings as the embedding layer
embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)

# Set the embedding layer as part of the model
embeddings = embedding_layer(sentence_indices)

# Initialize max classes count (number of units of the softmax output layer;
# it has to equal the number of columns of the one-hot encoded labels)
max_class_cnt = 4

# Layer 1: returns the full sequence so that the second recurrent layer
# receives one vector per time step
X = Bidirectional(LSTM(units = 128, return_sequences = True))(embeddings)
# Dropout
X = Dropout(0.5)(X)
# Layer 2: returns only the final output, i.e. one vector per sentence
X = Bidirectional(LSTM(units = 128, return_sequences = False))(X)
# Dropout
X = Dropout(0.5)(X)

# Output layer with softmax activation
X = Dense(units = max_class_cnt)(X)
X = Activation('softmax')(X)

# Compose model
model = Model(inputs = sentence_indices, outputs = X)

# Compile and summarize the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 40)]              0         
_________________________________________________________________
embedding_3 (Embedding)      (None, 40, 50)            20000050  
_________________________________________________________________
bidirectional_6 (Bidirection (None, 40, 256)           183296    
_________________________________________________________________
dropout_6 (Dropout)          (None, 40, 256)           0         
_________________________________________________________________
bidirectional_7 (Bidirection (None, 256)               394240    
_________________________________________________________________
dropout_7 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 1028

In [44]:
# Prepare the training inputs (word indices) and targets (one-hot labels)

# Pull the utterances and their intent labels out of the dataframe
userentry = intents["UserEntry"].tolist()
classes = intents["Intent"].tolist()

# Convert the train sentences in to indices
X_train_indices = sentences_to_indices(userentry, word_to_index, 40)

# The sklearn encoder expects a 2-D input, so arrange the labels as one column
class_column = np.array(classes).reshape(-1, 1)

# Initialize the onehot encoder from sklearn (dense output instead of sparse)
# NOTE(review): newer scikit-learn releases appear to have renamed `sparse`
# to `sparse_output` - confirm against the installed version
enc = OneHotEncoder(sparse = False)

# Learn the label categories and convert the train labels to one hot vectors
y_train_oh = enc.fit_transform(class_column)


In [45]:
# Fit the model

# Train on the full training set; the returned object is kept in `history`
history = model.fit(
  X_train_indices,
  y_train_oh,
  batch_size = 2,
  epochs = 20,
  shuffle = True,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [55]:
# Save the model as a h5 file

# File name (relative to the current working directory) of the saved model
model_file = "rnn_biLSTM_intent_model.h5"
model.save(model_file)

In [53]:
# Enter test sentence

#------------------------------------------------------------------------
X_test = "what are the steps to be followed to create an SR?"
#------------------------------------------------------------------------

# Convert user text to indices (the helper expects a list of sentences)
X_test_list = [X_test]
X_test_indices = sentences_to_indices(X_test_list, word_to_index, 40)

# Make prediction on the user entered text
pred = model.predict(X_test_indices)

# Only one sentence was submitted, so work with the first row of the result;
# the encoder's categories give the intent name for every output position
intent_names = enc.categories_[0]
intent_probs = pred[0]

# Print the outputs
print("\nEntered Text: ", X_test)
print("\nMax Probability Intent: ", intent_names[np.argmax(intent_probs)])

print("\nProbability Scores...")

# Table of every intent with its predicted probability
print("\n", "Intent".ljust(30, ' '), "\t", "Probability")
for i, prob in enumerate(intent_probs):
  print("\n", intent_names[i].ljust(30, ' '), "\t", prob)


Entered Text:  what are the steps to be followed to create an SR?

Max Probability Intent:  HOWTO_CREATE_SR

Probability Scores...

 Intent                         	 Probability

 HOWTO_CREATE_ORDER             	 0.016069567

 HOWTO_CREATE_SR                	 0.9801159

 HOWTO_UPDATE_ORDER             	 0.0013079132

 HOWTO_UPDATE_SR                	 0.0025066053
