In [1]:
import json 
import numpy as np
import pandas as pd
from nltk_utils import tokenize, stem, bag_of_words

import tensorflow as tf

In [2]:
# import nltk
# nltk.download("punkt")

# Train

In [3]:
# Tokens dropped before stemming (punctuation and very common words).
# Compared against the lower-cased token when the vocabulary is built.
ignore_words = ["?", "i", "it", "a", "!", "me", "you", "'s", "does", "of", "do", "my", "are"]

In [4]:
# get tags and set of all words from training data
# xy contains sample prompt and its expected output category
#
#   tags      : intent tags, in the order they appear in intents.json
#   all_words : sorted, de-duplicated stems of every pattern word
#   xy        : list of (stemmed tokens of one pattern, tag) pairs

with open("intents.json", "r", encoding="utf-8") as f:
    intents = json.load(f)

tags = []
all_words = []
xy = []

for intent in intents["intents"]:
    tag = intent["tag"]
    tags.append(tag)

    for pattern in intent["patterns"]:

        words = tokenize(pattern)
        # ignore-word check is case-insensitive; stemming happens afterwards
        l1 = [stem(w) for w in words if w.lower() not in ignore_words]

        all_words.extend(l1)
        xy.append((l1, tag))

all_words = sorted(set(all_words))

# Shuffle with a fixed seed so a fresh-kernel "Run All" yields the same
# sample order (and the same displayed outputs) every time.
np.random.default_rng(42).shuffle(xy)

In [5]:
# Dump the intent tags, the stemmed vocabulary and the (tokens, tag) pairs,
# one object per line.
for collected in (tags, all_words, xy):
    print(collected)

['unknown', 'greeting', 'goodbye', 'thanks', 'items', 'payments', 'delivery', 'funny']
['appreci', 'avail', 'buy', 'bye', 'card', 'cash', 'credit', 'deliv', 'deliveri', 'expect', 'funni', 'goodby', 'hello', 'help', 'hey', 'hi', 'how', 'item', 'joke', 'know', 'later', 'laugh', 'lot', 'make', 'okay', 'on', 'parcel', 'pay', 'payment', 'paypal', 'product', 'realli', 'sell', 'ship', 'someth', 'thank', 'that', 'up', 'what']
[(['funni', 'joke'], 'funny'), (['bye'], 'goodbye'), (['know', 'joke'], 'funny'), (['sell'], 'items'), (['what', 'up'], 'greeting'), (['goodby'], 'goodbye'), (['expect'], 'delivery'), (['hello'], 'greeting'), (['thank', 'lot'], 'thanks'), (['ship'], 'delivery'), (['thank'], 'thanks'), (['parcel'], 'delivery'), (['cash', 'on', 'deliveri'], 'payments'), (['make', 'laugh'], 'funny'), (['that', 'help'], 'thanks'), (['deliveri'], 'delivery'), (['pay'], 'payments'), (['credit', 'card'], 'payments'), (['buy'], 'items'), (['realli', 'appreci'], 'thanks'), (['someth', 'funni'], 'f

In [6]:
# Tabulate the (stemmed tokens, tag) pairs for inspection:
# column 0 holds the token list, column 1 the tag.
df = pd.DataFrame(xy)
df.head()

Unnamed: 0,0,1
0,"[funni, joke]",funny
1,[bye],goodbye
2,"[know, joke]",funny
3,[sell],items
4,"[what, up]",greeting


In [7]:
# Show only the patterns labelled with the "delivery" tag (column 1 is the tag).
df[df[1] == "delivery"]

Unnamed: 0,0,1
6,[expect],delivery
9,[ship],delivery
11,[parcel],delivery
15,[deliveri],delivery
25,[deliv],delivery


In [8]:
# Prepare training set
#
#   X_train : one bag-of-words vector per pattern, over the all_words vocabulary
#   y_train : one one-hot vector per pattern, over tags

X_train = []
y_train = []

for sentence, tag in xy:
    bag = bag_of_words(sentence, all_words)
    X_train.append(bag)

    # One-hot encode the tag by its position in `tags`.
    # Feeding the tag *string* to bag_of_words would make the label depend on
    # how that helper treats a bare string instead of a token list
    # (NOTE(review): helper is defined in nltk_utils, not visible here).
    label = np.zeros(len(tags), dtype=np.float32)
    label[tags.index(tag)] = 1.0
    y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)

In [9]:
# Inspect the bag-of-words feature matrix (one row per training pattern).
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]], dtype=float32)

In [10]:
# Inspect the label matrix (one row per training pattern, one column per tag).
y_train

array([[0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 

# Model

In [11]:
# Feed-forward intent classifier:
#   bag-of-words vector -> Dense(16, relu) -> Dense(16, relu) -> softmax over tags
ann = tf.keras.models.Sequential()

# Declare the input with an explicit Input object rather than passing
# input_shape to the first Dense layer (Keras warns about the latter).
ann.add(tf.keras.Input(shape=(len(all_words),)))
ann.add(tf.keras.layers.Dense(units=16, activation='relu'))
ann.add(tf.keras.layers.Dense(units=16, activation='relu'))

# One output unit per tag; softmax yields a probability per tag.
ann.add(tf.keras.layers.Dense(units=len(tags), activation='softmax'))

# categorical_crossentropy matches the one-hot labels in y_train.
ann.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
# Train on the whole training set: mini-batches of 6 samples, 200 epochs.
ann.fit(X_train, y_train, batch_size=6, epochs = 200)

Epoch 1/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0s/step - accuracy: 0.3034 - loss: 2.0624   
Epoch 2/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2876 - loss: 2.0594 
Epoch 3/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.2352 - loss: 2.0444  
Epoch 4/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3102 - loss: 2.0353 
Epoch 5/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.2824 - loss: 2.0136  
Epoch 6/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.3186 - loss: 2.0011  
Epoch 7/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.3965 - loss: 1.9949  
Epoch 8/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.3965 - loss: 1.9804  
Epoch 9/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x28098d9ea90>

In [33]:
# Tag index of input sentence

# input needs to be tokenized, stemmed and vectorized
input_1 = "hello"

# Same preprocessing as the training patterns: tokenize, drop ignored words
# (case-insensitively), stem - and only then build the bag-of-words vector.
tokens_1 = [stem(w) for w in tokenize(input_1) if w.lower() not in ignore_words]

# reshape(1, -1) turns the single vector into a batch of one sample;
# argmax gives the index of the most probable tag.
np.argmax(ann.predict(bag_of_words(tokens_1, all_words).reshape(1, -1)))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


1

# Response

In [14]:
# Respond based on given set of training responses

def respond(sentence, threshold=0.4):

    '''
    Classify a user prompt and pick a reply for the predicted intent.

    input : 
        sentence  : input prompt from user (raw text)
        threshold : minimum probability the most likely intent must reach;
                    below it, the first intent in the training data is used

    output : 
        response : text chosen at random from that intent's responses in training data
        tag :      tag of the selected intent
    '''

    # Same preprocessing as the training patterns: tokenize, drop ignored
    # words case-insensitively, then stem.
    words = tokenize(sentence)
    stems = [stem(w) for w in words if w.lower() not in ignore_words]
    bag = bag_of_words(stems, all_words).reshape(1, -1)

    # A single sample is passed in, so only the first row of the prediction matters.
    probs = ann.predict(bag)[0]

    if max(probs) < threshold:
        # Not confident enough: fall back to intent 0
        # (assumed to be the catch-all 'unknown' intent - verify against intents.json).
        max_idx = 0
    else:
        max_idx = int(np.argmax(probs))

    intent = intents['intents'][max_idx]
    response = np.random.choice(intent['responses'])
    tag = intent['tag']

    return response, tag


In [31]:
# Try the full pipeline on a sample prompt; the cell shows the (response, tag) pair.
input_2 = "how can i pay ?"

respond(input_2)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


('We accept VISA, Mastercard and Paypal', 'payments')

# Speech

In [16]:
# pip install PyAudio
# pip install SpeechRecognition
# pip install pyttsx3

In [55]:
import speech_recognition as sr 
import pyttsx3

# Text-to-speech engine shared by the voice-selection cells and speak().
engine = pyttsx3.init()

In [56]:
# Print the id of every text-to-speech voice the engine reports on this machine

installed_voices = engine.getProperty('voices')
for installed in installed_voices:
    print(installed.id)

HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0
HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\MSTTS_V110_hiIN_KalpanaM
HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\MSTTS_V110_hiIN_HemantM
HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0


In [59]:
# Assigning language and voice

# Speech recognition language, later passed to recognize_google as `language`.
# That API expects a language tag such as "en-US", not a language name.
lang = 'en-US'

# Use the first voice the engine reports (see the voice ids listed above;
# the order is machine-specific).
voc = engine.getProperty('voices')[0].id

engine.setProperty('voice', voc)

In [20]:
# Speech to text

def get_transcript(lang='en-US'):

    ''' 
    Capture one utterance from the microphone and transcribe it.

    input : 
        lang : language of voice input, forwarded to recognize_google as its
               `language` argument (a language tag such as 'en-US')
        
    output : transcript, bool
        if no error, returns transcript of given voice input and True.
        else, returns error message and False.
    '''

    recognizer = sr.Recognizer()

    print("\nListening ......\n")

    # The microphone is only needed while capturing; recognition runs afterwards.
    with sr.Microphone() as source:
        audio = recognizer.listen(source)

    try:
        transcript = recognizer.recognize_google(audio, language=lang)

    except sr.UnknownValueError:
        print("Could not understand audio")
        return "Could not understand audio", False

    except sr.RequestError as e:
        # Print the underlying reason for debugging; the returned message
        # (which the caller may speak aloud) stays short.
        print(f"Could not request results: {e}")
        return "Could not request results", False

    print(transcript)
    return transcript, True
 

In [53]:
# Text to speech

def speak(response):
    
    '''
    Read a piece of text aloud with the notebook-level pyttsx3 engine.

    input : response 
        text to be converted to speech

    output : None
    '''

    # Queue the utterance, then run the engine until the queue is processed.
    engine.say(response)
    engine.runAndWait()


# Main

In [None]:
# Voice chat loop: listen, classify, answer aloud; ends after a 'goodbye' intent.
while True:

    text, correct = get_transcript(lang)

    if not correct:
        # On failure `text` holds the error message - read it out and listen again.
        speak(text)
        continue

    print("You : ", text)
    response, tag = respond(text)

    print("Chatbot : ", response)
    speak(response)

    if tag == 'goodbye':
        break