1. Theory + NLP concepts (stemming, tokenization, BOW)

2. Create training data

3. PyTorch model and training

4. Save/load model and implement the chat

X: BOW --> Y: tag (based on freq) --> response

NLP Theory

1. Tokenization

    "Aren't you there?"

    --> ["are", "n't", "you", "there", "?"]

2. Stemming

    "organize", "organizes", "organizing"
    
    --> ["organ", "organ", "organ"]


# Hyperparameters

In [38]:
# Training hyperparameters, gathered in one place so they are easy to tune.
batch_size = 8         # samples per mini-batch handed to the DataLoader
learning_rate = 0.001  # step size passed to the optimizer
# Misspelled alias kept on purpose: other cells still read `learninig_rate`.
learninig_rate = learning_rate
hidden_size = 128      # width of both hidden linear layers
num_epochs = 1000      # full passes over the training set

# Tokenization and Stemming

In [1]:
import nltk
from nltk.stem.porter import PorterStemmer
# nltk.download('punkt')
import numpy as np

# Single shared Porter stemmer instance; `stem()` below delegates to it.
stemmer = PorterStemmer()

In [2]:
def tokenize(sentence):
    """Split ``sentence`` into a list of tokens using NLTK's ``wordpunct_tokenize``."""
    tokens = nltk.wordpunct_tokenize(sentence)
    return tokens

In [3]:
def stem(word):
    """Lower-case ``word`` and return its stem from the shared ``stemmer``."""
    lowered = word.lower()
    return stemmer.stem(lowered)

In [4]:
# Walk one sample sentence through the pipeline: raw text -> tokens -> stems.
a = "See you later, thanks for visiting"
tokenized_a = tokenize(a)
stemmed_a = list(map(stem, tokenized_a))

for stage in (a, tokenized_a, stemmed_a):
    print(stage)


See you later, thanks for visiting
['See', 'you', 'later', ',', 'thanks', 'for', 'visiting']
['see', 'you', 'later', ',', 'thank', 'for', 'visit']


In [5]:
def bag_of_words(tokenized_sentence, all_words):
    """
    Encode a tokenized sentence as a binary bag-of-words vector.

    Every token is passed through ``stem`` and the stemmed tokens are
    compared against the vocabulary. The result has exactly one slot per
    vocabulary entry, in vocabulary order.

    Args:
        tokenized_sentence: iterable of raw (not yet stemmed) string tokens.
        all_words: sequence of vocabulary words. They are compared as-is
            against the stemmed tokens, so they should already be stemmed.

    Returns:
        np.ndarray of dtype float32 and shape ``(len(all_words),)``; element
        ``i`` is 1.0 when ``all_words[i]`` occurs among the stemmed tokens
        and 0.0 otherwise.

    Example (taking tokens that stemming leaves unchanged):
        sentence = ["hello", "ha", "real", "you"]
        words    = ["hi", "hello", "I", "think", "you"]
        BOW      = [0, 1, 0, 0, 1]
    """
    # A set gives constant-time membership tests instead of rescanning the
    # token list once for every vocabulary word.
    stemmed_tokens = {stem(w) for w in tokenized_sentence}

    bags = np.zeros(len(all_words), dtype=np.float32)

    for idx, w in enumerate(all_words):
        if w in stemmed_tokens:
            bags[idx] = 1.0

    return bags

In [6]:
# Quick sanity check of bag_of_words on a hand-made vocabulary.
words = ['hi', 'hello', 'I', 'think', 'you']
sentence = ['hello', 'ha', 'real', 'you']
bog = bag_of_words(tokenized_sentence=sentence, all_words=words)
print(bog)

[0. 1. 0. 0. 1.]


# Manipulating json, all_words, tags, xy

In [7]:
import json

In [8]:
# Load the intent definitions (tags, example patterns, canned responses).
# The encoding is stated explicitly so the file is decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open('data/intents.json', 'r', encoding='utf-8') as f:
    intents = json.load(f)

intents

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi',
    'Hey',
    'How are you',
    'Is anyone there?',
    'Hello',
    'Good day'],
   'responses': ['Hey :-)',
    'Hello, thanks for visiting',
    'Hi there, what can I do for you?',
    'Hi there, how can I help?']},
  {'tag': 'goodbye',
   'patterns': ['Bye', 'See you later', 'Goodbye'],
   'responses': ['See you later, thanks for visiting',
    'Have a nice day',
    'Bye! Come back again soon.']},
  {'tag': 'thanks',
   'patterns': ['Thanks', 'Thank you', "That's helpful", "Thank's a lot!"],
   'responses': ['Happy to help!', 'Any time!', 'My pleasure']},
  {'tag': 'items',
   'patterns': ['Which items do you have?',
    'What kinds of items are there?',
    'What do you sell?'],
   'responses': ['We sell coffee and tea', 'We have coffee and tea']},
  {'tag': 'payments',
   'patterns': ['Do you take credit cards?',
    'Do you accept Mastercard?',
    'Can I pay with Paypal?',
    'Are you cash only?'],
   'responses': ['We 

In [9]:
# Gather the raw vocabulary, the intent tags and the (tokens, tag) pairs.
all_words, tags, xy = [], [], []

for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent["patterns"]:
        tokens = tokenize(pattern)
        # Flatten: add the individual tokens, not the token list itself.
        all_words += tokens
        xy.append((tokens, tag))


In [10]:
# Drop punctuation tokens, stem what is left, then deduplicate and sort.
ignore_words = ['?', '!', '.', ',']
all_words = sorted({stem(w) for w in all_words if w not in ignore_words})
print(all_words[:5])

["'", 'a', 'accept', 'anyon', 'are']


In [11]:
# Deduplicate the tags and put them in a stable, sorted order; a tag's
# position in this list is later used as its class index.
tags = list(set(tags))
tags.sort()
tags[:5]

['delivery', 'funny', 'goodbye', 'greeting', 'items']

# dataset

In [13]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [14]:
# Turn every (tokens, tag) pair into a bag-of-words row and an integer label.
x_train = []
y_train = []

for (pattern_sentence, tag) in xy:
    x_train.append(bag_of_words(pattern_sentence, all_words))
    # CrossEntropyLoss expects class indices, not one-hot vectors.
    y_train.append(tags.index(tag))

# Pin the dtypes explicitly: NumPy's default integer type is platform
# dependent (it can be int32), while CrossEntropyLoss requires int64 targets.
x_train = np.array(x_train, dtype=np.float32)
y_train = np.array(y_train, dtype=np.int64)


In [18]:
# Each bag-of-words row must have one slot per vocabulary word.
len(all_words) == x_train[0].size

True

In [24]:
class ChatDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Attributes:
        x_data: indexable collection of feature rows.
        y_data: indexable collection of labels, aligned with ``x_data``.
        n_samples: number of samples, taken from ``len(x_train)``.
    """

    def __init__(self, x_train, y_train):
        self.x_data = x_train
        self.y_data = y_train
        self.n_samples = len(x_train)

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.x_data[index]
        label = self.y_data[index]
        return features, label
    
    
    
dataset = ChatDataset(x_train, y_train)
# num_workers=0 loads batches in the main process. The data is already a
# small in-memory array, so worker processes only add start-up cost on every
# epoch, and a Dataset class defined inside a notebook may fail to reach
# workers on platforms that spawn rather than fork.
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [25]:
# Peek at the first (features, label) pair returned by the dataset.
dataset[0]

(array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.], dtype=float32),
 3)

# model and pipeline

In [30]:
import torch
import torch.nn as nn

# Use the GPU when one is available and fall back to the CPU otherwise, so
# every later `.to(device)` call also works on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [34]:
# Network dimensions: one input per vocabulary word, one output per tag.
input_size, num_classes = len(all_words), len(tags)

In [27]:
class net(nn.Module):
    """Feed-forward classifier: three linear layers with ReLU in between.

    Args:
        input_size: number of input features.
        hidden_size: width of both hidden layers.
        num_classes: number of output scores.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw output of the last layer; no softmax is applied."""
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        return self.l3(hidden)

In [44]:
# Build the classifier, then move its parameters to the selected device.
model = net(input_size, hidden_size, num_classes)
model = model.to(device)

In [45]:
# Adam updates every model parameter; cross-entropy scores the class outputs.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learninig_rate)
criterion = nn.CrossEntropyLoss()

In [47]:
# Number of mini-batches per epoch. len(train_loader) is already the batch
# count, so it must not be divided by batch_size a second time.
num_iteration = len(train_loader)

# Make sure the model is in training mode, e.g. when this cell is re-run
# after the model was switched to eval mode.
model.train()

for epoch in range(num_epochs):
    for i, (x, y) in enumerate(train_loader):
        x = x.to(device)
        # CrossEntropyLoss requires int64 class indices; cast explicitly so
        # the loop does not depend on the integer dtype of the label array.
        y = y.to(device=device, dtype=torch.long)

        y_hat = model(x)

        loss = criterion(y_hat, y)

        # Standard step: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # The reported value is the loss of the last mini-batch of the epoch.
    if (epoch+1) % 100 == 0:
        print(f'Epoch {epoch+1} / {num_epochs} loss = {loss.item():.4f}')

print(f'Final loss : {loss.item():.4f}')


Epoch 100 / 1000 loss = 0.0012
Epoch 200 / 1000 loss = 0.0004
Epoch 300 / 1000 loss = 0.0007
Epoch 400 / 1000 loss = 0.0000
Epoch 500 / 1000 loss = 0.0001
Epoch 600 / 1000 loss = 0.0001
Epoch 700 / 1000 loss = 0.0000
Epoch 800 / 1000 loss = 0.0000
Epoch 900 / 1000 loss = 0.0000
Epoch 1000 / 1000 loss = 0.0000
Final loss : 0.0000


In [53]:
import os

# torch.save does not create missing directories, so make sure the target
# folder exists before writing the learned parameters.
os.makedirs("./model", exist_ok=True)
torch.save(model.state_dict(), "./model/model.pth")

# Inference

In [54]:
# Ask PyTorch to release cached GPU memory before reloading the model.
torch.cuda.empty_cache()

In [56]:
# Rebuild the architecture and restore the saved parameters.
model = net(input_size=input_size, num_classes=num_classes, hidden_size=hidden_size)
# map_location="cpu" lets a checkpoint written from a GPU be restored on a
# machine without CUDA; the freshly built model lives on the CPU at this point.
model.load_state_dict(torch.load("./model/model.pth", map_location="cpu"))
# Switch to evaluation mode for inference; eval() returns the model, so the
# cell still displays its layer summary.
model.eval()

net(
  (l1): Linear(in_features=55, out_features=128, bias=True)
  (l2): Linear(in_features=128, out_features=128, bias=True)
  (l3): Linear(in_features=128, out_features=7, bias=True)
  (relu): ReLU()
)

In [64]:
import random
import json
import torch

In [59]:
# Move the reloaded model to the same device the chat inputs are sent to.
model = model.to(device)

In [None]:
# Display the loaded intents dictionary for reference.
intents

In [69]:
bot_name = "Yooko"
# Minimum softmax probability the top class must exceed before the bot
# answers; at or below it the bot says it does not understand.
confidence_threshold = 0.7
print("Let's chat! type 'quit' to exit")

while True:
    sentence = input("You: ")
    if sentence == 'quit':
        print(f"{bot_name}: See you next time, bye!")
        break

    # Same preprocessing as in training: tokenize -> bag of words -> tensor
    # with a leading batch dimension of 1.
    tokens = tokenize(sentence)
    bag = bag_of_words(tokens, all_words)
    features = torch.from_numpy(bag.reshape(1, -1)).to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(features)
        probs = torch.softmax(output, dim=1)
        # Softmax keeps the ordering of the scores, so the most probable
        # class is also the class with the largest raw output.
        prob, pred = torch.max(probs, dim=1)

    tag = tags[pred.item()]

    if prob.item() > confidence_threshold:
        for intent in intents['intents']:
            if tag == intent['tag']:
                print(f'{bot_name}: {random.choice(intent["responses"])}')
                # One reply per user message is enough; stop searching.
                break
    else:
        print(f'{bot_name}: I do not understand...')



Let's chat! type 'quit' to exit
Yooko: What did the buffalo say when his son left for college? Bison.
Yooko: What did the buffalo say when his son left for college? Bison.
Yooko: Happy to help!
Yooko: See you next time, bye!
