In [14]:
import json
import pickle
import random

import nltk
import numpy as np
from nltk.stem.lancaster import LancasterStemmer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam

In [15]:
# Initialise the Lancaster stemmer used to normalise tokens.
stemmer = LancasterStemmer()

# Read the intents.json file. JSON text is UTF-8, so the encoding is given
# explicitly instead of relying on the platform's locale default.
with open('/kaggle/input/intents/intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

In [16]:
# Build the vocabulary (`words`), the label list (`classes`) and the
# tokenised examples (`documents`) from the loaded intents.
words = []
documents = []
stop_words = ['?', 'a', 'an', 'the']

# Tokenise every pattern and remember which tag it belongs to.
for intent in intents['intents']:
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        documents.append((tokens, intent['tag']))

# Stem words and remove duplicates. The stop-word test is applied to the
# lower-cased token so that capitalised forms ("A", "An", "The") are
# filtered as well, not only their lower-case spellings.
words = [stemmer.stem(w.lower()) for w in words if w.lower() not in stop_words]
words = sorted(set(words))

# Unique tags of all intents that contributed at least one pattern, sorted.
classes = sorted({tag for _, tag in documents})

In [17]:
# Create training set: one [bag-of-words, one-hot label] pair per document.
training = []
output_empty = [0] * len(classes)

# `classes` holds unique tags, so a dict gives the same position as
# classes.index() without a linear scan per document.
class_index = {tag: idx for idx, tag in enumerate(classes)}

for pattern_tokens, tag in documents:
    # Stems present in this pattern; a set makes the per-vocabulary-word
    # membership test constant time.
    pattern_stems = {stemmer.stem(token.lower()) for token in pattern_tokens}

    # 1 where the vocabulary word occurs in the pattern, 0 elsewhere.
    bag = [1 if vocab_word in pattern_stems else 0 for vocab_word in words]

    # One-hot row marking the document's tag.
    output_row = list(output_empty)
    output_row[class_index[tag]] = 1

    training.append([bag, output_row])

random.shuffle(training)
training = np.array(training, dtype=object)

In [18]:
# Split the shuffled examples into the feature matrix (bags of words) and
# the label matrix (one-hot tags).
train_x = np.array([i[0] for i in training])
train_y = np.array([i[1] for i in training])

# Build the model using Keras: two ReLU hidden layers followed by a softmax
# over the classes. The input width is declared with an explicit Input layer
# rather than the legacy `input_dim` argument on the first Dense layer.
model = Sequential([
    Input(shape=(train_x.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(train_y.shape[1], activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

In [19]:
# Train the model. The returned History object is kept so the per-epoch
# loss/accuracy can be inspected later, and so the cell does not echo the
# object's repr as its output.
history = model.fit(train_x, train_y, epochs=100, batch_size=8)

Epoch 1/100
[1m2375/2375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - accuracy: 6.6492e-04 - loss: 8.7395
Epoch 2/100
[1m2375/2375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.0553 - loss: 7.1584
Epoch 3/100
[1m2375/2375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.3410 - loss: 4.2306
Epoch 4/100
[1m2375/2375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.5280 - loss: 2.2932
Epoch 5/100
[1m2375/2375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6353 - loss: 1.3228
Epoch 6/100
[1m2375/2375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6667 - loss: 0.9622
Epoch 7/100
[1m2375/2375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6665 - loss: 0.8694
Epoch 8/100
[1m2375/2375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6753 - loss: 0.8017
Epoch 9/100


<keras.src.callbacks.history.History at 0x7cdc50dc62c0>

In [22]:
# Save the model
model.save('chatbot_model.h5')

# Save the vocabulary, class list and training arrays next to the model.
# The context manager guarantees the file is flushed and closed.
with open('training_data.pkl', 'wb') as pkl_file:
    pickle.dump({'words': words, 'classes': classes, 'train_x': train_x, 'train_y': train_y}, pkl_file)

In [25]:
# Load the training data and model.
# NOTE: pickle.load can execute arbitrary code; only load a
# training_data.pkl that this notebook itself produced.
with open('training_data.pkl', 'rb') as pkl_file:
    data = pickle.load(pkl_file)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

# Load the trained model
model = load_model('chatbot_model.h5')


In [47]:
def clean_up_sentence(sentence):
    """Tokenise ``sentence`` and return the stem of each lower-cased token.

    Tokenisation uses ``nltk.word_tokenize``; stemming uses the module-level
    ``stemmer``. The result is a list with one entry per token.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

def bow(sentence, words):
    """Encode ``sentence`` as a binary bag-of-words vector over ``words``.

    Position ``i`` of the returned NumPy array is 1 when ``words[i]`` equals
    one of the stemmed tokens of ``sentence`` and 0 otherwise.
    """
    present = set(clean_up_sentence(sentence))
    return np.array([1 if vocab_word in present else 0 for vocab_word in words])

def classify(sentence, threshold=0.25):
    """Return the classes the model predicts for ``sentence``.

    Args:
        sentence: Raw user text.
        threshold: Only classes whose predicted score is strictly greater
            than this value are kept. Defaults to 0.25.

    Returns:
        A list of ``(class_name, score)`` tuples ordered from highest to
        lowest score; empty when no class exceeds ``threshold``.
    """
    bow_input = bow(sentence, words)  # Bag of words vector
    # The model expects a batch, so wrap the single vector in a 2D array.
    # verbose=0 stops Keras printing a progress bar for every query.
    scores = model.predict(np.array([bow_input]), verbose=0)[0]
    ranked = sorted(
        ((idx, score) for idx, score in enumerate(scores) if score > threshold),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [(classes[idx], score) for idx, score in ranked]


def response(sentence, fallback=None):
    """Return a randomly chosen reply for the best-matching intent.

    Args:
        sentence: Raw user text.
        fallback: Value returned when ``classify`` yields no result or when
            no intent carries the top-ranked tag. Defaults to ``None``,
            which matches the previous implicit return value.

    Returns:
        One entry of the matching intent's ``responses`` list, or
        ``fallback``.
    """
    results = classify(sentence)
    if not results:
        return fallback

    best_tag = results[0][0]
    for intent in intents['intents']:
        if intent['tag'] == best_tag:
            return random.choice(intent['responses'])
    return fallback



In [62]:
# Smoke-test the chatbot with two sample questions, printing each reply.
for query in ("Can you explain fly?", "Can you give me examples of fly?"):
    print(response(query))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
travel by air:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Annette's scared of flying.
