In [14]:
!pip install nltk tensorflow numpy



In [16]:
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [8]:
import numpy as np
import nltk
from nltk.stem import PorterStemmer
import json
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import pickle

In [9]:
# Initialize stemmer
stemmer = PorterStemmer()

In [10]:
# Preprocessing functions
def tokenize(sentence):
    return nltk.word_tokenize(sentence)

def stem(word):
    return stemmer.stem(word.lower())

def bag_of_words(tokenized_sentence, words):
    stemmed_sentence = [stem(w) for w in tokenized_sentence]
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in stemmed_sentence:
            bag[idx] = 1.0
    return bag

In [17]:
# Load data
with open('intents.json') as file:
    intents = json.load(file)

words = []
labels = []
docs = []

for intent in intents['intents']:
    tag = intent['tag']
    labels.append(tag)
    for pattern in intent['patterns']:
        tokens = tokenize(pattern)
        words.extend(tokens)
        docs.append((tokens, tag))

words = [stem(w) for w in words if w not in ['?', '!', '.', ',']]
words = sorted(set(words))
labels = sorted(set(labels))

In [18]:
# Create training data
training = []
output = []
out_empty = [0] * len(labels)

for doc in docs:
    bag = []
    pattern_words = doc[0]
    pattern_words = [stem(w) for w in pattern_words]
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)

    output_row = out_empty.copy()
    output_row[labels.index(doc[1])] = 1
    training.append(bag)
    output.append(output_row)

X_train = np.array(training)
y_train = np.array(output)

In [27]:
# Build model
model = Sequential()
model.add(Dense(8, input_shape=(len(X_train[0]),), activation='relu'))  # Fixed line
model.add(Dense(8, activation='relu'))
model.add(Dense(len(labels), activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=1000, batch_size=8, verbose=1)

Epoch 1/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step - accuracy: 0.3500 - loss: 1.0776
Epoch 2/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.4333 - loss: 1.0491
Epoch 3/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.5417 - loss: 1.0436
Epoch 4/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.5000 - loss: 1.0459
Epoch 5/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.5000 - loss: 1.0510
Epoch 6/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5000 - loss: 1.0394 
Epoch 7/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.4583 - loss: 1.0558
Epoch 8/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5000 - loss: 1.0354 
Epoch 9/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7e0628e91990>

In [28]:
# Save model and data
model.save('chat_model.h5')
with open('data.pkl', 'wb') as f:
    pickle.dump({'words': words, 'labels': labels}, f)

print("Training complete! Model saved.")



Training complete! Model saved.


In [30]:
# Load model and data
model = load_model('chat_model.h5')
with open('data.pkl', 'rb') as f:
    data = pickle.load(f)
words = data['words']
labels = data['labels']



In [31]:
# Load intents
with open('intents.json') as file:
    intents = json.load(file)

stemmer = PorterStemmer()

def preprocess_input(sentence):
    tokens = nltk.word_tokenize(sentence)
    stemmed_tokens = [stemmer.stem(w.lower()) for w in tokens]
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in stemmed_tokens:
            bag[idx] = 1.0
    return bag

def get_response(user_input):
    bag = preprocess_input(user_input)
    results = model.predict(np.array([bag]))[0]
    threshold = 0.7
    results = [[i, r] for i, r in enumerate(results) if r > threshold]
    results.sort(key=lambda x: x[1], reverse=True)

    if results:
        intent_tag = labels[results[0][0]]
        for intent in intents['intents']:
            if intent['tag'] == intent_tag:
                return random.choice(intent['responses'])
    return "I don't understand. Try again."

In [None]:
# Run chatbot
print("Chatbot is ready! Type 'quit' to exit.")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = get_response(user_input)
    print("Bot:", response)

Chatbot is ready! Type 'quit' to exit.
