In [27]:
pip install tensorflow tensorflow-datasets numpy pandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [28]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import pandas as pd


In [29]:
data = {
    'questions': [
        'Hello',
        'How are you?',
        'What is your name?',
        'What do you do?',
        'Goodbye'
    ],
    'answers': [
        'Hi there!',
        'I am doing well, thank you!',
        'I am a chatbot created to assist you.',
        'I help answer your questions.',
        'Goodbye! Have a great day!'
    ]
}

df = pd.DataFrame(data)


In [30]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['questions'])
sequences = tokenizer.texts_to_sequences(df['questions'])

max_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(df['answers'])


In [32]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=16),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(len(df['answers'].unique()), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [33]:
model.fit(padded_sequences, labels, epochs=100)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 748ms/step - accuracy: 0.0000e+00 - loss: 1.6103
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.2000 - loss: 1.6065
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.4000 - loss: 1.6027
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.4000 - loss: 1.5991
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.4000 - loss: 1.5954
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.4000 - loss: 1.5917
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.4000 - loss: 1.5881
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.6000 - loss: 1.5846
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x17b13beaa50>

In [34]:
def chatbot_response(user_input):
    # Preprocess the user input
    seq = tokenizer.texts_to_sequences([user_input])
    padded = pad_sequences(seq, maxlen=max_length, padding='post')
    prediction = model.predict(padded)
    index = np.argmax(prediction)
    return label_encoder.inverse_transform([index])[0]


In [36]:
user_input = input("You: ")
while user_input.lower() != 'exit':
    print("Chatbot:", chatbot_response(user_input))
    user_input = input("You: ")


You:  hello!


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Chatbot: Hi there!


You:  goodbye!


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Chatbot: Goodbye! Have a great day!


You:  what is your name?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Chatbot: I am a chatbot created to assist you.


You:  what can you do


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Chatbot: I am a chatbot created to assist you.


KeyboardInterrupt: Interrupted by user