In [1]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [2]:
import nltk
import random
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Download necessary data
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\91939\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\91939\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\91939\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [8]:
# Sample knowledge base (can be expanded)
CORPUS = """
Hello, how can I assist you?
What is your name?
I am a chatbot built using Python and NLTK.
Tell me a joke.
Why did the math book look sad? Because it had too many problems!
What is NLP?
Natural Language Processing (NLP) is a field of AI that helps computers understand human language.
How are you?
I'm functioning as expected, thank you!
Bye
Goodbye! Have a nice day.
"""

In [9]:
# Tokenize and prepare the data
lemmer = WordNetLemmatizer()
sent_tokens = nltk.sent_tokenize(CORPUS.lower())

def LemTokens(tokens):
    return [lemmer.lemmatize(token.lower()) for token in tokens if token not in string.punctuation]

def LemNormalize(text):
    return LemTokens(nltk.word_tokenize(text))

In [10]:
# Greeting responses
greeting_inputs = ("hello", "hi", "hey", "greetings")
greeting_responses = ["Hello!", "Hi there!", "Hey!", "Greetings!"]

def greet(sentence):
    for word in sentence.split():
        if word.lower() in greeting_inputs:
            return random.choice(greeting_responses)

In [11]:
# Generate chatbot response
def chatbot_response(user_input):
    user_input = user_input.lower()
    temp_tokens = sent_tokens + [user_input]
    vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words=stopwords.words('english'))
    tfidf = vectorizer.fit_transform(temp_tokens)
    similarity_scores = cosine_similarity(tfidf[-1], tfidf[:-1])
    idx = similarity_scores.argsort()[0][-1]
    flat = similarity_scores.flatten()
    flat.sort()
    req_tfidf = flat[-2]

    if req_tfidf == 0:
        return "I'm sorry, I don't understand that."
    else:
        return sent_tokens[idx]


In [12]:
# Chat loop
def chat():
    print("AI Bot: Hello! Ask me anything or type 'bye' to exit.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == 'bye':
            print("AI Bot: Goodbye!")
            break
        elif greet(user_input):
            print("AI Bot:", greet(user_input))
        else:
            print("AI Bot:", chatbot_response(user_input))

In [None]:
# Run chatbot
if __name__ == "__main__":
    chat()

AI Bot: Hello! Ask me anything or type 'bye' to exit.
