<a href="https://colab.research.google.com/github/vanshika847/Week-1-Chatbot-using-NLP/blob/main/Week1_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install nltk scikit-learn




In [2]:
import nltk
import random
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from nltk.stem import WordNetLemmatizer

# Download the NLTK resources this notebook relies on:
#   - punkt / punkt_tab: sentence/word tokenizer data for nltk.word_tokenize.
#     Newer NLTK releases look up "punkt_tab" instead of "punkt"; requesting
#     both keeps the notebook working regardless of the installed version
#     (an unknown resource name only logs an error, it does not raise).
#   - omw-1.4 / wordnet: data for WordNetLemmatizer (omw-1.4 is required by
#     some NLTK releases and harmless on the others).
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('omw-1.4')
nltk.download('wordnet')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [3]:
# Shared WordNet lemmatizer instance; clean_up_sentence() uses it to reduce
# every token of a sentence to its lemma.
lemmatizer = WordNetLemmatizer()

# Intent catalogue for the chatbot.
# Each entry has:
#   "tag"       - the class label the classifier is trained to predict
#   "patterns"  - example user phrases for that label (training examples)
#   "responses" - candidate replies; one is picked at random at answer time
# The final "unknown" entry has no patterns, so it contributes no training
# examples; it only supplies the fallback reply.
intents = [
    {
        "tag": "greeting",
        "patterns": ["Hi", "Hello", "Hey", "How are you", "What's up"],
        "responses": [
            "Hi there!",
            "Hello!",
            "Hey!",
            "I'm fine, thank you.",
            "Nothing much!",
        ],
    },
    {
        "tag": "goodbye",
        "patterns": ["Bye", "See you later", "Goodbye", "Take care"],
        "responses": ["Goodbye!", "See you later!", "Take care!"],
    },
    {
        "tag": "thanks",
        "patterns": ["Thank you", "Thanks", "Thanks a lot", "I appreciate it"],
        "responses": ["You're welcome!", "No problem!", "Glad I could help!"],
    },
    {
        "tag": "about",
        "patterns": [
            "What can you do",
            "Who are you",
            "What are you",
            "What is your purpose",
        ],
        "responses": [
            "I am a chatbot.",
            "My purpose is to assist you!",
            "I can answer questions and provide assistance.",
        ],
    },
    {
        "tag": "help",
        "patterns": ["Help", "I need help", "Can you help me", "What should I do"],
        "responses": [
            "Sure, what do you need help with?",
            "I'm here to help! What's the problem?",
            "How can I assist you?",
        ],
    },
    {
        "tag": "developer",
        "patterns": ["Who is your developer", "Who created you"],
        "responses": ["My developer is Suraj Sharma!", "Suraj Sharma created me."],
    },
    {
        "tag": "play_music",
        "patterns": [
            "Hey, can you play a song for me?",
            "Please play a song for me",
        ],
        "responses": [
            "Yes sure! Which song would you like to play?",
            "Yeah! Which song?",
        ],
    },
    {
        "tag": "unknown",
        "patterns": [],
        "responses": ["I'm not sure how to respond to that. Can you rephrase?"],
    },
]


In [4]:
# Flatten the intent catalogue into training data.
# `responses` maps each tag to its list of candidate replies.
responses = {entry["tag"]: entry["responses"] for entry in intents}

# `all_patterns` and `all_labels` are parallel lists: the i-th lower-cased
# example phrase and the tag it belongs to. Intents without patterns add
# nothing to either list.
all_patterns = [
    phrase.lower() for entry in intents for phrase in entry["patterns"]
]
all_labels = [entry["tag"] for entry in intents for _ in entry["patterns"]]


In [5]:
def clean_up_sentence(sentence):
    """Normalize a sentence into a space-separated string of lemmas.

    The sentence is lower-cased, split into tokens with ``nltk.word_tokenize``,
    and each token is passed through the shared ``lemmatizer``. The resulting
    lemmas are joined with single spaces and returned as one string.
    """
    tokens = nltk.word_tokenize(sentence.lower())
    lemmas = map(lemmatizer.lemmatize, tokens)
    return " ".join(lemmas)


In [6]:
# Vectorization using TF-IDF.
# The training patterns go through clean_up_sentence() first so they are
# tokenized and lemmatized exactly like the user input in get_response().
# Without this, tokens such as "what's" or "me?" in the training text never
# match the "what", "'s", "me", "?" tokens produced at prediction time.
training_texts = [clean_up_sentence(pattern) for pattern in all_patterns]

# The text is already tokenized, so the vectorizer only needs to split on
# whitespace. token_pattern=None avoids scikit-learn's warning that the default
# token_pattern is ignored when a custom tokenizer is supplied.
vectorizer = TfidfVectorizer(tokenizer=str.split, token_pattern=None)
X = vectorizer.fit_transform(training_texts)

# Train classifier
classifier = LogisticRegression()
classifier.fit(X, all_labels)




In [10]:
def get_response(user_input, min_confidence=0.0):
    """Return a chatbot reply for ``user_input``.

    The input is preprocessed with ``clean_up_sentence``, vectorized with the
    fitted ``vectorizer`` and classified with ``classifier``. A reply is then
    drawn at random from ``responses`` for the predicted tag.

    The fallback reply stored under ``responses["unknown"]`` is used when:
      * the input contains no token known to the vectorizer (all-zero feature
        vector, which also covers empty input), or
      * the highest class probability is below ``min_confidence``, or
      * the predicted tag has no entry in ``responses``.

    Args:
        user_input: Raw text typed by the user.
        min_confidence: Minimum probability (0.0-1.0) the top class must reach.
            The default of 0.0 disables the threshold.

    Returns:
        A reply string.
    """
    input_vector = vectorizer.transform([clean_up_sentence(user_input)])

    # With no known token the features are all zero, so any prediction would
    # not depend on what the user typed at all: answer with the fallback.
    if input_vector.nnz == 0:
        return random.choice(responses["unknown"])

    probabilities = classifier.predict_proba(input_vector)[0]
    best_index = probabilities.argmax()
    predicted_tag = classifier.classes_[best_index]

    if probabilities[best_index] < min_confidence or predicted_tag not in responses:
        return random.choice(responses["unknown"])

    return random.choice(responses[predicted_tag])


In [9]:
# Interactive chat loop: read a line, answer it, stop on "quit".
print("Chatbot: Hello! Type 'quit' to exit.")

while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or kernel interrupted: end the chat cleanly
        # instead of leaving a traceback in the notebook output.
        print("\nChatbot: Goodbye!")
        break
    # Ignore surrounding whitespace so " quit " also ends the session.
    if user_input.strip().lower() == "quit":
        print("Chatbot: Goodbye!")
        break
    print(f"Chatbot: {get_response(user_input)}")


Chatbot: Hello! Type 'quit' to exit.
You: quit
Chatbot: Goodbye!
