<a href="https://colab.research.google.com/github/zzeiadahmed/Data-Science-Projects/blob/main/Intent_Classification_Chatbot_with_NLP_and_Machine_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import pandas as pd
import re
import nltk
import random
import string

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [None]:
# Training corpus for the intent classifier. Each entry pairs a "tag" (the class
# label) with example user utterances ("patterns") and canned replies ("responses").
intents = {
    "intents": [
        {
            "tag": "greeting",
            "patterns": [
                "Hi",
                "Hello",
                "Hey",
                "What's up",
                "Good day",
                "Howdy",
                "Yo",
                "Hiiiiii",
            ],
            "responses": [
                "Hello!",
                "Hi there!",
                "Hey! How can I help?",
            ],
        },
        {
            "tag": "goodbye",
            "patterns": [
                "Bye",
                "See you later",
                "Goodbye",
                "Catch you later",
                "Talk to you soon",
            ],
            "responses": [
                "Goodbye!",
                "See you soon!",
                "Take care!",
            ],
        },
        {
            "tag": "thanks",
            "patterns": [
                "Thanks",
                "Thank you",
                "Much appreciated",
                "Thx",
                "Thanks a lot",
                "Cheers",
            ],
            "responses": [
                "You're welcome!",
                "Any time!",
                "Glad to help!",
            ],
        },
        {
            "tag": "name",
            "patterns": [
                "What is your name?",
                "Who are you?",
                "Tell me your name",
                "Your name?",
                "What do I call you?",
            ],
            "responses": [
                "I'm your chatbot assistant.",
                "I'm a bot created to help you.",
            ],
        },
        {
            "tag": "age",
            "patterns": [
                "How old are you?",
                "Your age?",
                "When were you made?",
                "How long have you existed?",
                "Are you old?",
            ],
            "responses": [
                "I was created recently!",
                "I don't age like humans.",
            ],
        },
    ]
}

In [None]:
# Persist the intent definitions to intents.json in the working directory.
# The encoding is spelled out so the file does not depend on the platform's
# locale default.
with open("intents.json", "w", encoding="utf-8") as f:
    json.dump(intents, f, indent=4)

In [None]:
# Load the intent definitions back from disk into `data`.
# The encoding is spelled out so decoding does not depend on the platform's
# locale default.
with open("intents.json", encoding="utf-8") as file:
    data = json.load(file)

In [None]:
# Flatten the nested intent structure into one (pattern, tag) row per example
# utterance, then wrap the rows in a DataFrame for the rest of the pipeline.
all_data = [
    (pattern, intent["tag"])
    for intent in data["intents"]
    for pattern in intent["patterns"]
]

df = pd.DataFrame(all_data, columns=["pattern", "tag"])
df.head()

Unnamed: 0,pattern,tag
0,Hi,greeting
1,Hello,greeting
2,Hey,greeting
3,What's up,greeting
4,Good day,greeting


In [None]:
# Download the NLTK stop-word corpus; it has to be on disk before the word list
# below can be read.
nltk.download('stopwords')
from nltk.corpus import stopwords
# English stop words stored as a set, used by preprocess() for membership tests.
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
def preprocess(text, stop_word_set=None):
    """Normalize a raw utterance into a space-joined string of tokens.

    The text is lowercased, every character that is not an ASCII letter, a
    digit or whitespace is removed, whitespace runs are collapsed, and stop
    words are filtered out.

    Args:
        text: The raw utterance. ``None`` is treated as an empty utterance.
        stop_word_set: Optional collection of lowercase words to drop. When
            omitted, the notebook-level ``stop_words`` set is used.

    Returns:
        The cleaned utterance as a single string. If *every* token is a stop
        word (e.g. "Who are you?"), the unfiltered tokens are returned so the
        utterance does not collapse to an empty string and lose all signal.
    """
    if text is None:
        return ''
    if stop_word_set is None:
        stop_word_set = stop_words

    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)       # remove punctuation
    text = re.sub(r'\s+', ' ', text).strip()         # remove extra whitespace
    tokens = text.split()

    informative = [word for word in tokens if word not in stop_word_set]
    # Short chatbot utterances are often made up entirely of stop words; keep
    # them intact rather than handing the vectorizer an empty document.
    kept = informative if informative else tokens
    return ' '.join(kept)

# Store the normalized form of every pattern in a new 'cleaned' column.
df['cleaned'] = df['pattern'].map(preprocess)


In [None]:
# Labels are the intent tags; features are TF-IDF weights of the cleaned patterns.
# NOTE(review): the vectorizer is fitted on every row of df, i.e. before any
# train/test split, so its vocabulary and IDF weights also reflect held-out rows.
y = df['tag']
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['cleaned'])

In [None]:
# Hold out 20% of the utterances for evaluation. Stratifying on the intent tag
# keeps every class represented in both splits; with a corpus this small an
# unstratified split can leave a class with no test rows at all.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Fit a logistic-regression classifier (default settings) on the training split.
model = LogisticRegression().fit(X_train, y_train)
model

In [None]:
# Score the held-out split. zero_division=0 reports 0.0 for metrics whose
# denominator is zero (a class that is never predicted or has no test rows)
# without emitting a warning for each such metric.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

         age       0.00      0.00      0.00         1
     goodbye       0.00      0.00      0.00         3
    greeting       0.00      0.00      0.00         0
        name       1.00      1.00      1.00         1
      thanks       0.00      0.00      0.00         1

    accuracy                           0.17         6
   macro avg       0.20      0.20      0.20         6
weighted avg       0.17      0.17      0.17         6



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Lookup table from intent tag to its list of candidate replies.
intent_responses = dict(
    (entry["tag"], entry["responses"]) for entry in data["intents"]
)

def get_response(user_input, fallback="Sorry, I didn't understand that. Could you rephrase?"):
    """Return a reply for ``user_input`` based on its predicted intent.

    The input is cleaned with ``preprocess``, vectorized with the fitted
    ``vectorizer``, classified by ``model``, and a reply is drawn at random
    from ``intent_responses`` for the predicted tag.

    Args:
        user_input: The raw text typed by the user.
        fallback: Reply returned when the input has no term in the vectorizer's
            vocabulary (including empty input).

    Returns:
        A reply string: one of the predicted intent's responses, or
        ``fallback`` when there is nothing to classify.
    """
    cleaned_input = preprocess(user_input)
    vectorized_input = vectorizer.transform([cleaned_input])
    # An all-zero row means no known term matched; predicting on it would pick
    # an intent that has nothing to do with what the user typed.
    if vectorized_input.nnz == 0:
        return fallback
    predicted_tag = model.predict(vectorized_input)[0]
    return random.choice(intent_responses[predicted_tag])

In [None]:
# Interactive console loop: read a line, answer it, and stop on 'quit'.
print("🤖 Chatbot is ready! Type 'quit' to exit.")
while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave the loop
        # cleanly instead of ending with a traceback.
        print("Bot: Goodbye!")
        break
    user_input = user_input.strip()
    if not user_input:
        # Nothing typed; ask again rather than classifying an empty string.
        continue
    if user_input.lower() == "quit":
        print("Bot: Goodbye!")
        break
    response = get_response(user_input)
    print(f"Bot: {response}")

🤖 Chatbot is ready! Type 'quit' to exit.
You: hello
Bot: Hello!
You: how old are you?
Bot: I was created recently!
You: what is your name?
Bot: I'm a bot created to help you.
You: Thanks
Bot: Glad to help!
You: quit
Bot: Goodbye!


In [None]:
# Ask the shell for the Streamlit CLI version to check whether it is installed.
!streamlit --version

/bin/bash: line 1: streamlit: command not found


In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.47.0-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.0-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m76.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m89.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInst