<a href="https://colab.research.google.com/github/KarlX07/NLP-assignment/blob/main/NLP_assign%5B%5B04%5D%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score
)

**Chatbot Intent Detection**

In [24]:
print("\n======== Task 1: Chatbot Intent Detection ========\n")


intent_data = {
    "text": [
        "hi", "hello", "hey there",
        "what is your name", "how does this work", "tell me something",
        "this is good", "i like this", "your service is bad",
        "you are useless", "thanks", "this is amazing"
    ],
    "intent": [
        "greeting", "greeting", "greeting",
        "query", "query", "query",
        "feedback", "feedback", "feedback",
        "feedback", "feedback", "feedback"
    ]
}

df_intent = pd.DataFrame(intent_data)





**Vectorizer**

In [25]:
vectorizer_intent = TfidfVectorizer()
X_intent = vectorizer_intent.fit_transform(df_intent["text"])
y_intent = df_intent["intent"]

**Train-test split**

In [26]:
X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(
    X_intent, y_intent, test_size=0.3, random_state=42
)

**Model**

In [27]:
intent_model = LogisticRegression()
intent_model.fit(X_train_i, y_train_i)

**Evaluate**

In [28]:

intent_preds = intent_model.predict(X_test_i)
print("Intent Detection Accuracy:", accuracy_score(y_test_i, intent_preds))

Intent Detection Accuracy: 0.0


# **Chatbot response function**

In [29]:
def chatbot_response(user_input):
    vec = vectorizer_intent.transform([user_input])
    probs = intent_model.predict_proba(vec)
    max_prob = np.max(probs)
    pred_intent = intent_model.predict(vec)[0]


    if max_prob < 0.45:
        return "I'm not sure what you mean. Could you rephrase?"

    if pred_intent == "greeting":
        return "Hello! How can I help you?"
    elif pred_intent == "query":
        return "Let me help you with that."
    elif pred_intent == "feedback":
        return "Thank you for your feedback!"
    else:
        return "I'm not sure what you mean."


print("\nChatbot Demo:")
print("User: hi")
print("Bot:", chatbot_response("hi"))
print("\nUser: i hate this service")
print("Bot:", chatbot_response("i hate this service"))
print("\nUser: asfasfe")
print("Bot:", chatbot_response("asfasfe"))


Chatbot Demo:
User: hi
Bot: I'm not sure what you mean. Could you rephrase?

User: i hate this service
Bot: I'm not sure what you mean. Could you rephrase?

User: asfasfe
Bot: I'm not sure what you mean. Could you rephrase?


# **FAKE NEWS DETECT**

In [30]:
print("\n======== Task 2: Fake News Detection ========\n")

news_data = {
    "text": [
        "Government introduces new tax bill",
        "NASA announces new Mars rover findings",
        "Scientists discover cure for cancer",
        "Celebrity spotted on Mars giving interview",
        "Aliens contacted Indian government secretly",
        "COVID vaccine approved after trials",
        "Minister arrested for alien conspiracy"
    ],
    "label": [
        "real", "real", "real",
        "fake", "fake", "real", "fake"
    ]
}

df_news = pd.DataFrame(news_data)






**Vectorization**

In [31]:
vectorizer_news = TfidfVectorizer(stop_words="english")
X_news = vectorizer_news.fit_transform(df_news["text"])
y_news = df_news["label"]

**Train-test split**

In [32]:

X_train_n, X_test_n, y_train_n, y_test_n = train_test_split(
    X_news, y_news, test_size=0.4, random_state=42
)



**MODEL**

In [33]:
news_model = LogisticRegression()
news_model.fit(X_train_n, y_train_n)

**Predictions**

In [34]:
news_preds = news_model.predict(X_test_n)

**Metrics**

In [35]:
prec = precision_score(y_test_n, news_preds, pos_label="fake")
rec = recall_score(y_test_n, news_preds, pos_label="fake")
f1 = f1_score(y_test_n, news_preds, pos_label="fake")

print("Precision (fake):", prec)
print("Recall (fake):", rec)
print("F1 Score (fake):", f1)

Precision (fake): 0.0
Recall (fake): 0.0
F1 Score (fake): 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# **Ethical & Social Implications (Printed Output)**

In [36]:
print("\n======== Task 3: Ethical & Social Implications ========\n")

print("Chatbots - Ethical Concern:")
print("They may confidently give incorrect answers, misleading users.")
print("Mitigation: Use confidence thresholds and fallback responses.\n")

print("Fake News Detection - Ethical Concern:")
print("Models can become biased and misclassify real news from minority groups.")
print("Mitigation: Use balanced datasets, bias testing, and human review.\n")



Chatbots - Ethical Concern:
They may confidently give incorrect answers, misleading users.
Mitigation: Use confidence thresholds and fallback responses.

Fake News Detection - Ethical Concern:
Models can become biased and misclassify real news from minority groups.
Mitigation: Use balanced datasets, bias testing, and human review.



# **Code Quality**

In [37]:

print("\n======== ALL TASKS COMPLETED SUCCESSFULLY ========\n")



