# In [5]:

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import re


# Toy training corpus. Each class lives in its own list so that the label
# column below is derived from the list lengths instead of hand-counted
# constants that silently go stale when an email is added or removed.

# Spam emails
spam_emails = [
    "Win a free iPhone now! Click here fast.",
    "Urgent: Your account will be suspended today!",
    "Congratulations! You’ve won $1000—claim it now!",
    "Act fast—limited time offer ends soon!",
    "Claim your exclusive prize before it’s gone!",
    "You’re pre-approved for a $5000 loan—apply instantly!",
    "Last chance to save 90% off—don’t miss out!",
    "Verify your account now or lose everything!",
    "Earn $1000 a day from home—start immediately!",
    "Free gift card waiting—click to redeem now!",
    "Instant cash prize—act before it expires!",
    "Exclusive deal: Win a vacation—enter now!",
    "Your subscription expires soon—renew to avoid penalties!",
    "Click here for your free trial—limited spots!",
    "Urgent alert: Secure your account with this link!",
    "Win big today—click this link now!",
    "Free money waiting—just verify your email!",
    "Act now or miss this exclusive offer!",
]

# Not spam emails (ordinary personal / work messages)
not_spam_emails = [
    "Hi John, can we meet tomorrow at 2?",
    "Please review the attached document for me.",
    "Let’s catch up over coffee this weekend, okay?",
    "Hey, did you see the new movie last night?",
    "Reminder: Team meeting at 3 PM today.",
    "Can you send me the notes from class please?",
    "Happy birthday! Hope you have a great day ahead.",
    "Let me know if you’re free this evening to chat.",
    "Thanks for your help yesterday, I owe you one!",
    "See you at the park later this afternoon?",
    "Hey Sarah, how’s your week going so far?",
    "Can we reschedule our call to tomorrow?",
    "Just checking in—hope you’re doing well!",
    "Let’s plan a movie night soon—your pick!",
    "Hi, want to grab lunch after class today?",
    "Hey, let’s study together this weekend.",
    "Good morning! How’s your day starting?",
]

# Spam rows come first, then not-spam rows; labels line up by construction.
data = {
    "email": spam_emails + not_spam_emails,
    "label": ["spam"] * len(spam_emails) + ["not spam"] * len(not_spam_emails),
}


df = pd.DataFrame(data)


def preprocess_text(text):
    """Normalize an email string before it is vectorized.

    Lowercases the text, drops apostrophes so contractions stay a single
    token, turns every other punctuation character into a space so words
    joined only by punctuation (e.g. "hello,how" or "$1000—claim") are not
    fused together, and collapses runs of whitespace.

    Args:
        text: Raw email text.

    Returns:
        The cleaned, lowercased text with words separated by single spaces.
    """
    text = text.lower()
    # Straight and curly apostrophes are removed outright: "you’ve" -> "youve".
    text = re.sub(r"['’]", '', text)
    # Every remaining non-word, non-space character acts as a word separator.
    text = re.sub(r'[^\w\s]', ' ', text)
    # Collapse the extra spaces introduced above and trim both ends.
    return ' '.join(text.split())

df['cleaned_email'] = df['email'].apply(preprocess_text)
y = df['label']


# Split the cleaned text BEFORE vectorizing so the TF-IDF vocabulary and IDF
# weights are learned from the training emails only; fitting the vectorizer on
# the whole corpus would leak test-set information into the features.
# Stratifying keeps the spam / not-spam ratio equal in both parts, which
# matters with so few samples.
train_text, test_text, y_train, y_test = train_test_split(
    df['cleaned_email'], y, test_size=0.2, random_state=7, stratify=y
)


vectorizer = TfidfVectorizer(max_features=50)
X_train = vectorizer.fit_transform(train_text)
X_test = vectorizer.transform(test_text)
# Feature matrix for the full corpus, kept under its original name for any
# later code that still refers to X.
X = vectorizer.transform(df['cleaned_email'])


spam_checker = SVC(kernel='linear', C=1.0)
spam_checker.fit(X_train, y_train)


# Accuracy on the held-out emails the model and vectorizer never saw.
y_pred = spam_checker.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Hey! I trained on tons of emails and got {accuracy*100:.1f}% right. Let’s check yours now!\n")

# Sneaky words: phrases typical of scam / pressure emails.
# Entries are lowercase because is_sneaky() matches against lowercased text.
sneaky_words = ["urgent", "act now", "win", "suspended", "congratulations", "limited time",
                "exclusive", "claim", "free", "last chance", "verify", "redeem", "cash",
                "pre-approved", "instant", "miss out", "expires", "alert", "secure", "enter your pin"]


def is_sneaky(email):
    """Return True if the email contains any phrase from ``sneaky_words``.

    Matching is case-insensitive and tolerant of repeated whitespace. A phrase
    only counts when it begins at a word boundary, so "win" matches "win" and
    "winner" but not "showing", and "secure" does not match "insecure".

    Args:
        email: Raw email text.

    Returns:
        True when at least one sneaky phrase is found, otherwise False.
    """
    # Lowercase and squeeze whitespace so "Act   NOW" still matches "act now".
    normalized = ' '.join(email.lower().split())
    # The list is read on every call so later additions to it take effect;
    # phrases are lowercased here too in case a mixed-case entry is added.
    return any(
        re.search(r'\b' + re.escape(phrase.lower()), normalized)
        for phrase in sneaky_words
    )


# Interactive session: classify each typed email with the trained model and
# the sneaky-word check until the user types 'quit' or input ends.
print("Hi there! I’m your email buddy. Type an email, and I’ll tell you if it’s spam or sneaky.")
while True:

    try:
        # Strip surrounding whitespace so " quit " still ends the session.
        user_email = input("What email do you want me to check? (Say 'quit' if you’re done): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input was closed or the user interrupted: leave without a traceback.
        print("\nAww, okay! Catch you later!")
        break

    if user_email.lower() == "quit":
        print("Aww, okay! Catch you later!")
        break

    if not user_email:
        # Nothing to classify; ask again instead of judging an empty string.
        print("I didn’t catch anything—try typing an email.\n")
        continue

    cleaned_email = preprocess_text(user_email)
    is_spam = spam_checker.predict(vectorizer.transform([cleaned_email]))[0]
    is_sneaky_email = is_sneaky(user_email)

    print(f"\nHere’s what I think about: '{user_email}'")
    if is_spam == "spam":
        print("Uh oh! This looks like spam to me.")
    else:
        print("Phew, this seems like a normal email!")

    if is_sneaky_email:
        print("Hmm, it’s got some sneaky words—like it’s trying to trick you!")
    else:
        print("No sneaky tricks here, looks safe.")
    print("--- Ready for another one! ---\n")


# Sample session output:
#
# Hey! I trained on tons of emails and got 100.0% right. Let’s check yours now!
#
# Hi there! I’m your email buddy. Type an email, and I’ll tell you if it’s spam or sneaky.
# What email do you want me to check? (Say 'quit' if you’re done): Hello,how are you?
#
# Here’s what I think about: 'Hello,how are you?'
# Phew, this seems like a normal email!
# No sneaky tricks here, looks safe.
# --- Ready for another one! ---
#
# What email do you want me to check? (Say 'quit' if you’re done): Urgent! , act now to claim the prize.
#
# Here’s what I think about: 'Urgent! , act now to claim the prize.'
# Uh oh! This looks like spam to me.
# Hmm, it’s got some sneaky words—like it’s trying to trick you!
# --- Ready for another one! ---
#
# What email do you want me to check? (Say 'quit' if you’re done): quit
# Aww, okay! Catch you later!
