In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Hand-curated corpus.
# Every entry is a (label, message) tuple; append more tuples to grow the set.
manual_data = [
    ('ham', 'Hey, are we still on for tonight?'),
    ('spam', 'Win $1000 cash by clicking here!'),
    ('ham', 'I’ll call you later.'),
    ('spam', 'Congratulations! You have won a lottery. Reply YES to claim.'),
    # User-supplied examples go below this line:
    ('ham', 'Can you send me the report?'),
    ('spam', 'Urgent! Your account will be blocked. Verify now!'),
    ('ham', 'Let’s catch up this weekend.'),
    ('spam', 'You’ve been selected for a $500 Walmart gift card.'),
]

# Integer code assigned to each class label.
LABEL_CODES = {'ham': 0, 'spam': 1}

# Tabulate the tuples, then append the numeric label column in one chain.
df = pd.DataFrame(manual_data, columns=['label', 'message']).assign(
    label_num=lambda frame: frame['label'].map(LABEL_CODES)
)

# Train-test split
# Split the RAW messages before vectorizing. Fitting TF-IDF on the whole
# corpus first would let the held-out messages influence the vocabulary and
# IDF weights, leaking test information into the features and making the
# evaluation below optimistic. The split arguments are unchanged.
y = df['label_num']
msg_train, msg_test, y_train, y_test = train_test_split(
    df['message'], y, test_size=0.25, random_state=42, stratify=y
)

# Vectorize text
# Learn vocabulary/IDF from the training messages only; the test messages are
# merely projected into that already-fitted feature space.
vectorizer = TfidfVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(msg_train)
X_test = vectorizer.transform(msg_test)

# Feature matrix for the full corpus, expressed in the training-fitted space.
# Kept so that any later code that still refers to `X` continues to work.
X = vectorizer.transform(df['message'])

# Train SVM
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)

# Evaluate
# zero_division=0 silences the warning (and reports 0.0) when a class is never
# predicted, which is likely with a test split this small.
y_pred = svm.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))

# Manual input for predictions
# Reads messages until the user types 'exit' (case-insensitive, surrounding
# whitespace ignored) or the input stream ends / is interrupted.
print("\nEnter your message to classify as HAM or SPAM (type 'exit' to quit):")
while True:
    try:
        # Strip so that inputs such as "exit " or " exit" still quit.
        user_msg = input("> ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin closed (non-interactive run) or the user interrupted the
        # prompt: leave the loop cleanly instead of dumping a traceback.
        print("Exiting.")
        break
    if user_msg.lower() == 'exit':
        print("Exiting.")
        break
    if not user_msg:
        # A blank line has no tokens, so its TF-IDF vector is all zeros and
        # any prediction would be meaningless; just prompt again.
        continue
    user_vec = vectorizer.transform([user_msg])
    pred = svm.predict(user_vec)[0]
    label = "SPAM" if pred == 1 else "HAM"
    print(f"Prediction: {label}\n")


Accuracy: 0.5

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2


Enter your message to classify as HAM or SPAM (type 'exit' to quit):


>  i'll call you later


Prediction: HAM

