In [1]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Toy training corpus. Each message is written next to its label so the two
# stay aligned; zip() then transposes the pairs into the parallel `texts`
# and `labels` lists used for fitting.
texts, labels = (
    list(column)
    for column in zip(
        ("buy cheap meds now", "Spam"),
        ("cheap pills online", "Spam"),
        ("get discount now", "Spam"),
        ("meeting at office", "Not Spam"),
        ("project discussion", "Not Spam"),
        ("schedule a meeting", "Not Spam"),
    )
)

# Build and train the classifier in one expression: a bag-of-words
# vectorizer feeding a multinomial Naive Bayes model. alpha=1.0 applies
# Laplace (add-one) smoothing. Pipeline.fit returns the fitted pipeline
# itself, so `model` is bound to the trained pipeline.
model = make_pipeline(
    CountVectorizer(),
    MultinomialNB(alpha=1.0),
).fit(texts, labels)

# Unseen messages used to exercise the trained model.
test_messages = [
    "cheap meds now",
    "schedule a meeting",
    "project schedule",
    "discount online pills",
    "meeting now",
]

# Run inference twice over the same inputs: once for the per-class
# probabilities (one row per message) and once for the predicted label.
probs = model.predict_proba(test_messages)
predicted = model.predict(test_messages)

# Display results.
# The columns of `probs` are indexed by position in model.classes_, so
# resolve each class's column once here instead of converting classes_ to a
# list and searching it twice for every message inside the loop.
class_names = model.classes_.tolist()
spam_col = class_names.index("Spam")
not_spam_col = class_names.index("Not Spam")

for msg, label, prob in zip(test_messages, predicted, probs):
    print(f"Message: '{msg}'")
    print(f"Predicted Class: {label}")
    # The trailing "\n" leaves a blank line between messages.
    print(
        f"Probabilities: Spam={prob[spam_col]:.2f}, "
        f"Not Spam={prob[not_spam_col]:.2f}\n"
    )


Message: 'cheap meds now'
Predicted Class: Spam
Probabilities: Spam=0.92, Not Spam=0.08

Message: 'schedule a meeting'
Predicted Class: Not Spam
Probabilities: Spam=0.11, Not Spam=0.89

Message: 'project schedule'
Predicted Class: Not Spam
Probabilities: Spam=0.16, Not Spam=0.84

Message: 'discount online pills'
Predicted Class: Spam
Probabilities: Spam=0.84, Not Spam=0.16

Message: 'meeting now'
Predicted Class: Not Spam
Probabilities: Spam=0.43, Not Spam=0.57

