In [7]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Sample data: every entry keeps a message next to its category label so the
# two columns cannot drift out of alignment when rows are added or removed.
labelled_messages = [
    ("Hi John, just checking in to see if we’re still on for lunch tomorrow. Let me know what time works for you!", "personal"),
    ("Limited time offer: Get 50% off your next purchase. Act now before it's too late!", "promotion"),
    ("Dear friend, we noticed your account has been inactive. Please verify your details by logging in here.", "spam"),
    ("Hey, are you free to chat later today? Let me know when you're available!", "personal"),
    ("Get your free trial today! Don't miss out on the exclusive deal for premium services!", "promotion"),
    ("Reminder: Your dentist appointment is at 3:00 PM tomorrow. Let us know if you need to reschedule.", "reminder"),
    ("You’ve been selected for a special offer. Click here to claim your reward!", "spam"),
    ("Good morning! Please find the attached document I mentioned last week.", "work"),
    ("Win an iPhone 15 today! Just pay shipping and handling to receive your prize.", "spam"),
    ("Hey Sarah, here’s the updated report I was talking about. Let me know if you have any questions.", "work"),
    ("Free cruise tickets for you and a guest! Just fill out a quick survey and you're all set.", "spam"),
    ("Looking forward to our meeting this Friday. Please let me know if you need any further information.", "work"),
    ("Exclusive offer! Get a 30% discount on all luxury watches. Shop now before they’re gone!", "promotion"),
    ("It was great seeing you last weekend. Hope you’re doing well!", "personal"),
]

# Column-oriented view of the pairs above, in the same row order
data = {
    "message": [text for text, _ in labelled_messages],
    "label": [category for _, category in labelled_messages],
}

# Create DataFrame (columns: "message", "label")
df = pd.DataFrame(data)

# Split the data
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split (and therefore the reported metrics) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'], df['label'], test_size=0.2, random_state=42
)

# Transform text data to feature vectors
# The vocabulary is learned from the training messages only; the test messages
# are merely projected onto it so no information leaks from the held-out set.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Train the model
model = MultinomialNB()
model.fit(X_train_vectorized, y_train)

# Make predictions
y_pred = model.predict(X_test_vectorized)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Classification report
# With so few test rows, some labels have no predicted (or no true) samples,
# which makes their precision/recall undefined. zero_division=0 reports those
# scores as 0 -- the same numbers as the default -- without emitting an
# UndefinedMetricWarning for every averaged metric.
report = classification_report(y_test, y_pred, zero_division=0)
print(f'Classification Report: \n{report}')

# Predict function for new emails
def predict_spam(email):
    """Return the predicted category label for a single email.

    Despite the name, the result is whichever label the fitted module-level
    ``model`` predicts for the text, not a spam / not-spam flag.

    Args:
        email: Raw text of the message to classify.

    Returns:
        The first (and only) element of the model's prediction for the email.
    """
    # Wrap the text in a list so the vectorizer sees a one-row batch.
    features = vectorizer.transform([email])
    return model.predict(features)[0]

# Test the function with a new email
# NOTE(review): this text is identical to the first message in the sample data,
# so the call is a smoke test of the pipeline rather than evidence that the
# model generalises to unseen emails.
new_email = "Hi John, just checking in to see if we’re still on for lunch tomorrow. Let me know what time works for you!"
result = predict_spam(new_email)
print(
    f"New Email: '{new_email}'\n"
    f"Prediction: {result}"
)

Accuracy: 0.3333333333333333
Classification Report: 
              precision    recall  f1-score   support

    personal       0.33      1.00      0.50         1
        work       0.00      0.00      0.00         2

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3

New Email: 'Hi John, just checking in to see if we’re still on for lunch tomorrow. Let me know what time works for you!'
Prediction: personal


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
