In [None]:
# Download the dataset
!wget https://cdn.freecodecamp.org/project-data/sms/train-data.tsv
!wget https://cdn.freecodecamp.org/project-data/sms/valid-data.tsv

# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

In [None]:
# Load the training dataset (tab-separated; names= supplies the column names,
# so the first line of the file is read as data rather than as a header)
df = pd.read_csv("train-data.tsv", sep='\t', names=['label', 'message'])

# Convert labels to numerical values (ham -> 0, spam -> 1).
# Series.map yields NaN for any value missing from the dict, so verify that
# every row was mapped instead of letting NaN labels reach the classifier.
encoded_labels = df['label'].map({'ham': 0, 'spam': 1})
unmapped = encoded_labels.isna()
if unmapped.any():
    unexpected = df.loc[unmapped, 'label'].unique().tolist()
    raise ValueError(f"Unexpected label values in train-data.tsv: {unexpected}")
df['label'] = encoded_labels

# Initialize CountVectorizer for text transformation
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['message'])  # Learn the vocabulary and convert messages into count vectors
y = df['label']  # Numeric labels aligned row-for-row with X



In [None]:
# Fit a multinomial Naive Bayes classifier on the vectorized messages
model = MultinomialNB()
model.fit(X=X, y=y)  # X: vectorizer output for the messages, y: their 0/1 labels

In [None]:
# Function to predict if a message is spam or ham
def predict_message(pred_text, threshold=0.5):
    """Classify a single message as spam or ham.

    Args:
        pred_text: The raw message text to classify.
        threshold: Spam probability above which the message is labelled
            "spam". Defaults to 0.5, the previously hard-coded cut-off.

    Returns:
        A two-element list ``[probability, label]`` where ``probability`` is
        the model's spam probability as a plain Python float and ``label`` is
        either "spam" or "ham".
    """
    # transform() takes an iterable of documents, hence the one-element list
    transformed_text = vectorizer.transform([pred_text])
    # Row 0 is the only message; column 1 corresponds to the class encoded as 1
    # (spam), given the ham -> 0 / spam -> 1 label mapping used for training.
    # float() returns a builtin float so the list prints as e.g. [0.98, 'spam']
    # rather than showing a numpy scalar repr.
    probability = float(model.predict_proba(transformed_text)[0][1])
    label = "spam" if probability > threshold else "ham"  # Assign label based on threshold
    return [probability, label]

# Try the classifier on one promotional-sounding message
pred_text = "Congratulations! You've won a free prize. Call now!"
sample_result = predict_message(pred_text)
print(sample_result)  # Printed as [spam probability, label]


In [None]:
# Function to test the model with predefined messages
def test_predictions():
    """Check predict_message against seven hand-labelled sample messages.

    Prints the success message only when every predicted label equals the
    expected one; otherwise prints the "keep trying" message. Returns None.
    """
    # Each entry pairs a message with the label the classifier should assign
    labelled_samples = [
        ("how are you doing today", "ham"),
        ("sale today! to stop texts call 98912460324", "spam"),
        ("i dont want to go. can we try it a different day? available sat", "ham"),
        ("our new mobile video service is live. just install on your phone to start watching.", "spam"),
        ("you have won £1000 cash! call to claim your prize.", "spam"),
        ("i'll bring it tomorrow. don't forget the milk.", "ham"),
        ("wow, is your arm alright. that happened to me one time too", "ham"),
    ]

    # Build the full list first so every message is predicted (no early exit)
    outcomes = [
        predict_message(text)[1] == expected
        for text, expected in labelled_samples
    ]

    if all(outcomes):
        print("You passed the challenge. Great job!")
    else:
        print("You haven't passed yet. Keep trying.")

# Run test cases: prints whether every sample message received its expected label
test_predictions()
