Example 1:

In [1]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# --- Data: the breast cancer dataset that ships with scikit-learn ---
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
X, y = data.data, data.target  # feature matrix and class labels

# --- Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Train Gaussian Naive Bayes (fit() returns the fitted estimator itself) ---
model = GaussianNB().fit(X_train, y_train)

# --- Predict on the held-out samples and score against the true labels ---
y_pred_nb = model.predict(X_test)
accuracy_nb = accuracy_score(y_test, y_pred_nb)
print("Naive Bayes Accuracy:", accuracy_nb)


Naive Bayes Accuracy: 0.9736842105263158


Example 2:

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Toy SMS corpus: each entry is a (message text, label) pair, label is "ham" or "spam".
# The order of the entries matters: the seeded train/test split later depends on it.
data = [
    ("I'm gonna be home soon and i don't want to talk about this stuff anymore tonight. I've cried enough today.", "ham"),
    ("Win a $100 Walmart gift card! Enter now", "spam"),
    ("Do you want to grab coffee tomorrow?", "ham"),
    ("URGENT! You have won a 1 week FREE membership in our $100,000 Prize Jackpot! Txt the word: CLAIM to No: 81010 T&C www.dbuk.net LCCLTD POBOX 4403LDNW1A7RW18", "spam"),
    ("Can we meet this Sunday? I miss you", "ham"),
    ("Reminder from O2: To get 2.50 pounds free call credit, go to http://www.o2.co.uk and enter your code", "spam"),
    ("Hey, how are you doing?", "ham"),
    ("Congrats! 1 year special cinema pass for two is yours. call 09061209465 now! C Suprman V, Matrix3, StarWars3, etc all 4 FREE! bx420-ip4-5we. 150pm", "spam"),
    ("I'm on my way to the store, do you need anything?", "ham"),
    ("You have WON a guaranteed £1000 cash or a £2000 prize. To claim yours, call our customer service representative at 0800 123 456", "spam"),
    ("It was lovely to see you last night, we should do it again soon.", "ham"),
    ("IMPORTANT - You could be entitled up to £3,900 in compensation from mis-sold PPI on a credit or loan. Reply INFO to stop", "spam"),
    ("Good morning, did you sleep well?", "ham"),
    ("See movie tonight? Check out the latest blockbuster.", "ham"),
    ("Claim your free trial of our diet pills! Lose 20 pounds in just one month!", "spam"),
    ("Your mobile number has been selected for a $2,500 luxury cruise for two! Call now to confirm your prize.", "spam"),
    ("Dinner tonight? My place?", "ham"),
    ("Update: Your Amazon order has shipped. Check your delivery status here.", "ham"),
    ("You're invited! Exclusive event this weekend at our store. Reply YES to RSVP.", "spam"),
    ("Could you help me with the project tomorrow? I could really use your expertise.", "ham"),
    ("Don't miss out on this exclusive offer! Buy one get one free for a limited time only!", "spam"),
    ("Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)", "spam"),
    ("Nah I don't think he goes to usf, he lives around here though", "ham"),
    ("FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv", "spam"),
    ("Even my brother is not like to speak with me. They treat me like aids patent.", "ham"),
    ("As per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your callertune for all Callers. Press *9 to copy your friends Callertune", "spam"),
]

# Transpose the (text, label) pairs into two parallel lists, keeping the original order
texts, labels = map(list, zip(*data))


In [3]:
# Hold out roughly a third of the messages for evaluation (seeded, so the split is repeatable)
text_train, text_test, label_train, label_test = train_test_split(
    texts,
    labels,
    test_size=0.33,
    random_state=42,
)

# Bag-of-words counts with English stop words removed. The vocabulary is learned
# from the training messages only; the test messages are encoded with that same vocabulary.
# Note: X_train / X_test rebind the names used for the breast-cancer split in Example 1.
vectorizer = CountVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Multinomial Naive Bayes on the word counts (fit() returns the fitted estimator)
classifier = MultinomialNB().fit(X_train, label_train)

# Predict labels for the held-out messages
predictions = classifier.predict(X_test)

# Report overall accuracy, then per-class precision / recall / F1
print(f"Accuracy: {accuracy_score(label_test, predictions)}")
print("Classification Report:", classification_report(label_test, predictions), sep="\n")


Accuracy: 0.6666666666666666
Classification Report:
              precision    recall  f1-score   support

         ham       1.00      0.40      0.57         5
        spam       0.57      1.00      0.73         4

    accuracy                           0.67         9
   macro avg       0.79      0.70      0.65         9
weighted avg       0.81      0.67      0.64         9



In [4]:
# Classify a previously unseen message with the fitted vectorizer and classifier
new_sentence = "Congratulations! You've won a free trip to Hawaii!"

# transform() takes an iterable of documents, so wrap the single sentence in a list
new_sentence_vectorized = vectorizer.transform([new_sentence])

# predict() returns one label per input row; there is exactly one row here
predicted_labels = classifier.predict(new_sentence_vectorized)
prediction = predicted_labels[0]
print(f"Prediction for '{new_sentence}': {prediction}")

Prediction for 'Congratulations! You've won a free trip to Hawaii!': spam


In [5]:
# Show the non-zero entries of the encoded sentence. Each printed line reads
# "(row, column)  count": row 0 is the single sentence, and column is an index
# into the vocabulary learned by `vectorizer`.
print(new_sentence_vectorized)

  (0, 43)	1
  (0, 124)	1


In [6]:
# Small standalone demo of how CountVectorizer turns raw text into token counts
sample_docs = [
    "hello hello this,  11  ! is a test",
    "this is another test",
    "third example another another test zebra",
]
vectorizer2 = CountVectorizer()
doc_term_counts = vectorizer2.fit_transform(sample_docs)
print(doc_term_counts)

  (0, 3)	2
  (0, 7)	1
  (0, 0)	1
  (0, 4)	1
  (0, 5)	1
  (1, 7)	1
  (1, 4)	1
  (1, 5)	1
  (1, 1)	1
  (2, 5)	1
  (2, 1)	2
  (2, 6)	1
  (2, 2)	1
  (2, 8)	1


In [7]:
# prompt: can you map the words to numbers in vectorizer2

# vocabulary_ is the token -> column index mapping learned by fit_transform();
# these indices are the column numbers in the (row, column) pairs printed above.
print(vectorizer2.vocabulary_)

{'hello': 3, 'this': 7, '11': 0, 'is': 4, 'test': 5, 'another': 1, 'third': 6, 'example': 2, 'zebra': 8}
