In [98]:
import math
import pandas as pd
import re

# Class priors P(spam) and P(non-spam), as given in the question.
p_spam, p_nonSpam = 0.9, 0.1

In [99]:
# Table 1 values: per-class word likelihoods, stored as {class label: {keyword: value}}.
# Both class tables must list exactly the same keywords, because the DataFrame
# built from this dict uses the keywords as its row index.
# NOTE(review): "Groningen" was previously spelled "Gronngen", which could never
# match the word as it appears in an e-mail -- confirm the spelling against Table 1.
probability_array = {
    "spam" : {
        "Anti-aging" : 0.00062,
        "Customers" : 0.005,
        "Fun" : 0.00015,
        "Groningen" : 0.00001,
        "Lecture" : 0.000015,
        "Money" : 0.002,
        "Vacation" : 0.00025,
        "Viagra" : 0.001,
        "Watches" : 0.0003
    },
    "non-spam" : {
        "Anti-aging" : 0.000000035,
        "Customers" : 0.0001,
        "Fun" : 0.0007,
        "Groningen" : 0.001,
        "Lecture" : 0.0008,
        "Money" : 0.0005,
        "Vacation" : 0.00014,
        "Viagra" : 0.0000003,
        "Watches" : 0.000004
    }
}

In [100]:
# Tabulate the Table 1 values (one row per keyword, one column per class)
# and print the table so the numbers can be checked by eye.
df_probability_array = pd.DataFrame(data=probability_array)
print(df_probability_array)

                spam      non-spam
Anti-aging  0.000620  3.500000e-08
Customers   0.005000  1.000000e-04
Fun         0.000150  7.000000e-04
Gronngen    0.000010  1.000000e-03
Lecture     0.000015  8.000000e-04
Money       0.002000  5.000000e-04
Vacation    0.000250  1.400000e-04
Viagra      0.001000  3.000000e-07
Watches     0.000300  4.000000e-06


In [101]:
#To read the words in the email that have the keywords as mentioned in the table
def find_keywords_in_sentence(sentence, df_probability_array):
    """Return the table keywords found in ``sentence``, in order of appearance.

    Parameters
    ----------
    sentence : str
        The e-mail text to scan.
    df_probability_array : pandas.DataFrame
        Likelihood table; only its index (the keywords) is read here.

    Returns
    -------
    list of str
        One entry per matching word of the sentence (repeats are kept). Each
        entry is the lower-cased spelling of the keyword as it is written in
        the table, e.g. ``"anti-aging"``.
    """
    def _normalise(text):
        # Lower-case, then drop every character that is neither a word
        # character nor whitespace (so hyphens, quotes, commas ... disappear).
        return re.sub(r'[^\w\s]', '', text.lower())

    # The sentence loses its punctuation, so the keywords have to be normalised
    # the same way or a hyphenated keyword such as "Anti-aging" can never match.
    # The dict maps the normalised spelling back to the lower-cased table
    # spelling, which is what callers capitalise to look a value up again.
    keyword_by_token = {
        _normalise(keyword): keyword.lower()
        for keyword in df_probability_array.index
    }
    return [
        keyword_by_token[token]
        for token in _normalise(sentence).split()
        if token in keyword_by_token
    ]

In [102]:
#Code to define Naive Bayes
def naive_bayes_calculation(keywords_in_sentence, p_spam, p_nonSpam, df_probability_array):
    """Score a set of keywords under the spam and non-spam classes.

    For each class the score is the class prior multiplied by the product of
    the table values of the given keywords. The two scores are returned as
    computed; they are not normalised to sum to one.

    Parameters
    ----------
    keywords_in_sentence : iterable of str
        Keywords found in the e-mail (any letter case).
    p_spam, p_nonSpam : float
        Prior for the spam / non-spam class.
    df_probability_array : pandas.DataFrame
        Table with a "spam" and a "non-spam" column, indexed by keyword.

    Returns
    -------
    tuple
        ``(spam score, non-spam score)``.
    """
    def _class_score(label, prior):
        # The table is looked up with the capitalised form of each word; a
        # word that is not in the table contributes a neutral factor of 1.
        likelihood = math.prod(
            df_probability_array[label].get(keyword.capitalize(), 1)
            for keyword in keywords_in_sentence
        )
        return prior * likelihood

    return _class_score("spam", p_spam), _class_score("non-spam", p_nonSpam)


In [103]:
#To display the results
def display_results(sentence):
    """Classify ``sentence`` as spam or non-spam and print each step.

    Reads the module-level ``df_probability_array``, ``p_spam`` and
    ``p_nonSpam``. Prints, in order: the sentence, the keywords found in it,
    the two values returned by ``naive_bayes_calculation`` and the verdict.

    Parameters
    ----------
    sentence : str
        The e-mail text to classify.

    Returns
    -------
    None
    """
    print(sentence)
    keywords_in_email = find_keywords_in_sentence(sentence, df_probability_array)
    print(f"Keywords from this sentence : {keywords_in_email}")
    spam_prob, nonSpam_prob = naive_bayes_calculation(keywords_in_email, p_spam, p_nonSpam, df_probability_array)
    # Message wording corrected: "not spam in" -> "not spam is", "coclude" -> "conclude".
    print(f"Probability of spam is {spam_prob:.12f} and probability of not spam is {nonSpam_prob:.12f}.")
    # Strict comparison: a tie between the two values is reported as non-spam.
    print(f"From this we can conclude that the above sentence is {'spam.' if spam_prob > nonSpam_prob else 'non-spam.'} \n")

In [104]:
# Classify the example e-mails of parts (a) and (b), one after the other.
for sentence in (
    "\"We offer our dear customers a wide selection of classy watches.\"",  # Part (a)
    "\"Did you have fun on vacation? I sure did!\"",  # Part (b)
):
    display_results(sentence)



"We offer our dear customers a wide selection of classy watches."
Keywords from this sentence : ['customers', 'watches']
Probability of spam is 0.000001350000 and probability of not spam in 0.000000000040.
From this we can coclude that the above sentence is spam. 

"Did you have fun on vacation? I sure did!"
Keywords from this sentence : ['fun', 'vacation']
Probability of spam is 0.000000033750 and probability of not spam in 0.000000009800.
From this we can coclude that the above sentence is spam. 

