## Naive Bayes

#### Training a Classifier for Continuous Features: use Gaussian Naive Bayes

In [696]:
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler

# Load the iris data. `load_iris` is the name imported above; the earlier
# `datasets.load_iris()` relied on a `datasets` name that no import in this
# cell provides, so it fails on a fresh kernel.
iris = load_iris()
features = iris.data
target = iris.target

# Standardize the features before fitting
features_standardized = StandardScaler().fit_transform(features)

# Create Gaussian naive Bayes.
# priors: sets the prior probability of each class (one entry per class)
classifier = GaussianNB(priors=[0.25, 0.25, 0.5])
classifier.fit(features_standardized, target)

# Predict the class of a new observation.
# NOTE(review): the model is fit on standardized features, but this
# observation is passed in as-is -- confirm its values are meant to be
# expressed in standardized units.
new_obs = [[4, 4, 4, 0.4]]
classifier.predict(new_obs)

array([[0.00000000e+00, 8.66468865e-30, 1.00000000e+00]])

####  Training a Classifier for Discrete and Count Features

In [700]:
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer

# Class label for each document (two documents of class 0, one of class 1)
target = np.array([0, 0, 1])

# Toy corpus: each document is a single made-up word
text_data = np.array(['bobob', 'mammamama', 'duddodd'])

# Turn the documents into a dense bag-of-words count matrix
bags = CountVectorizer().fit_transform(text_data).toarray()

# Build and fit multinomial naive Bayes in one step (`fit` returns the
# estimator itself).
# Notes on the prior-related parameters:
#   * if class_prior is not specified, prior probabilities are learned
#     from the data
#   * for a uniform prior, set fit_prior = False instead
#   * smoothing can be adjusted through alpha (e.g. alpha = 2)
# NOTE(review): the priors given here sum to 0.75 rather than 1 -- confirm
# this is intended.
classifier = MultinomialNB(class_prior=[0.25, .5]).fit(bags, target)

# Classify a new observation given as a row of word counts
new_observation = [[0, 0, 1]]
classifier.predict(new_observation)


array([1])

#### Training a Naive Bayes Classifier for Binary Features: Bernoulli Naive Bayes classifier

In [705]:
import numpy as np
from sklearn.naive_bayes import BernoulliNB

# Use a locally seeded generator so the synthetic data -- and therefore the
# fitted model -- is identical on every run (the unseeded global RNG gave
# different data each time the cell was executed).
rng = np.random.RandomState(0)

# generate binary features: 100 observations x 3 features, each 0 or 1
features = rng.randint(2, size=(100, 3))

# generate binary target: one 0/1 label per observation
target = rng.randint(2, size=100)

# create classifier
# more param: class_prior = None, fit_prior = True
# NOTE(review): the priors given here sum to 0.75 rather than 1 -- confirm
# this is intended.
classifier = BernoulliNB(class_prior=[.25, .5])
classifier.fit(features, target)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=[0.25, 0.5], fit_prior=True)

#### Calibrating Predicted Probabilities: use sigmoid calibration to turn predicted values into well-calibrated probabilities

In [708]:
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import StandardScaler

# Load the iris data. `load_iris` is the name imported above; the earlier
# `datasets.load_iris()` relied on a `datasets` name that no import in this
# cell provides, so it fails on a fresh kernel.
iris = load_iris()
features = iris.data
target = iris.target

# Standardize the features before fitting
features_standardized = StandardScaler().fit_transform(features)

# Wrap Gaussian naive Bayes in a calibrated classifier (cv=2, sigmoid
# method) and fit it on the standardized data
classifier_sigmoid = CalibratedClassifierCV(
    GaussianNB(), cv=2, method='sigmoid'
).fit(features_standardized, target)

# Calibrated class probabilities for a new observation
new_observation = [[2.6, 2.6, 2.6, 0.4]]
classifier_sigmoid.predict_proba(new_observation)


array([[0.0195019 , 0.04473133, 0.93576677]])