In [None]:
#example of generating a small classification dataset
from sklearn.datasets import make_blobs
# Draw 100 two-feature points grouped around 2 centers; random_state pins the sample.
X, y = make_blobs(
    n_samples=100,
    centers=2,
    n_features=2,
    random_state=1,
)

# Inspect the result: array shapes first, then the leading five rows and labels.
print(X.shape, y.shape)  # (100, 2) (100,)
print(X[:5])
# Expected preview of the first five rows:
# [[-0.79415228  2.10495117]
#  [-9.15155186 -4.81286449]
#  [-3.10367371  3.90202401]
#  [-1.42946517  5.16850105]
#  [-7.4693868  -4.20198333]]
print(y[:5])  # [0 1 0 0 1]


(100, 2) (100,)
[[-0.79415228  2.10495117]
 [-9.15155186 -4.81286449]
 [-3.10367371  3.90202401]
 [-1.42946517  5.16850105]
 [-7.4693868  -4.20198333]]
[0 1 0 0 1]


In [None]:
from scipy.stats import norm
import numpy as np

# fit a probability distribution to a univariate data sample
def fit_distribution(data):
  """Fit a normal distribution to a univariate data sample.

  The mean and standard deviation are estimated with ``np.mean`` and
  ``np.std`` (called with their default arguments), printed, and then used
  to build a frozen ``scipy.stats.norm`` distribution.

  Args:
    data: Non-empty array-like of numeric observations.

  Returns:
    The frozen ``norm(mu, sigma)`` distribution.

  Raises:
    ValueError: If ``data`` contains no observations.
  """
  data = np.asarray(data)
  # An empty sample would otherwise produce NaN parameters and a distribution
  # that is useless downstream, so fail loudly here.
  if data.size == 0:
    raise ValueError("cannot fit a distribution to an empty data sample")

  # estimate parameters
  mu = np.mean(data)
  sigma = np.std(data)
  print(mu, sigma)

  # fit distribution
  return norm(mu, sigma)


In [None]:
# Build the two-blob toy dataset: 100 points, 2 features, fixed random_state.
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

# Split the rows by class label using boolean masks.
Xy0, Xy1 = X[y == 0], X[y == 1]
print(Xy0.shape, Xy1.shape)  # (50, 2) (50, 2)

# Class priors = fraction of all rows that fall in each class.
priory0 = Xy0.shape[0] / X.shape[0]
priory1 = Xy1.shape[0] / X.shape[0]
print(priory0, priory1)  # 0.5 0.5

# One fitted Gaussian per feature for class y==0 (each fit prints "mu sigma"):
#   -1.5632888906409914 0.787444265443213
#   4.426680361487157 0.958296071258367
X1y0, X2y0 = fit_distribution(Xy0[:, 0]), fit_distribution(Xy0[:, 1])

# Same for class y==1:
#   -9.681177100524485 0.8943078901048118
#   -3.9713794295185845 0.9308177595208521
X1y1, X2y1 = fit_distribution(Xy1[:, 0]), fit_distribution(Xy1[:, 1])


(50, 2) (50, 2)
0.5 0.5
-1.5632888906409914 0.787444265443213
4.426680361487157 0.958296071258367
-9.681177100524485 0.8943078901048118
-3.9713794295185845 0.9308177595208521


In [None]:
# Fit one Gaussian per feature and per class; the dist* names are the ones the
# classification step further down in this cell consumes.

# class y==0 -- prints:
#   -1.5632888906409914 0.787444265443213
#   4.426680361487157 0.958296071258367
distX1y0, distX2y0 = [fit_distribution(Xy0[:, col]) for col in (0, 1)]

# class y==1 -- prints:
#   -9.681177100524485 0.8943078901048118
#   -3.9713794295185845 0.9308177595208521
distX1y1, distX2y1 = [fit_distribution(Xy1[:, col]) for col in (0, 1)]

# calculate the independent conditional probability
def probability(X, prior, dist1, dist2):
  """Score a two-feature sample for one class.

  Multiplies the class prior by the density of each feature under that
  class's per-feature distribution. The product is not divided by any
  normalizing term, so it is an unnormalized score rather than a posterior.

  Args:
    X: Indexable sample; X[0] and X[1] are the two feature values.
    prior: Prior weight of the class.
    dist1: Object with a ``pdf`` method, evaluated at X[0].
    dist2: Object with a ``pdf`` method, evaluated at X[1].

  Returns:
    prior * dist1.pdf(X[0]) * dist2.pdf(X[1]).
  """
  density_x1 = dist1.pdf(X[0])
  density_x2 = dist2.pdf(X[1])
  return prior * density_x1 * density_x2

# Score the first dataset row under each class and compare with its true label.
Xsample = X[0]
ysample = y[0]

py0 = probability(Xsample, priory0, distX1y0, distX2y0)
py1 = probability(Xsample, priory1, distX1y1, distX2y1)

# NOTE: py0 / py1 are unnormalized scores (prior times feature densities), shown
# here scaled by 100. They are only meaningful relative to each other and do not
# sum to 100, despite the "P(y=k | x)" label in the printed text.
print('P(y=0 | %s) = %.3f' % (Xsample, py0*100))
print('P(y=1 | %s) = %.3f' % (Xsample, py1*100))
print('Truth: y=%d' % ysample)

-1.5632888906409914 0.787444265443213
4.426680361487157 0.958296071258367
-9.681177100524485 0.8943078901048118
-3.9713794295185845 0.9308177595208521
P(y=0 | [-0.79415228  2.10495117]) = 0.348
P(y=1 | [-0.79415228  2.10495117]) = 0.000
Truth: y=0


In [None]:
# example of gaussian naive bayes
from sklearn.datasets import make_blobs
from sklearn.naive_bayes import GaussianNB 
#from sklearn.naive_bayes import BernoulliNB 
#from sklearn.naive_bayes import MultinomialNB 

# Build the two-blob toy dataset: 100 points, 2 features, fixed random_state.
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

# Create the Gaussian naive Bayes classifier and train it on the full dataset
# (fit returns the fitted estimator, so construction and fitting can be chained).
model = GaussianNB().fit(X, y)

# Use the first row as the query point; it is wrapped in a list so the model
# receives a batch containing one sample.
Xsample = [X[0]]
ysample = y[0]

# Per-class probabilities for the query point.
yhat_prob = model.predict_proba(Xsample)
print('Predicted Probabilities: ', yhat_prob)

# Hard class label for the same point, shown next to the known answer.
yhat_class = model.predict(Xsample)
print('Predicted Class: ', yhat_class)
print('Truth: y=%d' % ysample)


Predicted Probabilities:  [[1.00000000e+00 5.52387327e-30]]
Predicted Class:  [0]
Truth: y=0
