In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [2]:
class NaiveBayes():
    """Gaussian Naive Bayes classifier for continuous features.

    Each feature is modelled as an independent normal distribution per class.
    Prediction picks the class maximising
    ``log P(class) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest overall feature variance that is added to
        every per-class variance. This keeps the variances strictly positive
        so that a feature which is constant within a class does not cause a
        division by zero (and NaN posteriors).

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n_classes,)
        Sorted unique labels seen during fitting.
    means, vars : ndarray of shape (n_classes, n_features)
        Per-class feature means and (smoothed) variances.
    priors : ndarray of shape (n_classes,)
        Relative frequency of each class in the training data.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not hold exactly one
            label per row of ``X``.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if y.shape != (n_samples,):
            raise ValueError(
                f"y must have shape ({n_samples},), got {y.shape}"
            )

        self.classes = np.unique(y)
        n_classes = len(self.classes)

        self.means = np.zeros((n_classes, n_features), dtype=np.float64)
        self.vars = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros((n_classes), dtype=np.float64)

        # Scale the smoothing term by the data's overall spread so it is
        # negligible for well-behaved features; fall back to the raw value
        # when the whole dataset is constant (max variance of 0).
        max_var = X.var(axis=0).max() if n_features else 0.0
        epsilon = self.var_smoothing * (max_var if max_var > 0 else 1.0)

        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            self.means[i, :] = X_c.mean(axis=0)
            self.vars[i, :] = X_c.var(axis=0) + epsilon
            self.priors[i] = X_c.shape[0] / n_samples

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,) with the same dtype as ``self.classes``.
        """
        X = np.asarray(X, dtype=np.float64)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
        scores = self._joint_log_likelihood(X)
        return self.classes[np.argmax(scores, axis=1)]

    def _predict(self, x):
        """Return the most probable class label for a single sample ``x``."""
        x = np.asarray(x, dtype=np.float64)
        return self.predict(x.reshape(1, -1))[0]

    def _joint_log_likelihood(self, X):
        """Unnormalised log-posterior of each class for each row of ``X``.

        Returns an array of shape (n_samples, n_classes).
        """
        log_priors = np.log(self.priors)
        scores = np.empty((X.shape[0], len(self.classes)), dtype=np.float64)
        for i in range(len(self.classes)):
            scores[:, i] = self._log_density(i, X).sum(axis=1) + log_priors[i]
        return scores

    def _log_density(self, class_idx, x):
        """Per-feature log of the Gaussian density for class ``class_idx``.

        Working in log space avoids ``log(0) = -inf`` when the density of a
        far-away sample underflows, which would otherwise make every class
        tie and the first one win arbitrarily.
        """
        mean = self.means[class_idx]
        var = self.vars[class_idx]
        return -0.5 * np.log(2 * np.pi * var) - (x - mean) ** 2 / (2 * var)

    def prob_density_fn(self, class_idx, x):
        """Per-feature Gaussian density of ``x`` under class ``class_idx``.

        Kept for callers that want raw densities; prediction itself uses the
        numerically safer log-density.
        """
        mean = self.means[class_idx]
        var = self.vars[class_idx]

        num = np.exp(- (x - mean) ** 2 / (2 * var))
        den = np.sqrt(2 * np.pi * var)

        return num / den

In [3]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same shape as ``y_true``.

    Returns
    -------
    float
        Number of matching entries divided by ``len(y_true)``.

    Raises
    ------
    ValueError
        If the inputs have different shapes or are empty.
    """
    # Coerce to arrays so that plain Python lists are compared element-wise;
    # ``list == list`` would yield a single bool and a wrong score.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"Shape mismatch: y_true {y_true.shape} vs y_pred {y_pred.shape}"
        )
    if y_true.size == 0:
        raise ValueError("Cannot compute accuracy of empty inputs")
    return float(np.sum(y_true == y_pred) / len(y_true))

In [4]:
# Load the Iris dataset and pull out the feature matrix and label vector.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [5]:
# Sanity check: the feature matrix and the label vector should agree on
# the number of samples.
X.shape, y.shape

((150, 4), (150,))

In [6]:
# Peek at one arbitrary sample (index 42): its feature vector and its label.
X[42], y[42]

(array([4.4, 3.2, 1.3, 0.2]), 0)

In [7]:
# List the distinct class labels present in the dataset.
np.unique(y)

array([0, 1, 2])

In [8]:
# Instantiate the classifier and estimate its per-class means, variances
# and priors from the training split only.
model = NaiveBayes()
model.fit(X_train, y_train)

In [9]:
# Predict labels for the held-out split and report the fraction that
# match the true labels.
y_pred = model.predict(X_test)
accuracy(y_test, y_pred)

0.9666666666666667