In [1]:
import numpy as np

In [2]:
import numpy as np

class NaiveBayes:
    """Gaussian Naive Bayes classifier for continuous features.

    Every feature is modelled as an independent normal distribution per
    class. ``fit`` estimates the class priors and the per-class feature means
    and variances; ``predict`` returns, for each sample, the class with the
    largest log-posterior.
    """

    # Added to every variance so that a constant feature (variance 0) does not
    # cause a division by zero.
    _VAR_EPS = 1e-9

    def __init__(self):
        self.classes = None  # Unique class labels
        self.priors = {}  # Prior probabilities P(class)
        self.means = {}  # Mean of features for each class
        self.variances = {}  # Variance of features for each class

    def fit(self, X, y):
        """Estimate priors, feature means and feature variances per class.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Class label of each training sample.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Start from a clean slate so that a refit never keeps statistics of
        # classes that are absent from the new training data.
        self.priors = {}
        self.means = {}
        self.variances = {}

        self.classes = np.unique(y)
        n_samples = len(X)

        for c in self.classes:
            X_c = X[y == c]  # Samples belonging to class c
            self.priors[c] = len(X_c) / n_samples
            self.means[c] = np.mean(X_c, axis=0)
            self.variances[c] = np.var(X_c, axis=0)

    def _gaussian_pdf(self, x, mean, var):
        """Return the Gaussian probability density of ``x``.

        ``var`` is increased by a small constant before use so that a zero
        variance does not divide by zero.
        """
        var = var + self._VAR_EPS
        coeff = 1.0 / np.sqrt(2.0 * np.pi * var)
        return coeff * np.exp(-((x - mean) ** 2) / (2 * var))

    def _gaussian_log_pdf(self, x, mean, var):
        """Return the logarithm of the Gaussian density of ``x``.

        The log-density is evaluated analytically rather than as
        ``log(_gaussian_pdf(...))``: for points far from the mean the density
        underflows to 0 and its log becomes ``-inf``, which would make all
        classes indistinguishable.
        """
        var = var + self._VAR_EPS
        return -0.5 * np.log(2.0 * np.pi * var) - (x - mean) ** 2 / (2.0 * var)

    def predict(self, X):
        """Predict a class label for every sample in ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify. A 1-D input is treated as a sequence of
            scalar samples, each compared against every feature.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Label with the highest log-posterior for each sample. Ties go to
            the class that comes first in ``self.classes``.
        """
        X = np.asarray(X)
        if X.ndim == 1:
            # One column per sample: each scalar broadcasts against the
            # per-feature statistics, as it would when iterating row by row.
            X = X.reshape(-1, 1)

        # One column of log-posteriors (log prior + summed log-likelihoods)
        # per class, in the order of self.classes.
        log_posteriors = np.column_stack([
            np.log(self.priors[c])
            + self._gaussian_log_pdf(X, self.means[c], self.variances[c]).sum(axis=1)
            for c in self.classes
        ])

        # argmax returns the first maximum, so ties resolve to the earliest class.
        return self.classes[np.argmax(log_posteriors, axis=1)]

    def evaluate(self, y_true, y_pred):
        """Compute accuracy, precision, recall and F1-score.

        Precision, recall and F1 treat label ``1`` as the positive class and
        label ``0`` as the negative class. A small constant is added to each
        denominator to avoid division by zero, so these three values stay
        marginally below their exact ratio (e.g. just under 1.0 for a perfect
        prediction).

        Parameters
        ----------
        y_true : array-like of shape (n_samples,)
            Ground-truth labels.
        y_pred : array-like of shape (n_samples,)
            Predicted labels.

        Returns
        -------
        dict
            Keys ``"accuracy"``, ``"precision"``, ``"recall"``, ``"f1_score"``.
        """
        # Element-wise comparison needs arrays; plain lists would be compared
        # as whole objects instead.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)

        accuracy = np.mean(y_true == y_pred)
        tp = np.sum((y_true == 1) & (y_pred == 1))
        fp = np.sum((y_true == 0) & (y_pred == 1))
        fn = np.sum((y_true == 1) & (y_pred == 0))

        eps = 1e-9  # keeps the ratios defined when a denominator would be 0
        precision = tp / (tp + fp + eps)
        recall = tp / (tp + fn + eps)
        f1_score = 2 * (precision * recall) / (precision + recall + eps)

        return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1_score": f1_score}


In [3]:
# Demo: fit the classifier on two synthetic Gaussian clusters and score it
# against the very data it was trained on.
if __name__ == "__main__":
    np.random.seed(42)  # fixed seed keeps the demo reproducible

    # Two clusters of 50 two-dimensional points, centred at (2, 2) and (-2, -2).
    X1 = 2 + np.random.randn(50, 2)
    X2 = -2 + np.random.randn(50, 2)
    X = np.concatenate([X1, X2], axis=0)
    y = np.repeat([0.0, 1.0], 50)  # first cluster -> class 0, second -> class 1

    model = NaiveBayes()
    model.fit(X, y)
    predictions = model.predict(X)
    metrics = model.evaluate(y, predictions)

    print("Predictions:", predictions)
    print("Metrics:", metrics)

Predictions: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.]
Metrics: {'accuracy': 1.0, 'precision': 0.9999999999800001, 'recall': 0.9999999999800001, 'f1_score': 0.9999999994800002}
