# In [None]:

import numpy as np
class GaussianNaiveBayes:
    """
    Gaussian Naive Bayes classifier.

    Every feature is modelled, per class, as a univariate Gaussian whose mean
    and variance are the maximum likelihood estimates computed from the
    training rows of that class. A smoothing term is added to every variance:

        epsilon = var_smoothing * max(feature_variances)

    where ``feature_variances`` are the per-feature variances of the whole
    training matrix.

    All scores are computed in log space to avoid numerical underflow.
    ``predict_proba`` converts the log scores back to probabilities and
    normalizes them so that each row sums to one.

    Feature likelihoods
    -------------------
    For Gaussian numeric features:
        log_pdf = -0.5 * ( log(2 * pi * variance) + ((x - mean)^2) / variance )

    Parameters
    ----------
    var_smoothing : float, optional (default=1e-9)
        Portion of the largest feature variance added to each variance estimate
        for numerical stability.

    Attributes
    ----------
    classes_ : ndarray or None
        Sorted unique class labels seen by ``fit`` (``None`` before fitting).
    log_priors_ : dict
        Maps class label -> log prior log P(Y = c).
    gaussian_means_ : dict
        Maps class label -> {feature index -> mean}.
    gaussian_vars_ : dict
        Maps class label -> {feature index -> smoothed variance}.

    Notes
    -----
    If the smoothing term is zero (``var_smoothing == 0`` or every feature is
    constant over the training data), a feature that is constant within a
    class keeps a variance of zero, and scoring then divides by zero.

    Example
    -------
    >>> X = np.array([[1.0, 2.0],
    ...               [1.2, 1.9],
    ...               [3.2, 4.8],
    ...               [3.0, 5.1]])
    >>> y = np.array(['A', 'A', 'B', 'B'])
    >>> model = GaussianNaiveBayes()
    >>> _ = model.fit(X, y)
    >>> model.predict([[1.1, 2.0]])
    array(['A'], dtype='<U1')
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing
        self.classes_ = None
        self.log_priors_ = {}
        self.gaussian_means_ = {}
        self.gaussian_vars_ = {}

    def __repr__(self):
        """Return a constructor-style representation of the estimator."""
        return f"{type(self).__name__}(var_smoothing={self.var_smoothing!r})"

    # Likelihood helper functions
    def _gaussian_log_pdf(self, x, mean, var):
        """
        Compute the log-density of a univariate Gaussian distribution.

        The expression is elementwise, so scalars and broadcastable NumPy
        arrays are both accepted.

        Parameters
        ----------
        x : float or ndarray
            Observed value(s) of the feature.
        mean : float or ndarray
            Mean of the feature for the given class.
        var : float or ndarray
            Variance of the feature for the given class (after smoothing).

        Returns
        -------
        float or ndarray
            The log probability density log(P(x | mean, var)).

        Example
        -------
        >>> model = GaussianNaiveBayes()
        >>> float(model._gaussian_log_pdf(2.0, mean=2.0, var=1.0))
        -0.9189385332046727
        """
        return -0.5 * (np.log(2.0 * np.pi * var) + ((x - mean) ** 2) / var)

    def _class_parameters(self, class_label, n_features):
        """
        Gather the first ``n_features`` means and variances of a class from the
        per-feature dictionaries into two float arrays.

        Raises ``KeyError`` if the class has no entry for one of the requested
        feature indices.
        """
        means_by_feature = self.gaussian_means_[class_label]
        vars_by_feature = self.gaussian_vars_[class_label]
        feature_ids = range(n_features)
        means = np.array([means_by_feature[j] for j in feature_ids], dtype=float)
        variances = np.array([vars_by_feature[j] for j in feature_ids], dtype=float)
        return means, variances

    # Aggregator
    def _compute_log_likelihood(self, x_row, class_label):
        """
        Compute the total log-likelihood log p(x | c) of a single sample under
        a given class.

        Under the Naive Bayes conditional independence assumption the joint
        likelihood factorizes across features:
            p(x | c) = ∏_j p(x_j | c)
            log p(x | c) = ∑_j log p(x_j | c)

        Each p(x_j | c) is a univariate Gaussian with class-specific mean and
        variance.

        Parameters
        ----------
        x_row : array-like of shape (n_features,)
            A single data point.
        class_label : object
            The class for which the likelihood is computed.

        Returns
        -------
        float
            Sum of the Gaussian log-densities over the features of ``x_row``.

        Notes
        -----
        - The means and variances are read from
          ``self.gaussian_means_[class_label]`` and
          ``self.gaussian_vars_[class_label]``, which must already be set.
        - Only the feature indices ``0 .. len(x_row) - 1`` are used.

        Example
        -------
        >>> model = GaussianNaiveBayes()
        >>> model.gaussian_means_ = {'A': {0: 0.0}}
        >>> model.gaussian_vars_ = {'A': {0: 1.0}}
        >>> float(model._compute_log_likelihood([0.0], 'A'))
        -0.9189385332046727

        >>> model.gaussian_means_ = {'A': {0: 0.0, 1: 1.0}}
        >>> model.gaussian_vars_  = {'A': {0: 1.0, 1: 1.0}}
        >>> float(model._compute_log_likelihood([0.0, 1.0], 'A'))
        -1.8378770664093453
        """
        n_features = len(x_row)
        values = np.asarray(x_row, dtype=float)
        means, variances = self._class_parameters(class_label, n_features)
        # One vectorized evaluation over all features instead of a Python loop.
        return np.sum(self._gaussian_log_pdf(values, means, variances))

    # Fit
    def fit(self, X, y):
        """
        Fit the Gaussian Naive Bayes classifier.

        Learns:
        - class log priors log(P(Y=c))
        - per-class feature means
        - per-class feature variances, each increased by
          ``var_smoothing * max(per-feature variance of X)``

        Any parameters learned by a previous call are discarded.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data matrix of continuous features.
        y : array-like of shape (n_samples,)
            Class labels for each sample.

        Returns
        -------
        self : GaussianNaiveBayes
            The fitted model instance.

        Raises
        ------
        ValueError
            If X contains NaN, X is not a non-empty 2D array, or y is not a
            1D array with one label per row of X.

        Example
        -------
        >>> X = np.array([[1, 2], [1.1, 1.8], [3, 4.9]])
        >>> y = np.array(['A', 'A', 'B'])
        >>> model = GaussianNaiveBayes()
        >>> model.fit(X, y)
        GaussianNaiveBayes(var_smoothing=1e-09)
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        if np.isnan(X).any():
            raise ValueError("GaussianNaiveBayes does not support missing values. "
                     "Please remove or impute missing data before fitting.")

        if X.ndim != 2:
            raise ValueError(
                "X must be a 2D array of shape (n_samples, n_features); "
                f"got an array with {X.ndim} dimension(s)."
            )
        if X.size == 0:
            raise ValueError(
                "X must contain at least one sample and one feature; "
                f"got shape {X.shape}."
            )

        n_samples = X.shape[0]
        if y.ndim != 1 or y.shape[0] != n_samples:
            raise ValueError(
                "y must be a 1D array with one label per row of X; "
                f"got y with shape {y.shape} for {n_samples} samples."
            )

        classes = np.unique(y)

        # Smoothing term shared by all classes and features: a fraction of the
        # largest per-feature variance of the whole training matrix.
        epsilon = self.var_smoothing * np.max(X.var(axis=0))

        # Build into fresh dicts so that nothing from an earlier fit survives
        # and a failure part-way through does not leave half-updated state.
        log_priors = {}
        class_means = {}
        class_vars = {}
        for c in classes:
            Xc = X[y == c]
            log_priors[c] = np.log(Xc.shape[0] / n_samples)
            class_means[c] = dict(enumerate(Xc.mean(axis=0)))
            class_vars[c] = dict(enumerate(Xc.var(axis=0) + epsilon))

        self.classes_ = classes
        self.log_priors_ = log_priors
        self.gaussian_means_ = class_means
        self.gaussian_vars_ = class_vars
        return self

    # Prediction
    def predict_log_proba(self, X):
        """
        Compute unnormalized log posterior scores for each sample and class.

        By Bayes' rule the posterior is proportional to prior times likelihood:
            P(c | x) ∝ P(c) · P(x | c)

        so the score returned for class c is
            log P(c) + log P(x | c)

        where log P(x | c) is the sum of the per-feature Gaussian
        log-likelihoods (Naive Bayes conditional independence assumption).
        The scores are NOT normalized; use ``predict_proba`` for probabilities.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input samples, where each row corresponds to a single data point.

        Returns
        -------
        ndarray of shape (n_samples, n_classes)
            Log posterior scores (not normalized); column k corresponds to
            ``self.classes_[k]``.

        Raises
        ------
        ValueError
            If X is a non-empty array that is not 2D, or if its number of
            columns differs from the number of features stored for a class.

        Notes
        -----
        - Requires ``self.classes_``, ``self.log_priors_``,
          ``self.gaussian_means_`` and ``self.gaussian_vars_`` to be set.
        - For each class the likelihood of all rows is evaluated with a single
          broadcast NumPy expression rather than per-sample Python loops.
        - An input with zero rows yields an array of shape (0, n_classes).

        Example
        -------
        >>> model = GaussianNaiveBayes()
        >>> model.classes_ = np.array(['A'])
        >>> model.log_priors_ = {'A': 0.0}
        >>> model.gaussian_means_ = {'A': {0: 0.0}}
        >>> model.gaussian_vars_  = {'A': {0: 1.0}}
        >>> model.predict_log_proba([[0]])
        array([[-0.91893853]])

        >>> model.classes_ = np.array(['A', 'B'])
        >>> model.log_priors_ = {'A': np.log(0.5), 'B': np.log(0.5)}
        >>> model.gaussian_means_ = {
        ...     'A': {0: 0.0},
        ...     'B': {0: 1.0}
        ... }
        >>> model.gaussian_vars_ = {
        ...     'A': {0: 1.0},
        ...     'B': {0: 1.0}
        ... }
        >>> model.predict_log_proba([[0.0], [1.0]])
        array([[-1.61208571, -2.11208571],
               [-2.11208571, -1.61208571]])
        """
        X = np.asarray(X, dtype=float)
        n_classes = len(self.classes_)

        # No rows to score (covers an empty list as well as a (0, d) matrix).
        if X.ndim >= 1 and X.shape[0] == 0:
            return np.zeros((0, n_classes))
        if X.ndim != 2:
            raise ValueError(
                "X must be a 2D array of shape (n_samples, n_features); "
                f"got an array with {X.ndim} dimension(s)."
            )

        n_samples, n_features = X.shape
        log_probs = np.empty((n_samples, n_classes))

        for k, c in enumerate(self.classes_):
            n_expected = len(self.gaussian_means_[c])
            if n_features != n_expected:
                # Scoring a subset of the features would silently give
                # wrong posteriors, so refuse instead.
                raise ValueError(
                    f"X has {n_features} feature(s), but the model holds "
                    f"parameters for {n_expected} feature(s)."
                )
            means, variances = self._class_parameters(c, n_features)
            # (n_samples, n_features) log-densities summed over features.
            log_l = np.sum(self._gaussian_log_pdf(X, means, variances), axis=1)
            log_probs[:, k] = self.log_priors_[c] + log_l

        return log_probs

    def predict_proba(self, X):
        """
        Compute normalized posterior probabilities P(Y = c | X) for each sample.

        The unnormalized log scores from ``predict_log_proba`` are turned into
        probabilities with a softmax over the classes:
            P(c | x) = exp(s_c) / sum_k exp(s_k),   s_c = log P(c) + log P(x | c)

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples, n_classes)
            Normalized probabilities. Each row sums to 1.

        Notes
        -----
        For numerical stability the row-wise maximum score is subtracted before
        exponentiation. This does not change the result because softmax is
        invariant to adding a constant to every score of a row.

        Example
        -------
        >>> model = GaussianNaiveBayes()
        >>> model.classes_ = np.array(['A'])
        >>> model.log_priors_ = {'A': 0.0}
        >>> model.gaussian_means_ = {'A': {0: 0}}
        >>> model.gaussian_vars_  = {'A': {0: 1}}
        >>> model.predict_proba([[0]])
        array([[1.]])

        >>> model.classes_ = np.array(['A', 'B'])
        >>> model.log_priors_ = {'A': np.log(0.5), 'B': np.log(0.5)}
        >>> model.gaussian_means_ = {
        ...     'A': {0: 0.0},
        ...     'B': {0: 1.0}
        ... }
        >>> model.gaussian_vars_ = {
        ...     'A': {0: 1.0},
        ...     'B': {0: 1.0}
        ... }
        >>> model.predict_proba([[0.0], [1.0]])
        array([[0.62245933, 0.37754067],
               [0.37754067, 0.62245933]])
        """
        log_probs = self.predict_log_proba(X)
        # Shift so the largest score in every row is 0 before exponentiating.
        shifted = log_probs - np.max(log_probs, axis=1, keepdims=True)
        probs = np.exp(shifted)
        return probs / probs.sum(axis=1, keepdims=True)

    def predict(self, X):
        """
        Predict the most likely class label for each sample.

        The predicted class is the one with the highest normalized posterior
        probability returned by ``predict_proba``:
            y_hat = argmax_c P(Y = c | x)

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted class labels, taken from ``self.classes_``.

        Example
        -------
        >>> X = np.array([[0], [2]])
        >>> model = GaussianNaiveBayes()
        >>> model.classes_ = np.array(['A'])
        >>> model.log_priors_ = {'A': 0.0}
        >>> model.gaussian_means_ = {'A': {0: 0}}
        >>> model.gaussian_vars_  = {'A': {0: 1}}
        >>> model.predict(X)
        array(['A', 'A'], dtype='<U1')
        """
        probs = self.predict_proba(X)
        class_indices = np.argmax(probs, axis=1)
        return self.classes_[class_indices]