## Problem 2 - Naive Bayes

In [29]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
import warnings
warnings.filterwarnings('ignore')


In [30]:
data = np.loadtxt("spambase/spambase.data", delimiter=",")
X = data[:, :-1]
y = data[:, -1].astype(int)

scaler = StandardScaler()
X_norm = scaler.fit_transform(X)

n_folds = 5
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

Gaussian Naive Bayes Classifier:

In [31]:
class GaussianNaiveBayes:
    def __init__(self):
        self.priors = None
        self.means = None
        self.variances = None
        self.classes = None

    def fit(self, X, y):
        self.classes = np.unique(y)
        n_samples, n_features = X.shape

        self.means = np.zeros((len(self.classes), n_features))
        self.variances = np.zeros((len(self.classes), n_features))
        self.priors = np.zeros(len(self.classes))

        # each class calculation for prior, mean and variance
        for idx, c in enumerate(self.classes):
            X_c = X[y==c]

            self.priors[idx] = len(X_c) / n_samples
            self.means[idx, :] = np.mean(X_c, axis=0)
            self.variances[idx, :] = np.var(X_c, axis=0) + 1e-9

    def gaussian(self, x, mean, variance):
        return (1.0 / np.sqrt(2*np.pi*variance)) * np.exp(-((x-mean)**2) / (2 * variance))

    def predict_single(self, x):
        """Single instance"""
        posteriors = []

        # Calculate P(y=c|x) for each class
        for idx, c in enumerate(self.classes):
            # Start with prior P(y=c)
            posterior = self.priors[idx]

            # Multiply by P(xi|y=c) for each feature
            for i in range(len(x)):
                likelihood = self.gaussian(
                    x[i],
                    self.means[idx, i],
                    self.variances[idx, i]
                )
                posterior *= likelihood

            posteriors.append(posterior)

        return self.classes[np.argmax(posteriors)]

    def predict(self, X):
        """Multiple instances"""
        return np.array([self.predict_single(x) for x in X])

Bernoulli Classifier:

In [32]:
class BernoulliNaiveBayes:
    def __init__(self):
        self.class_priors = None
        self.means = None  # 2x57 matrix of feature means per class
        self.probs = None  # 2x57 matrix Z[k,i] = P(feature i < mean | class k)
        self.classes = None

    def fit(self, X, y):
        """
        Train BNB by computing class-specific means and Bernoulli probabilities
        """
        self.classes = np.unique(y)
        n_samples, n_features = X.shape

        # Initialize matrices
        self.means = np.zeros((len(self.classes), n_features))
        self.probs = np.zeros((len(self.classes), n_features))
        self.class_priors = np.zeros(len(self.classes))

        # Compute statistics for each class
        for idx, c in enumerate(self.classes):
            X_c = X[y == c]

            # Class prior P(y=c)
            self.class_priors[idx] = len(X_c) / n_samples

            # Mean for each feature
            self.means[idx, :] = np.mean(X_c, axis=0)

            # Z[k,i] = probability of being below mean for feature i in class k
            for i in range(n_features):
                # Count how many samples are below the mean
                below_mean = X_c[:, i] <= self.means[idx, i]
                self.probs[idx, i] = np.mean(below_mean)

    def predict_single(self, x):
        """
        Predict single instance using Bernoulli distribution
        """
        posteriors = []

        for idx, c in enumerate(self.classes):
            # Start with prior P(y=c)
            posterior = self.class_priors[idx]

            # Multiply by P(xi|y=c) for each feature
            for i in range(len(x)):
                # Bernoulli probability based on whether xi > mean[k,i]
                if x[i] > self.means[idx, i]:
                    # P(xi > mean | y=c) = 1 - Z[k,i]
                    prob = 1 - self.probs[idx, i]
                else:
                    # P(xi <= mean | y=c) = Z[k,i]
                    prob = self.probs[idx, i]

                # Avoid zero probability
                prob = max(prob, 1e-10)
                posterior *= prob

            posteriors.append(posterior)

        return self.classes[np.argmax(posteriors)]

    def predict(self, X):
        """
        Predict multiple instances
        """
        return np.array([self.predict_single(x) for x in X])

Non-Parametric Classifier: Histogram 4 bin

In [33]:
class HistogramNaiveBayes:
    def __init__(self, n_bins=4, alpha=1.0):
        self.n_bins = n_bins
        self.alpha = alpha  # Laplace smoothing parameter
        self.class_priors = None
        self.bin_edges = {}
        self.bin_probs = {}
        self.classes = None

    def fit(self, X, y):
        """
        Train HNB by computing class-specific 4-bin histograms
        """
        self.classes = np.unique(y)
        n_samples, n_features = X.shape

        self.class_priors = np.zeros(len(self.classes))

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.class_priors[idx] = len(X_c) / n_samples

            self.bin_edges[c] = {}
            self.bin_probs[c] = {}

            for feature_idx in range(n_features):
                feature_values = X_c[:, feature_idx]

                # CRITICAL FIX: Use mean instead of median as per TA specification
                # Edges should be: [min, Q1, mean, Q3, max]
                min_val = np.min(feature_values)
                q1 = np.percentile(feature_values, 25)
                mean_val = np.mean(feature_values)  # Changed from median to mean
                q3 = np.percentile(feature_values, 75)
                max_val = np.max(feature_values)

                # Create edges array
                edges = np.array([min_val, q1, mean_val, q3, max_val])

                # Sort edges (important since mean might not be between Q1 and Q3)
                edges = np.sort(edges)

                # Remove duplicates while keeping at least n_bins+1 edges
                unique_edges = []
                for edge in edges:
                    if len(unique_edges) == 0 or edge > unique_edges[-1] + 1e-10:
                        unique_edges.append(edge)

                # If we have fewer unique edges, create uniform bins
                if len(unique_edges) < self.n_bins + 1:
                    unique_edges = np.linspace(min_val,
                                              max_val + 1e-10,
                                              self.n_bins + 1).tolist()

                edges = np.array(unique_edges)

                # Ensure the last edge includes the maximum value
                edges[-1] = max_val + 1e-10

                # Calculate histogram
                hist, _ = np.histogram(feature_values, bins=edges)

                # Apply Laplace smoothing
                hist_smooth = hist + self.alpha

                # Normalize to get probabilities
                hist_prob = hist_smooth / np.sum(hist_smooth)

                self.bin_edges[c][feature_idx] = edges
                self.bin_probs[c][feature_idx] = hist_prob

    def get_bin_probability(self, value, class_label, feature_idx):
        """
        Get bin probability for a value with proper handling
        """
        edges = self.bin_edges[class_label][feature_idx]
        probs = self.bin_probs[class_label][feature_idx]

        # Find which bin the value falls into
        # Use searchsorted for more reliable binning
        bin_idx = np.searchsorted(edges[:-1], value, side='right') - 1

        # Handle edge cases
        if bin_idx < 0:
            bin_idx = 0
        elif bin_idx >= len(probs):
            bin_idx = len(probs) - 1

        return probs[bin_idx]

    def predict_single(self, x):
        """
        Predict single instance using histogram probabilities
        """
        # Use log probabilities for numerical stability
        log_posteriors = []

        for idx, c in enumerate(self.classes):
            # Start with log prior
            log_posterior = np.log(self.class_priors[idx] + 1e-10)

            # Add log probabilities for each feature
            for i in range(len(x)):
                prob = self.get_bin_probability(x[i], c, i)
                log_posterior += np.log(prob + 1e-10)

            log_posteriors.append(log_posterior)

        return self.classes[np.argmax(log_posteriors)]

    def predict(self, X):
        """
        Predict multiple instances
        """
        return np.array([self.predict_single(x) for x in X])

K-FOLD Cross Val:

In [34]:
def evaluate_model_kfold(model_class, X, y, n_folds=5):
    """
    Evaluate model using k-fold cross validation
    """
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

    train_accuracies = []
    test_accuracies = []

    for fold, (train_idx, test_idx) in enumerate(kf.split(X), 1):
        # Split data
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Normalize data
        scaler = StandardScaler()
        X_train_norm = scaler.fit_transform(X_train)
        X_test_norm = scaler.transform(X_test)

        # Train model
        model = model_class()
        model.fit(X_train_norm, y_train)

        # Evaluate
        y_pred_train = model.predict(X_train_norm)
        y_pred_test = model.predict(X_test_norm)

        train_acc = accuracy_score(y_train, y_pred_train)
        test_acc = accuracy_score(y_test, y_pred_test)

        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

        print(f"Fold {fold}: Train Acc = {train_acc:.4f}, Test Acc = {test_acc:.4f}")

    avg_train = np.mean(train_accuracies)
    avg_test = np.mean(test_accuracies)
    std_train = np.std(train_accuracies)
    std_test = np.std(test_accuracies)

    return {
        'train_accs': train_accuracies,
        'test_accs': test_accuracies,
        'avg_train': avg_train,
        'avg_test': avg_test,
        'std_train': std_train,
        'std_test': std_test
    }

# Evaluate all three models
print("="*50)
print("Gaussian Naive Bayes Results:")
print("="*50)
gnb_results = evaluate_model_kfold(GaussianNaiveBayes, X, y, n_folds=5)
print(f"\nAverage Train Accuracy: {gnb_results['avg_train']:.4f} ± {gnb_results['std_train']:.4f}")
print(f"Average Test Accuracy: {gnb_results['avg_test']:.4f} ± {gnb_results['std_test']:.4f}")

print("\n" + "="*50)
print("Bernoulli Naive Bayes Results:")
print("="*50)
bnb_results = evaluate_model_kfold(BernoulliNaiveBayes, X, y, n_folds=5)
print(f"\nAverage Train Accuracy: {bnb_results['avg_train']:.4f} ± {bnb_results['std_train']:.4f}")
print(f"Average Test Accuracy: {bnb_results['avg_test']:.4f} ± {bnb_results['std_test']:.4f}")

print("\n" + "="*50)
print("Histogram Naive Bayes Results:")
print("="*50)
hnb_results = evaluate_model_kfold(HistogramNaiveBayes, X, y, n_folds=5)
print(f"\nAverage Train Accuracy: {hnb_results['avg_train']:.4f} ± {hnb_results['std_train']:.4f}")
print(f"Average Test Accuracy: {hnb_results['avg_test']:.4f} ± {hnb_results['std_test']:.4f}")

Gaussian Naive Bayes Results:
Fold 1: Train Acc = 0.8204, Test Acc = 0.8208
Fold 2: Train Acc = 0.8047, Test Acc = 0.8033
Fold 3: Train Acc = 0.8245, Test Acc = 0.7946
Fold 4: Train Acc = 0.8250, Test Acc = 0.8228
Fold 5: Train Acc = 0.8215, Test Acc = 0.8337

Average Train Accuracy: 0.8192 ± 0.0075
Average Test Accuracy: 0.8150 ± 0.0141

Bernoulli Naive Bayes Results:
Fold 1: Train Acc = 0.8938, Test Acc = 0.9001
Fold 2: Train Acc = 0.8894, Test Acc = 0.8848
Fold 3: Train Acc = 0.8970, Test Acc = 0.8880
Fold 4: Train Acc = 0.8998, Test Acc = 0.8783
Fold 5: Train Acc = 0.8946, Test Acc = 0.9065

Average Train Accuracy: 0.8949 ± 0.0034
Average Test Accuracy: 0.8915 ± 0.0103

Histogram Naive Bayes Results:
Fold 1: Train Acc = 0.7367, Test Acc = 0.7242
Fold 2: Train Acc = 0.7571, Test Acc = 0.7457
Fold 3: Train Acc = 0.7558, Test Acc = 0.7620
Fold 4: Train Acc = 0.7590, Test Acc = 0.7717
Fold 5: Train Acc = 0.7571, Test Acc = 0.7652

Average Train Accuracy: 0.7532 ± 0.0083
Average Test Ac

In [35]:
# Test the fixed Histogram NB
print("="*50)
print("Fixed Histogram Naive Bayes Results:")
print("="*50)

# Use the alternative implementation
hnb_fixed_results = evaluate_model_kfold(HistogramNaiveBayes, X, y, n_folds=5)

print(f"\nAverage Train Accuracy: {hnb_fixed_results['avg_train']:.4f} ± {hnb_fixed_results['std_train']:.4f}")
print(f"Average Test Accuracy: {hnb_fixed_results['avg_test']:.4f} ± {hnb_fixed_results['std_test']:.4f}")

# Create updated summary table
print("\n" + "="*60)
print("FINAL SUMMARY TABLE - All Models (5-Fold CV)")
print("="*60)
print(f"{'Model':<20} {'Train Acc':<15} {'Test Acc':<15}")
print("-"*60)
print(f"{'Gaussian NB':<20} {gnb_results['avg_train']:.4f} ± {gnb_results['std_train']:.4f}  "
      f"{gnb_results['avg_test']:.4f} ± {gnb_results['std_test']:.4f}")
print(f"{'Bernoulli NB':<20} {bnb_results['avg_train']:.4f} ± {bnb_results['std_train']:.4f}  "
      f"{bnb_results['avg_test']:.4f} ± {bnb_results['std_test']:.4f}")
print(f"{'Histogram NB (Fixed)':<20} {hnb_fixed_results['avg_train']:.4f} ± {hnb_fixed_results['std_train']:.4f}  "
      f"{hnb_fixed_results['avg_test']:.4f} ± {hnb_fixed_results['std_test']:.4f}")

Fixed Histogram Naive Bayes Results:
Fold 1: Train Acc = 0.7367, Test Acc = 0.7242
Fold 2: Train Acc = 0.7571, Test Acc = 0.7457
Fold 3: Train Acc = 0.7558, Test Acc = 0.7620
Fold 4: Train Acc = 0.7590, Test Acc = 0.7717
Fold 5: Train Acc = 0.7571, Test Acc = 0.7652

Average Train Accuracy: 0.7532 ± 0.0083
Average Test Accuracy: 0.7538 ± 0.0171

FINAL SUMMARY TABLE - All Models (5-Fold CV)
Model                Train Acc       Test Acc       
------------------------------------------------------------
Gaussian NB          0.8192 ± 0.0075  0.8150 ± 0.0141
Bernoulli NB         0.8949 ± 0.0034  0.8915 ± 0.0103
Histogram NB (Fixed) 0.7532 ± 0.0083  0.7538 ± 0.0171


In [36]:
# Test with different alpha values and k-folds
def test_hyperparameters(X, y):
    """
    Test different hyperparameter combinations
    """
    results = []

    # Test different alpha (smoothing) values
    alphas = [0.01, 0.1, 0.5, 1.0, 2.0, 5.0]
    k_folds = [3, 5, 10]

    print("="*70)
    print("Hyperparameter Tuning for Histogram Naive Bayes")
    print("="*70)

    best_score = 0
    best_params = {}

    for k in k_folds:
        for alpha in alphas:
            # Create model with specific alpha
            class HNBWithAlpha(HistogramNaiveBayes):
                def __init__(self):
                    super().__init__(n_bins=4, alpha=alpha)

            # Run k-fold CV
            kf = KFold(n_splits=k, shuffle=True, random_state=42)
            test_scores = []

            for train_idx, test_idx in kf.split(X):
                X_train, X_test = X[train_idx], X[test_idx]
                y_train, y_test = y[train_idx], y[test_idx]

                # Normalize
                scaler = StandardScaler()
                X_train_norm = scaler.fit_transform(X_train)
                X_test_norm = scaler.transform(X_test)

                # Train and predict
                model = HNBWithAlpha()
                model.fit(X_train_norm, y_train)
                y_pred = model.predict(X_test_norm)

                test_scores.append(accuracy_score(y_test, y_pred))

            avg_score = np.mean(test_scores)
            std_score = np.std(test_scores)

            print(f"k={k:2d}, alpha={alpha:4.2f}: Test Acc = {avg_score:.4f} ± {std_score:.4f}")

            if avg_score > best_score:
                best_score = avg_score
                best_params = {'k_folds': k, 'alpha': alpha, 'std': std_score}

    print("\n" + "="*70)
    print(f"Best Parameters: k={best_params['k_folds']}, alpha={best_params['alpha']:.2f}")
    print(f"Best Test Accuracy: {best_score:.4f} ± {best_params['std']:.4f}")
    print("="*70)

    return best_params

# Run hyperparameter tuning
best_params = test_hyperparameters(X, y)

# Run final evaluation with best parameters
print("\n" + "="*70)
print("Final Evaluation with Best Parameters")
print("="*70)

class OptimizedHNB(HistogramNaiveBayes):
    def __init__(self):
        super().__init__(n_bins=4, alpha=best_params['alpha'])

final_results = evaluate_model_kfold(OptimizedHNB, X, y, n_folds=best_params['k_folds'])
print(f"Final Train Accuracy: {final_results['avg_train']:.4f} ± {final_results['std_train']:.4f}")
print(f"Final Test Accuracy: {final_results['avg_test']:.4f} ± {final_results['std_test']:.4f}")

Hyperparameter Tuning for Histogram Naive Bayes
k= 3, alpha=0.01: Test Acc = 0.7609 ± 0.0098
k= 3, alpha=0.10: Test Acc = 0.7596 ± 0.0116
k= 3, alpha=0.50: Test Acc = 0.7577 ± 0.0128
k= 3, alpha=1.00: Test Acc = 0.7553 ± 0.0139
k= 3, alpha=2.00: Test Acc = 0.7501 ± 0.0134
k= 3, alpha=5.00: Test Acc = 0.7390 ± 0.0154
k= 5, alpha=0.01: Test Acc = 0.7618 ± 0.0137
k= 5, alpha=0.10: Test Acc = 0.7605 ± 0.0138
k= 5, alpha=0.50: Test Acc = 0.7561 ± 0.0166
k= 5, alpha=1.00: Test Acc = 0.7538 ± 0.0171
k= 5, alpha=2.00: Test Acc = 0.7485 ± 0.0177
k= 5, alpha=5.00: Test Acc = 0.7416 ± 0.0205
k=10, alpha=0.01: Test Acc = 0.7533 ± 0.0214
k=10, alpha=0.10: Test Acc = 0.7496 ± 0.0243
k=10, alpha=0.50: Test Acc = 0.7455 ± 0.0239
k=10, alpha=1.00: Test Acc = 0.7424 ± 0.0259
k=10, alpha=2.00: Test Acc = 0.7392 ± 0.0267
k=10, alpha=5.00: Test Acc = 0.7314 ± 0.0292

Best Parameters: k=5, alpha=0.01
Best Test Accuracy: 0.7618 ± 0.0137

Final Evaluation with Best Parameters
Fold 1: Train Acc = 0.7473, Test 

In [37]:
class HistogramNBSpambaseOptimized:
    def __init__(self, n_bins=4, alpha=0.01, global_bins=False):
        """
        Optimized for Spambase dataset characteristics
        global_bins: If True, use global min/max for binning instead of class-specific
        """
        self.n_bins = n_bins
        self.alpha = alpha
        self.global_bins = global_bins
        self.class_priors = None
        self.bin_edges = {}
        self.bin_probs = {}
        self.classes = None
        self.global_edges = {}  # Store global edges if needed

    def fit(self, X, y):
        """
        Train with special handling for Spambase features
        """
        self.classes = np.unique(y)
        n_samples, n_features = X.shape
        self.class_priors = {}

        # If using global bins, compute global edges first
        if self.global_bins:
            for feature_idx in range(n_features):
                feature_values = X[:, feature_idx]

                # Handle features with many zeros (common in Spambase)
                non_zero_values = feature_values[feature_values > 0]

                if len(non_zero_values) > 10:  # If we have enough non-zero values
                    # Use percentiles of non-zero values
                    min_val = 0  # Include zero as minimum
                    q1 = np.percentile(non_zero_values, 25)
                    mean_val = np.mean(non_zero_values)
                    q3 = np.percentile(non_zero_values, 75)
                    max_val = np.max(non_zero_values)
                else:
                    # Use regular percentiles
                    min_val = np.min(feature_values)
                    q1 = np.percentile(feature_values, 25)
                    mean_val = np.mean(feature_values)
                    q3 = np.percentile(feature_values, 75)
                    max_val = np.max(feature_values)

                edges = np.sort([min_val, q1, mean_val, q3, max_val])
                edges = np.unique(edges)

                if len(edges) < self.n_bins + 1:
                    edges = np.linspace(min_val, max_val + 1e-10, self.n_bins + 1)

                self.global_edges[feature_idx] = edges

        for c in self.classes:
            X_c = X[y == c]
            self.class_priors[c] = len(X_c) / n_samples

            self.bin_edges[c] = {}
            self.bin_probs[c] = {}

            for feature_idx in range(n_features):
                if self.global_bins:
                    # Use global edges
                    edges = self.global_edges[feature_idx].copy()
                else:
                    # Class-specific edges
                    feature_values = X_c[:, feature_idx]

                    # Special handling for sparse features
                    zero_ratio = np.mean(feature_values == 0)

                    if zero_ratio > 0.5:  # If more than 50% zeros
                        # Create bins that separate zeros from non-zeros
                        non_zero_values = feature_values[feature_values > 0]
                        if len(non_zero_values) > 3:
                            edges = [0,
                                   np.min(non_zero_values),
                                   np.median(non_zero_values),
                                   np.max(non_zero_values) + 1e-10]
                        else:
                            edges = np.linspace(np.min(feature_values),
                                              np.max(feature_values) + 1e-10,
                                              self.n_bins + 1)
                    else:
                        # Standard approach
                        min_val = np.min(feature_values)
                        q1 = np.percentile(feature_values, 25)
                        mean_val = np.mean(feature_values)
                        q3 = np.percentile(feature_values, 75)
                        max_val = np.max(feature_values)

                        edges = np.sort([min_val, q1, mean_val, q3, max_val])
                        edges = np.unique(edges)

                        if len(edges) < self.n_bins + 1:
                            edges = np.linspace(min_val, max_val + 1e-10, self.n_bins + 1)

                edges[-1] += 1e-10  # Ensure max is included

                # Calculate histogram
                hist, _ = np.histogram(X_c[:, feature_idx], bins=edges)

                # Adaptive smoothing based on sample size
                adaptive_alpha = self.alpha * (1000 / len(X_c))  # Scale smoothing by sample size
                hist_smooth = hist + adaptive_alpha

                # Normalize
                hist_prob = hist_smooth / np.sum(hist_smooth)

                self.bin_edges[c][feature_idx] = edges
                self.bin_probs[c][feature_idx] = hist_prob

    def predict(self, X):
        """
        Predict with log probabilities
        """
        predictions = []

        for x in X:
            log_posteriors = {}

            for c in self.classes:
                log_post = np.log(self.class_priors[c] + 1e-10)

                for j in range(len(x)):
                    edges = self.bin_edges[c][j]
                    probs = self.bin_probs[c][j]

                    # Find bin
                    bin_idx = np.searchsorted(edges[:-1], x[j], side='right') - 1
                    bin_idx = np.clip(bin_idx, 0, len(probs) - 1)

                    log_post += np.log(probs[bin_idx] + 1e-10)

                log_posteriors[c] = log_post

            predictions.append(max(log_posteriors, key=log_posteriors.get))

        return np.array(predictions)

# Test different configurations
print("="*70)
print("Testing Different Histogram Configurations")
print("="*70)

configurations = [
    {"n_bins": 4, "alpha": 0.01, "global_bins": False, "name": "Original (4 bins, class-specific)"},
    {"n_bins": 4, "alpha": 0.01, "global_bins": True, "name": "Global bins"},
    {"n_bins": 8, "alpha": 0.01, "global_bins": False, "name": "8 bins"},
    {"n_bins": 10, "alpha": 0.01, "global_bins": False, "name": "10 bins"},
    {"n_bins": 16, "alpha": 0.01, "global_bins": False, "name": "16 bins"},
]

best_config = None
best_accuracy = 0

for config in configurations:
    class TestHNB(HistogramNBSpambaseOptimized):
        def __init__(self):
            super().__init__(n_bins=config["n_bins"],
                           alpha=config["alpha"],
                           global_bins=config["global_bins"])

    # Quick 3-fold test
    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    scores = []

    for train_idx, test_idx in kf.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        scaler = StandardScaler()
        X_train_norm = scaler.fit_transform(X_train)
        X_test_norm = scaler.transform(X_test)

        model = TestHNB()
        model.fit(X_train_norm, y_train)
        y_pred = model.predict(X_test_norm)
        scores.append(accuracy_score(y_test, y_pred))

    avg_score = np.mean(scores)
    print(f"{config['name']:<35} Test Acc: {avg_score:.4f}")

    if avg_score > best_accuracy:
        best_accuracy = avg_score
        best_config = config

print("\n" + "="*70)
print(f"Best Configuration: {best_config['name']}")
print(f"Best Accuracy: {best_accuracy:.4f}")
print("="*70)

Testing Different Histogram Configurations
Original (4 bins, class-specific)   Test Acc: 0.7609
Global bins                         Test Acc: 0.6449
8 bins                              Test Acc: 0.8420
10 bins                             Test Acc: 0.8624
16 bins                             Test Acc: 0.8789

Best Configuration: 16 bins
Best Accuracy: 0.8789
