## Test Multi-class Classification<sup>*</sup>
<sup>*</sup>Includes LogisticBART with more than two classes (n_cat > 2)

In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score, roc_auc_score
from sklearn.datasets import make_classification, load_wine, load_iris
import pandas as pd
from bart_playground import *
from bart_playground.bart import LogisticBART

In [2]:
# Parameters
# Shared experiment settings; every value below is consumed by later cells.
N_TREES = 25       # forwarded to LogisticBART as n_trees
NDPOST = 1000      # forwarded to LogisticBART as ndpost (presumably posterior draws kept — confirm in bart_playground)
NSKIP = 200        # forwarded to LogisticBART as nskip (presumably burn-in draws discarded — confirm in bart_playground)
RANDOM_STATE = 42  # seed reused for synthetic data generation, the train/test split and both models

In [3]:
# Load datasets
def load_datasets():
    """Assemble the benchmark datasets used by this notebook.

    Returns:
        dict: maps a display name to an ``(X, y)`` pair, in this order:
        "Synthetic 3-class", "Iris", "Wine", "Synthetic 4-class".
    """
    # Options common to both synthetic problems.
    synthetic_common = dict(
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=RANDOM_STATE,
    )

    benchmark = {}

    # Synthetic problem with 3 classes.
    benchmark["Synthetic 3-class"] = make_classification(
        n_samples=400, n_features=8, n_informative=6, n_classes=3,
        **synthetic_common,
    )

    # Bundled scikit-learn datasets (3 classes each).
    benchmark["Iris"] = load_iris(return_X_y=True)
    benchmark["Wine"] = load_wine(return_X_y=True)

    # Synthetic problem with 4 classes.
    benchmark["Synthetic 4-class"] = make_classification(
        n_samples=600, n_features=10, n_informative=8, n_classes=4,
        **synthetic_common,
    )

    return benchmark

In [4]:
from sklearn.metrics import roc_auc_score


def evaluate_model(model, model_name, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: estimator exposing ``fit(X, y)``, ``predict_proba(X)`` and
            ``predict(X)``.
        model_name: label of the model. Kept for backward compatibility with
            existing callers; it no longer influences how the model is
            evaluated.
        X_train, y_train: training features and labels passed to ``fit``.
        X_test, y_test: held-out features and labels used for scoring.

    Returns:
        dict with keys ``'Accuracy'``, ``'LogLoss'`` and ``'AUC_OvR'``
        (macro-averaged one-vs-rest ROC AUC).
    """
    # The previous per-name branches were identical, and an unrecognised
    # name left the predictions undefined (UnboundLocalError). Every model
    # now goes through the same fit / predict path.
    model.fit(X_train, y_train)
    y_pred_proba = model.predict_proba(X_test)
    y_pred = model.predict(X_test)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    logloss = log_loss(y_test, y_pred_proba)
    auc_ovr = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='macro')

    return {'Accuracy': accuracy, 'LogLoss': logloss, 'AUC_OvR': auc_ovr}

In [5]:
# Debug switch.
# When True, the main experiment loop skips evaluation for every dataset/model,
# and a later cell evaluates only one (dataset, model) pair — whichever is left
# in the loop variables after the loop ends — e.g. to record its running time.
# When False, every model is evaluated on every dataset.
debug = False

In [6]:
# Make invalid floating-point operations raise instead of only warning, so
# numerical problems during fitting surface immediately. np.seterr returns the
# previous settings; they are kept in old_settings but are not restored in the
# cells shown here.
old_settings = np.seterr(invalid='raise')

# Benchmark datasets keyed by display name, and the list that collects one
# result record per (dataset, model) evaluation.
datasets = load_datasets()
results = []

In [7]:
# Describe each dataset: shapes, per-class share of samples, and class count.
for name, (X, y) in datasets.items():
    class_shares = pd.Series(y).value_counts(normalize=True).sort_index().to_dict()
    n_classes = len(np.unique(y))
    summary_lines = [
        f"Dataset: {name}",
        f"X shape: {X.shape}, y shape: {y.shape}",
        f"Class distribution: {class_shares}",
        f"Number of classes: {n_classes}",
        "",  # trailing blank line between datasets
    ]
    print("\n".join(summary_lines))

Dataset: Synthetic 3-class
X shape: (400, 8), y shape: (400,)
Class distribution: {0: 0.3375, 1: 0.33, 2: 0.3325}
Number of classes: 3

Dataset: Iris
X shape: (150, 4), y shape: (150,)
Class distribution: {0: 0.3333333333333333, 1: 0.3333333333333333, 2: 0.3333333333333333}
Number of classes: 3

Dataset: Wine
X shape: (178, 13), y shape: (178,)
Class distribution: {0: 0.33146067415730335, 1: 0.398876404494382, 2: 0.2696629213483146}
Number of classes: 3

Dataset: Synthetic 4-class
X shape: (600, 10), y shape: (600,)
Class distribution: {0: 0.24666666666666667, 1: 0.24833333333333332, 2: 0.25666666666666665, 3: 0.24833333333333332}
Number of classes: 4



In [8]:
# Evaluate every model on every dataset and collect one record per run.
for dataset_name, (X, y) in datasets.items():
    print(f"\n=== Testing on {dataset_name} ===")

    # Split data (70/30, stratified so each split keeps the class proportions)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=RANDOM_STATE, stratify=y
    )

    # Define models (fresh instances per dataset)
    models = {
        "RFClassifier": RandomForestClassifier(random_state=RANDOM_STATE),
        "LogisticBART": LogisticBART(n_trees=N_TREES, ndpost=NDPOST, nskip=NSKIP, random_state=RANDOM_STATE)
    }

    for model_name, model in models.items():
        print(f"  Training {model_name}...")

        # In debug mode nothing is evaluated here; the loop only leaves the
        # last dataset split and model in scope for the debug cell below.
        if debug:
            continue

        metrics = evaluate_model(model, model_name, X_train, X_test, y_train, y_test)
        results.append({'Dataset': dataset_name, 'Model': model_name, **metrics})

        print(f"    Acc: {metrics['Accuracy']:.3f}, LogLoss: {metrics['LogLoss']:.3f}, AUC_OvR: {metrics['AUC_OvR']:.3f}")


=== Testing on Synthetic 3-class ===
  Training RFClassifier...
    Acc: 0.850, LogLoss: 0.416, AUC_OvR: 0.961
  Training LogisticBART...


Iterations: 100%|██████████| 1200/1200 [00:17<00:00, 67.15it/s]


    Acc: 0.842, LogLoss: 0.371, AUC_OvR: 0.967

=== Testing on Iris ===
  Training RFClassifier...
    Acc: 0.889, LogLoss: 0.194, AUC_OvR: 0.989
  Training LogisticBART...


Iterations: 100%|██████████| 1200/1200 [00:22<00:00, 53.52it/s]


    Acc: 0.911, LogLoss: 0.194, AUC_OvR: 0.993

=== Testing on Wine ===
  Training RFClassifier...
    Acc: 1.000, LogLoss: 0.130, AUC_OvR: 1.000
  Training LogisticBART...


Iterations: 100%|██████████| 1200/1200 [00:15<00:00, 76.15it/s]


    Acc: 1.000, LogLoss: 0.155, AUC_OvR: 1.000

=== Testing on Synthetic 4-class ===
  Training RFClassifier...
    Acc: 0.844, LogLoss: 0.557, AUC_OvR: 0.968
  Training LogisticBART...


Iterations: 100%|██████████| 1200/1200 [00:20<00:00, 57.18it/s]


    Acc: 0.833, LogLoss: 0.543, AUC_OvR: 0.954


In [9]:
# Filled in by record_evaluation_results(); None until it has been called.
metrics = None


def record_evaluation_results():
    """Evaluate the current global model/split and store the result globally.

    Reads ``model``, ``model_name``, ``X_train``, ``X_test``, ``y_train`` and
    ``y_test`` from the notebook namespace and writes the metrics dict to the
    global ``metrics``. Takes no arguments and returns nothing.
    """
    global metrics
    metrics = evaluate_model(model, model_name, X_train, X_test, y_train, y_test)

In [10]:
# Debug path: evaluate only the (dataset, model) pair left in scope by the
# main experiment loop, optionally under the profiler.
if debug:
    record_evaluation_results()

    # %prun -s cumtime -D temp_profile.prof -q record_evaluation_results()

    # fname = "profile_multiclass_logisticbart"

    # !mv temp_profile.prof {fname}.prof
    # !gprof2dot -f pstats {fname}.prof -o {fname}.dot
    # !dot -Tpng {fname}.dot -o {fname}.png

    result = {'Dataset': dataset_name, 'Model': model_name, **metrics}
    results.append(result)

    # Report the same three metrics as the main loop does.
    print(f"    Acc: {metrics['Accuracy']:.3f}, LogLoss: {metrics['LogLoss']:.3f}, AUC_OvR: {metrics['AUC_OvR']:.3f}")

In [12]:
# Display results
results_df = pd.DataFrame(results)
banner = "=" * 60
print(f"\n{banner}\nSUMMARY RESULTS\n{banner}")

# One Dataset x Model table per metric, for side-by-side comparison
for metric_name in ('Accuracy', 'LogLoss', 'AUC_OvR'):
    print(f"\n{metric_name}:")
    comparison = results_df.pivot_table(values=metric_name, index='Dataset', columns='Model')
    print(comparison.round(3))


SUMMARY RESULTS

Accuracy:
Model              LogisticBART  RFClassifier
Dataset                                      
Iris                      0.911         0.889
Synthetic 3-class         0.842         0.850
Synthetic 4-class         0.833         0.844
Wine                      1.000         1.000

LogLoss:
Model              LogisticBART  RFClassifier
Dataset                                      
Iris                      0.194         0.194
Synthetic 3-class         0.371         0.416
Synthetic 4-class         0.543         0.557
Wine                      0.155         0.130

AUC_OvR:
Model              LogisticBART  RFClassifier
Dataset                                      
Iris                      0.993         0.989
Synthetic 3-class         0.967         0.961
Synthetic 4-class         0.954         0.968
Wine                      1.000         1.000
