## Test BinaryBART

In [1]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score, roc_auc_score
from sklearn.datasets import make_classification, load_breast_cancer, load_wine
from sklearn.preprocessing import StandardScaler
import pandas as pd
from bart_playground import *
from bart_playground.bart import DefaultBART, BinaryBART
import bartz

In [2]:
# Parameters
N_TREES = 50
NDPOST = 500
NSKIP = 500
RANDOM_STATE = 42

In [3]:
# Load datasets
def load_datasets():
    # Synthetic dataset
    X_syn, y_syn = make_classification(n_samples=400, n_features=8, n_informative=6, 
                                       n_redundant=0, n_classes=2, random_state=RANDOM_STATE)
    
    # Breast cancer dataset
    X_bc, y_bc = load_breast_cancer(return_X_y=True)
    
    # Wine dataset (convert to binary: class 0 vs rest)
    X_wine, y_wine = load_wine(return_X_y=True)
    y_wine = (y_wine == 0).astype(int)
    
    return {
        "Synthetic": (X_syn, y_syn),
        "Breast Cancer": (X_bc, y_bc),
        "Wine Binary": (X_wine, y_wine)
    }

In [4]:
def evaluate_model(model, model_name, X_train, X_test, y_train, y_test):
    """Evaluate a single model and return metrics"""
    
    if model_name == "Bartz":
        # Bartz regression treating 0/1 as continuous
        fit_result = bartz.BART.gbart(
            x_train=X_train.T, y_train=y_train.astype(float),
            x_test=X_test.T,
            ntree=N_TREES, ndpost=NDPOST, nskip=NSKIP,
            seed=RANDOM_STATE,
            printevery=NDPOST + NSKIP + 100
        )
        btpred_all = fit_result.predict(np.transpose(X_test))
        btpred = np.mean(np.array(btpred_all), axis=0)
        y_pred_prob = np.clip(btpred, 1e-9, 1 - 1e-9)
        y_pred = (y_pred_prob > 0.5).astype(int)
        
    elif model_name == "BinaryBART" or model_name == "LogisticBART":
        # Proper binary BART
        model.fit(X_train, y_train)
        proba_output = model.predict_proba(X_test)
        y_pred_prob = proba_output[:, 1]
        y_pred = model.predict(X_test)
        
    elif model_name == "RandomForestClassifier":
        # Native binary classifier
        model.fit(X_train, y_train)
        y_pred_prob = model.predict_proba(X_test)[:, 1]
        y_pred = model.predict(X_test)
        
    else:
        # Regression methods treating 0/1 as continuous
        model.fit(X_train, y_train)
        raw_pred = model.predict(X_test)
        y_pred_prob = np.clip(raw_pred, 1e-9, 1 - 1e-9)
        y_pred = (y_pred_prob > 0.5).astype(int)
    
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    logloss = log_loss(y_test, y_pred_prob)
    auc = roc_auc_score(y_test, y_pred_prob)
    
    return {'Accuracy': accuracy, 'LogLoss': logloss, 'AUC': auc}

In [5]:
# If debug then run with only one dataset and record running time
# Otherwise run with all datasets
debug = False

In [None]:
from bart_playground.bart import LogisticBART

old_settings = np.seterr(invalid='raise')

datasets = load_datasets()
results = []

In [10]:
# Print dataset shapes
for name, (X, y) in datasets.items():
    print(f"Dataset: {name}, X shape: {X.shape}, y shape: {y.shape}")

Dataset: Synthetic, X shape: (400, 8), y shape: (400,)
Dataset: Breast Cancer, X shape: (569, 30), y shape: (569,)
Dataset: Wine Binary, X shape: (178, 13), y shape: (178,)


In [None]:
for dataset_name, (X, y) in datasets.items():
    print(f"\n=== Testing on {dataset_name} ===")
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=RANDOM_STATE, stratify=y
    )
    
    # Define models
    models = {
        "RFClassifier": RandomForestClassifier(n_estimators=N_TREES, random_state=RANDOM_STATE),
        "RFRegressor": RandomForestRegressor(n_estimators=N_TREES, random_state=RANDOM_STATE),
        "DefaultBART": DefaultBART(n_trees=N_TREES, ndpost=NDPOST, nskip=NSKIP, random_state=RANDOM_STATE),
        "BinaryBART": BinaryBART(n_trees=N_TREES, ndpost=NDPOST, nskip=NSKIP, random_state=RANDOM_STATE),
        "LogisticBART": LogisticBART(n_trees=N_TREES // 2, ndpost=NDPOST, nskip=NSKIP, random_state=RANDOM_STATE),
        "Bartz": "placeholder"
    }
    
    for model_name, model in models.items():
        print(f"  Training {model_name}...")
        
        X_tr, X_te = X_train, X_test
        
        if(debug == True):
            continue    
        metrics = evaluate_model(model, model_name, X_tr, X_te, y_train, y_test)
        
        result = {'Dataset': dataset_name, 'Model': model_name, **metrics}
        results.append(result)
        
        print(f"    Acc: {metrics['Accuracy']:.3f}, LogLoss: {metrics['LogLoss']:.3f}, AUC: {metrics['AUC']:.3f}")


=== Testing on Synthetic ===
  Training RFClassifier...
    Acc: 0.900, LogLoss: 2.072, AUC: 0.900
  Training RFRegressor...
    Acc: 0.858, LogLoss: 0.341, AUC: 0.932
  Training DefaultBART...


Iterations: 100%|██████████| 1000/1000 [00:14<00:00, 68.64it/s]


    Acc: 0.867, LogLoss: 0.319, AUC: 0.952
  Training BinaryBART...


Iterations: 100%|██████████| 1000/1000 [00:10<00:00, 92.03it/s]


    Acc: 0.808, LogLoss: 0.554, AUC: 0.875
  Training LogisticBART...


Iterations: 100%|██████████| 1000/1000 [00:10<00:00, 91.17it/s]
INFO:2025-06-03 16:27:23,076:jax._src.xla_bridge:867: Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
INFO:jax._src.xla_bridge:Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory


    Acc: 0.867, LogLoss: 0.326, AUC: 0.936
  Training Bartz...
    Acc: 0.883, LogLoss: 0.329, AUC: 0.949

=== Testing on Breast Cancer ===
  Training RFClassifier...
    Acc: 0.924, LogLoss: 1.575, AUC: 0.920
  Training RFRegressor...
    Acc: 0.942, LogLoss: 0.118, AUC: 0.989
  Training DefaultBART...


Iterations: 100%|██████████| 1000/1000 [00:10<00:00, 96.31it/s]


    Acc: 0.936, LogLoss: 0.146, AUC: 0.984
  Training BinaryBART...


Iterations: 100%|██████████| 1000/1000 [00:10<00:00, 99.08it/s]


    Acc: 0.936, LogLoss: 0.264, AUC: 0.988
  Training LogisticBART...


Iterations: 100%|██████████| 1000/1000 [00:10<00:00, 91.51it/s]


    Acc: 0.965, LogLoss: 0.113, AUC: 0.991
  Training Bartz...
    Acc: 0.947, LogLoss: 0.130, AUC: 0.991

=== Testing on Wine Binary ===
  Training RFClassifier...
    Acc: 0.926, LogLoss: 1.535, AUC: 0.903
  Training RFRegressor...
    Acc: 0.926, LogLoss: 0.519, AUC: 0.950
  Training DefaultBART...


Iterations: 100%|██████████| 1000/1000 [00:07<00:00, 142.74it/s]


    Acc: 0.963, LogLoss: 0.138, AUC: 0.985
  Training BinaryBART...


Iterations: 100%|██████████| 1000/1000 [00:08<00:00, 112.50it/s]


    Acc: 0.815, LogLoss: 0.462, AUC: 0.986
  Training LogisticBART...


Iterations: 100%|██████████| 1000/1000 [00:10<00:00, 93.43it/s]


    Acc: 0.963, LogLoss: 0.133, AUC: 0.991
  Training Bartz...
    Acc: 0.944, LogLoss: 0.117, AUC: 0.994


In [None]:

for dataset_name, (X, y) in datasets.items():
    print(f"\n=== Testing on {dataset_name} ===")
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=RANDOM_STATE, stratify=y
    )
    
    # Define models
    models = {
        "RFClassifier": RandomForestClassifier(n_estimators=N_TREES, random_state=RANDOM_STATE),
        "RFRegressor": RandomForestRegressor(n_estimators=N_TREES, random_state=RANDOM_STATE),
        "DefaultBART": DefaultBART(n_trees=N_TREES, ndpost=NDPOST, nskip=NSKIP, random_state=RANDOM_STATE),
        "BinaryBART": BinaryBART(n_trees=N_TREES, ndpost=NDPOST, nskip=NSKIP, random_state=RANDOM_STATE),
        "LogisticBART": LogisticBART(n_trees=N_TREES // 2, ndpost=NDPOST, nskip=NSKIP, random_state=RANDOM_STATE),
        "Bartz": "placeholder"
    }
    
    for model_name, model in models.items():
        print(f"  Training {model_name}...")
        
        X_tr, X_te = X_train, X_test
        
        if(debug == True):
            continue    
        metrics = evaluate_model(model, model_name, X_tr, X_te, y_train, y_test)
        
        result = {'Dataset': dataset_name, 'Model': model_name, **metrics}
        results.append(result)
        
        print(f"    Acc: {metrics['Accuracy']:.3f}, LogLoss: {metrics['LogLoss']:.3f}, AUC: {metrics['AUC']:.3f}")

In [7]:
metrics = None
 
def record_evaluation_results():
    global metrics
    X_tr, X_te = X_train, X_test
    metrics = evaluate_model(model, model_name, X_tr, X_te, y_train, y_test)

In [8]:
if debug == True:
    X_tr, X_te = X_train, X_test

    %prun -s cumtime -D temp_profile.prof -q record_evaluation_results()

    fname = "profile_logbart9"

    !mv temp_profile.prof {fname}.prof
    !gprof2dot -f pstats {fname}.prof -o {fname}.dot
    !dot -Tpng {fname}.dot -o {fname}.png
    
    result = {'Dataset': dataset_name, 'Model': model_name, **metrics}
    results.append(result)
    
    print(f"    Acc: {metrics['Accuracy']:.3f}, LogLoss: {metrics['LogLoss']:.3f}, AUC: {metrics['AUC']:.3f}")

In [9]:
# Display results
results_df = pd.DataFrame(results)
print("\n" + "="*60)
print("SUMMARY RESULTS")
print("="*60)

# Pivot tables for easy comparison
for metric in ['Accuracy', 'AUC', 'LogLoss']:
    print(f"\n{metric}:")
    pivot = results_df.pivot_table(index='Dataset', columns='Model', values=metric)
    print(pivot.round(3))


SUMMARY RESULTS

Accuracy:
Model          Bartz  BinaryBART  DefaultBART  LogisticBART  RFClassifier  \
Dataset                                                                     
Breast Cancer  0.947       0.936        0.936         0.965         0.924   
Synthetic      0.883       0.808        0.867         0.867         0.900   
Wine Binary    0.944       0.815        0.963         0.963         0.926   

Model          RFRegressor  
Dataset                     
Breast Cancer        0.942  
Synthetic            0.858  
Wine Binary          0.926  

AUC:
Model          Bartz  BinaryBART  DefaultBART  LogisticBART  RFClassifier  \
Dataset                                                                     
Breast Cancer  0.991       0.988        0.984         0.991         0.920   
Synthetic      0.949       0.875        0.952         0.936         0.900   
Wine Binary    0.994       0.986        0.985         0.991         0.903   

Model          RFRegressor  
Dataset             