In [1]:
import sys
sys.path.append("../")

import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
matplotlib.rc('text', usetex=False)

import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import scipy

from rsep_explain.variables import auto_var
from utils import params_to_dataframe
from params import RiskSlimExperiments3, LinSepBBMRobExperiments3, RobDTInterpretRobExperiments3, DTInterpretRobExperiments3, XgboostRobDTInterpretRobExperiments
from IPython.display import display

# Font size passed to the plot title and axis labels in the plotting cell below.
fontsize = 16

In [2]:
def display_results(exp, result_file_dir='./results/calc_separation', columns=None):
    """Load the stored results of one experiment grid into a DataFrame.

    Args:
        exp: Callable that is invoked with no arguments; its result is invoked
            again and must yield exactly four values. Only the third one (the
            grid parameters) is used.
        result_file_dir: Directory handed to `params_to_dataframe` as
            `result_file_dir`.
        columns: Forwarded unchanged to `params_to_dataframe` as `columns`.

    Returns:
        Whatever `params_to_dataframe` returns for the grid parameters
        (called with `logging_level=0`).
    """
    _first, _second, grid_params, _fourth = exp()()
    return params_to_dataframe(
        grid_params,
        columns=columns,
        result_file_dir=result_file_dir,
        logging_level=0,
    )

In [3]:
def proc_bbm_result(df, rsep=0.1):
    """Flatten the per-row 'bbm_results' dicts of `df` into plain columns.

    Only rows whose 'rsep' equals `rsep` are kept; the number of kept rows is
    printed. The input frame is left untouched (a copy of the selection is
    extended).

    Returns:
        DataFrame with the columns 'dataset', 'random_seed', 'depth',
        'unique weak leaners' (number of distinct entries in 'learned_model';
        the column name is spelled this way because callers select it by that
        name), 'tst acc' and 'rob' (mean of 'er dist').
    """
    selected = df.loc[df['rsep'] == rsep].copy()
    print(len(selected))

    bbm_records = list(selected['bbm_results'])
    selected['depth'] = [rec['depth'] for rec in bbm_records]
    selected['unique weak leaners'] = [len(set(rec['learned_model'])) for rec in bbm_records]
    selected['tst acc'] = [rec['tst acc'] for rec in bbm_records]
    selected['rob'] = [rec['er dist'].mean() for rec in bbm_records]

    wanted = ['dataset', 'random_seed', 'depth', 'unique weak leaners', 'tst acc', 'rob']
    return selected[wanted]

def proc_riskslim_result(df):
    """Flatten the first 'linsep_results' entry of every row into plain columns.

    The work is done on a copy, so the caller's frame does not silently gain
    the 'depth', 'tst acc' and 'rob' columns (this matches `proc_bbm_result`
    and `proc_xgb_result`, which also copy before adding columns).

    Args:
        df: DataFrame with 'dataset', 'random_seed' and 'linsep_results'
            columns; each 'linsep_results' value is indexed with `[0]` and that
            entry must provide 'w', 'tst acc' and 'er dist'.

    Returns:
        New DataFrame with the columns 'dataset', 'random_seed', 'depth'
        (number of non-zero entries of 'w'), 'tst acc' and 'rob' (mean of
        'er dist').
    """
    out = df.copy()
    # Only the first result entry of each row is reported.
    first_entries = [results[0] for results in out['linsep_results']]

    out['depth'] = [(entry['w'] != 0).sum() for entry in first_entries]
    out['tst acc'] = [entry['tst acc'] for entry in first_entries]
    out['rob'] = [entry['er dist'].mean() for entry in first_entries]
    return out[['dataset', 'random_seed', 'depth', 'tst acc', 'rob']]


def proc_dt_result(df):
    """Summarise decision-tree results: one size, accuracy and `rob` value per row.

    The work is done on a copy, so the caller's frame does not silently gain
    the 'depth' and 'rob' columns (this matches `proc_bbm_result` and
    `proc_xgb_result`, which also copy before adding columns).

    Args:
        df: DataFrame with 'dataset', 'random_seed', 'tst acc', 'er dist' and
            'best_clf' columns; every 'best_clf' must expose
            `tree_.node_count` and `tree_.n_leaves`.

    Returns:
        New DataFrame with the columns 'dataset', 'random_seed', 'depth',
        'tst acc' and 'rob'. 'depth' is `node_count - n_leaves`, i.e. the
        number of non-leaf nodes, not the tree depth. 'rob' is the row's
        'er dist' value copied as-is (no averaging is applied here).
    """
    out = df.copy()
    out['depth'] = [clf.tree_.node_count - clf.tree_.n_leaves for clf in out['best_clf']]
    # Assigned as plain lists, like the original row loop did.
    out['tst acc'] = list(out['tst acc'])
    out['rob'] = list(out['er dist'])
    return out[['dataset', 'random_seed', 'depth', 'tst acc', 'rob']]

from experiments.xgboostrobdt_interpret_rob import get_xgb_depth_leaves_nodes
def proc_xgb_result(df, rsep=0.1):
    """Summarise XGBoost results for the rows whose 'rsep' equals `rsep`.

    The selection is copied before columns are added, so `df` is not modified.

    Returns:
        DataFrame with the columns 'dataset', 'random_seed', 'depth',
        'tst acc' and 'rob'. 'depth' is the node count minus the leaf count
        reported by `get_xgb_depth_leaves_nodes` for the row's 'best_clf';
        'rob' is the mean of the row's 'er dist'.
    """
    selected = df.loc[df['rsep'] == rsep].copy()

    internal_nodes, test_accs, mean_ers = [], [], []
    for _, row in selected.iterrows():
        # The helper returns three values; the first one is not used here.
        _unused, n_leaves, n_nodes = get_xgb_depth_leaves_nodes(row['best_clf'])
        internal_nodes.append(n_nodes - n_leaves)
        test_accs.append(row['tst acc'])
        mean_ers.append(row['er dist'].mean())

    selected['depth'] = internal_nodes
    selected['tst acc'] = test_accs
    selected['rob'] = mean_ers

    wanted = ['dataset', 'random_seed', 'depth', 'tst acc', 'rob']
    return selected[wanted]

In [4]:
#df_bbm_ori = display_results(LinSepBBMRobExperiments3, '../results/bbm_rob_lin_noise_3/',['n_samples', 'bbm_results'])

In [21]:
# Load the stored BBM results (requesting the 'n_samples' and 'bbm_results'
# columns) from ../results/lin_sep_bbm_rob_3.
df_bbm_ori_ori = display_results(LinSepBBMRobExperiments3, '../results/lin_sep_bbm_rob_3', ['n_samples', 'bbm_results'])

In [22]:
# For BBM the size measure is the number of unique weak learners; it is
# relabelled 'depth' so the column name matches the other methods' tables.
columns = ['dataset', 'random_seed', 'tst acc', 'unique weak leaners', 'rob']
column_names = ['dataset', 'random_seed', 'tst acc', 'depth', 'rob']
# Only rows with rsep == 0.05 are kept; proc_bbm_result prints how many.
df_bbm = proc_bbm_result(df_bbm_ori_ori, rsep=0.05)
df_bbm = df_bbm[columns]
df_bbm.columns = column_names
# Per-dataset means. 'random_seed' is averaged as well, so that column of the
# displayed table carries no information.
df_bbm.groupby("dataset").mean()

130


Unnamed: 0_level_0,random_seed,tst acc,depth,rob
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
risk_adult,4.5,0.812274,6.0,0.500014
risk_bank,4.5,0.8963,8.0,0.500003
risk_bank_2,4.5,0.896822,4.5,0.498001
risk_breastcancer,4.5,0.957522,11.0,0.274703
risk_careval,4.5,0.769702,8.7,0.495063
risk_compasbin,4.5,0.662456,7.6,0.331743
risk_diabetes,4.5,0.65,2.1,0.145283
risk_ficobin,4.5,0.721118,11.8,0.500044
risk_heart,4.5,0.822222,9.5,0.323431
risk_ionosphere,4.5,0.856897,6.8,0.275112


In [16]:
# RiskSlim results; these are the columns later labelled "Rudin's".
# `columns` and `column_names` are identical here, so the rename is a no-op
# kept only for symmetry with the BBM cell.
columns = ['dataset', 'random_seed', 'tst acc', 'depth', 'rob']
column_names = ['dataset', 'random_seed', 'tst acc', 'depth', 'rob']
df = display_results(RiskSlimExperiments3, '../results/risk_slim_3', ['n_samples', 'linsep_results'])
# NOTE: `df` is a shared kernel-level name that several later cells reassign.
df = proc_riskslim_result(df)
rudin_df = df[columns]
rudin_df.columns = column_names
# Per-dataset means ('random_seed' is averaged too and is not meaningful).
rudin_df.groupby("dataset").mean()

Unnamed: 0_level_0,random_seed,tst acc,depth,rob
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
risk_adult,4.5,0.824195,14.9,0.12385
risk_bank,4.5,0.896447,8.9,0.198242
risk_bank_2,4.5,0.899676,13.8,0.104087
risk_breastcancer,4.5,0.95708,6.0,0.277423
risk_careval,4.5,0.906305,10.1,0.190866
risk_compasbin,4.5,0.65114,5.4,0.147302
risk_diabetes,4.5,0.762598,6.0,0.092907
risk_ficobin,4.5,0.713094,6.4,0.221749
risk_heart,4.5,0.816667,11.9,0.137193
risk_ionosphere,4.5,0.878448,17.9,0.070969


In [12]:
# Decision-tree results. `columns` and `column_names` are identical, so the
# rename below does not change anything.
columns = ['dataset', 'random_seed', 'tst acc', 'depth', 'rob']
column_names = ['dataset', 'random_seed', 'tst acc', 'depth', 'rob']
dt_df = display_results(DTInterpretRobExperiments3, '../results/dt_interpret_rob_3/', ['tst acc', 'adv tst dist', 'er dist', 'best_clf'])
# 'depth' produced by proc_dt_result is node_count - n_leaves of each tree.
dt_df = proc_dt_result(dt_df)
dt_df = dt_df[columns]
dt_df.columns = column_names
# Per-dataset means ('random_seed' is averaged too and is not meaningful).
dt_df.groupby("dataset").mean()

Unnamed: 0_level_0,random_seed,tst acc,depth,rob
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
risk_adult,4.5,0.829769,414.2,0.5001
risk_bank,4.5,0.8973,30.7,0.5001
risk_bank_2,4.5,0.912624,30.0,0.115382
risk_breastcancer,4.5,0.940265,15.2,0.2336
risk_careval,4.5,0.971979,59.3,0.5001
risk_compasbin,4.5,0.666754,67.8,0.5001
risk_diabetes,4.5,0.736614,31.2,0.07566
risk_ficobin,4.5,0.714195,30.6,0.5001
risk_heart,4.5,0.762222,20.3,0.233167
risk_ionosphere,4.5,0.889655,11.3,0.149362


In [None]:
# Load the stored XGBoost results. This cell has no execution count in the
# saved notebook, yet the next cell reads `xgb_df`, so it must be run first.
xgb_df = display_results(XgboostRobDTInterpretRobExperiments, '../results/xgboostrobdt_interpret_rob/', ['tst acc', 'adv tst dist', 'er dist', 'best_clf'])

In [11]:
# `robdt_df` is built from the XGBoost results in `xgb_df` (rows with
# rsep == 0.05); the commented-out line is an alternative source that is
# not used. `columns` and `column_names` are identical, so the rename is a no-op.
columns = ['dataset', 'random_seed', 'tst acc', 'depth', 'rob']
column_names = ['dataset', 'random_seed', 'tst acc', 'depth', 'rob']
#df = display_results(RobDTInterpretRobExperiments3, '../results/robdt_interpret_rob_3/', ['tst acc', 'adv tst dist', 'depth', 'leaves'])
df = proc_xgb_result(xgb_df, rsep=0.05)
robdt_df = df[columns]
robdt_df.columns = column_names
# Per-dataset means ('random_seed' is averaged too and is not meaningful).
robdt_df.groupby("dataset").mean()

Unnamed: 0_level_0,random_seed,tst acc,depth,rob
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
risk_adult,4.5,0.829099,287.9,0.4965
risk_bank,4.5,0.897322,26.8,0.5
risk_bank_2,4.5,0.904819,30.7,0.175062
risk_breastcancer,4.5,0.943363,7.4,0.285
risk_careval,4.5,0.957268,28.2,0.5
risk_compasbin,4.5,0.668509,33.7,0.498
risk_diabetes,4.5,0.733465,27.9,0.080972
risk_ficobin,4.5,0.711935,59.6,0.499
risk_heart,4.5,0.793333,13.6,0.306167
risk_ionosphere,4.5,0.921552,8.6,0.249877


In [17]:
# Join the four per-method tables on (dataset, random_seed). Default inner
# merges keep only keys present in every table.
# Resulting suffixes: BBM -> "_bbm", rudin_df -> no suffix, DT -> "_DT",
# robdt_df -> "_robDT".
ddff = df_bbm.merge(rudin_df, on=['dataset', 'random_seed'], suffixes=("_bbm", ""))
ddff = ddff.merge(dt_df, on=['dataset', 'random_seed'], suffixes=("", "_DT"))
ddff = ddff.merge(robdt_df, on=['dataset', 'random_seed'], suffixes=("", "_robDT"))
# Per-dataset means ('random_seed' is averaged too and is not meaningful).
ddff.groupby("dataset").mean()

Unnamed: 0_level_0,random_seed,tst acc_bbm,depth_bbm,rob_bbm,tst acc,depth,rob,tst acc_DT,depth_DT,rob_DT,tst acc_robDT,depth_robDT,rob_robDT
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
risk_adult,4.5,0.812274,6.0,0.500014,0.824195,14.9,0.12385,0.829769,414.2,0.5001,0.829099,287.9,0.4965
risk_bank,4.5,0.8963,8.0,0.500003,0.896447,8.9,0.198242,0.8973,30.7,0.5001,0.897322,26.8,0.5
risk_bank_2,4.5,0.896822,4.5,0.498001,0.899676,13.8,0.104087,0.912624,30.0,0.115382,0.904819,30.7,0.175062
risk_breastcancer,4.5,0.957522,11.0,0.274703,0.95708,6.0,0.277423,0.940265,15.2,0.2336,0.943363,7.4,0.285
risk_careval,4.5,0.769702,8.7,0.495063,0.906305,10.1,0.190866,0.971979,59.3,0.5001,0.957268,28.2,0.5
risk_compasbin,4.5,0.662456,7.6,0.331743,0.65114,5.4,0.147302,0.666754,67.8,0.5001,0.668509,33.7,0.498
risk_diabetes,4.5,0.65,2.1,0.145283,0.762598,6.0,0.092907,0.736614,31.2,0.07566,0.733465,27.9,0.080972
risk_ficobin,4.5,0.721118,11.8,0.500044,0.713094,6.4,0.221749,0.714195,30.6,0.5001,0.711935,59.6,0.499
risk_heart,4.5,0.822222,9.5,0.323431,0.816667,11.9,0.137193,0.762222,20.3,0.233167,0.793333,13.6,0.306167
risk_ionosphere,4.5,0.856897,6.8,0.275112,0.878448,17.9,0.070969,0.889655,11.3,0.149362,0.921552,8.6,0.249877


In [18]:
# Build one LaTeX cell "$mean \pm sem$" per dataset and column of `ddff`.
data = {}
# Columns are grouped by metric (size, test accuracy, `rob`), each group in the
# order DT, RobDT, unsuffixed (rudin_df), BBM; the short labels assigned to
# `ddff2.columns` below follow the same order.
columns = ['depth_DT', 'depth_robDT', 'depth', 'depth_bbm',
           'tst acc_DT', 'tst acc_robDT', 'tst acc', 'tst acc_bbm',
           'rob_DT', 'rob_robDT', 'rob', 'rob_bbm']
for name, group in ddff.groupby("dataset"):
    name = name.replace("risk_", "")
    data[name] = []
    # Mean and standard error of the mean over the rows of this dataset.
    mean = group[columns].mean()
    sem = group[columns].sem()
    for m, s in zip(mean, sem):
        # The backslash is escaped explicitly: a bare "\p" is an invalid escape
        # sequence in a non-raw string literal. The produced text is unchanged.
        # The replace() drops a leading zero that follows a space, i.e. the one
        # in the error term (the mean directly follows "$" and is kept as-is).
        data[name].append(f"${m:.2f} \\pm {s:.2f}$".replace(" 0.", " ."))
ddff2 = pd.DataFrame.from_dict(data, orient='index')
# Alternative, more descriptive headers (not used):
#ddff2.columns = ["depth", "depth", "\# weak learners", "\# non-zero weights",
#                "test acc. (DT)", "test acc. (RobDT)", "test acc. (BBM)", "test acc. (Rudin's)",
#                "ER (DT)", "ER (RobDT)", "ER (BBM)", "ER (Rudin's)"]
ddff2.columns = ["DT", "RobDT", "Rudin's", "BBM",
                 "DT2", "RobDT2", "Rudin's2", "BBM2",
                 "DT3", "RobDT3", "Rudin's3", "BBM3",]
print(ddff2.to_latex(escape=False, column_format="lcccccccccccc"))

\begin{tabular}{lcccccccccccc}
\toprule
{} &                 DT &               RobDT &           Rudin's &              BBM &             DT2 &          RobDT2 &        Rudin's2 &            BBM2 &             DT3 &          RobDT3 &        Rudin's3 &            BBM3 \\
\midrule
adult        &  $414.20 \pm 5.66$ &  $287.90 \pm 35.66$ &  $14.90 \pm 1.46$ &   $6.00 \pm .60$ &  $0.83 \pm .00$ &  $0.83 \pm .00$ &  $0.82 \pm .00$ &  $0.81 \pm .00$ &  $0.50 \pm .00$ &  $0.50 \pm .00$ &  $0.12 \pm .02$ &  $0.50 \pm .00$ \\
bank         &    $30.70 \pm .15$ &     $26.80 \pm .20$ &    $8.90 \pm .66$ &  $8.00 \pm 1.41$ &  $0.90 \pm .00$ &  $0.90 \pm .00$ &  $0.90 \pm .00$ &  $0.90 \pm .00$ &  $0.50 \pm .00$ &  $0.50 \pm .00$ &  $0.20 \pm .03$ &  $0.50 \pm .00$ \\
bank_2       &    $30.00 \pm .30$ &     $30.70 \pm .15$ &  $13.80 \pm 1.54$ &  $4.50 \pm 1.34$ &  $0.91 \pm .00$ &  $0.90 \pm .00$ &  $0.90 \pm .00$ &  $0.90 \pm .00$ &  $0.12 \pm .01$ &  $0.18 \pm .02$ &  $0.10 \pm .01$ &  $0.50 \pm .

In [19]:
def bold_best(df, max_is_best=True):
    """Wrap the best entry (or entries, on ties) of every row in `\\mathbf{}`.

    Each cell must be a string of the form "$<mean> \\pm <err>$". The leading
    and trailing "$" are stripped and the text before " \\pm " is parsed as a
    float; rows are compared on that parsed value, so entries that are equal
    after formatting are all set in bold.

    The result is written into a copy through positional `.iat` access. The
    earlier version assigned into the row Series yielded by `iterrows()`,
    which pandas does not guarantee to be a view of the frame, and indexed a
    string-labelled Series with integers, which is deprecated.

    Args:
        df: DataFrame whose cells all follow the format above.
        max_is_best: If True the largest value of a row is best, otherwise the
            smallest.

    Returns:
        A new DataFrame with the same index and columns; `df` is not modified.
    """
    out = df.copy()
    n_rows, n_cols = out.shape
    for row_pos in range(n_rows):
        cells = [out.iat[row_pos, col_pos] for col_pos in range(n_cols)]
        # cell[1:-1] removes the surrounding "$".
        values = [float(cell[1:-1].split(" \\pm ")[0]) for cell in cells]
        if not values:
            continue
        best_number = np.max(values) if max_is_best else np.min(values)
        for col_pos, (cell, value) in enumerate(zip(cells, values)):
            if value == best_number:
                out.iat[row_pos, col_pos] = f"$\\mathbf{{{cell[1:-1]}}}$"
    return out

# Bold the best method per dataset within each group of four columns:
# the smallest value wins in the first group, the largest in the other two.
d1 = bold_best(ddff2[["DT", "RobDT", "Rudin's", "BBM"]], max_is_best=False)
d2 = bold_best(ddff2[["DT2", "RobDT2", "Rudin's2", "BBM2"]], max_is_best=True)
d3 = bold_best(ddff2[["DT3", "RobDT3", "Rudin's3", "BBM3"]], max_is_best=True)
# Put the three groups side by side again and emit the LaTeX table.
ddff3 = pd.concat((d1, d2, d3), axis=1)
print(ddff3.to_latex(escape=False, column_format="lcccccccccccc"))
ddff3

\begin{tabular}{lcccccccccccc}
\toprule
{} &                 DT &                    RobDT &                  Rudin's &                       BBM &                      DT2 &                   RobDT2 &                 Rudin's2 &                     BBM2 &                      DT3 &                   RobDT3 &        Rudin's3 &                     BBM3 \\
\midrule
adult        &  $414.20 \pm 5.66$ &       $287.90 \pm 35.66$ &         $14.90 \pm 1.46$ &   $\mathbf{6.00 \pm .60}$ &  $\mathbf{0.83 \pm .00}$ &  $\mathbf{0.83 \pm .00}$ &           $0.82 \pm .00$ &           $0.81 \pm .00$ &  $\mathbf{0.50 \pm .00}$ &  $\mathbf{0.50 \pm .00}$ &  $0.12 \pm .02$ &  $\mathbf{0.50 \pm .00}$ \\
bank         &    $30.70 \pm .15$ &          $26.80 \pm .20$ &           $8.90 \pm .66$ &  $\mathbf{8.00 \pm 1.41}$ &  $\mathbf{0.90 \pm .00}$ &  $\mathbf{0.90 \pm .00}$ &  $\mathbf{0.90 \pm .00}$ &  $\mathbf{0.90 \pm .00}$ &  $\mathbf{0.50 \pm .00}$ &  $\mathbf{0.50 \pm .00}$ &  $0.20 \pm .03$ &  $\mathbf{0

Unnamed: 0,DT,RobDT,Rudin's,BBM,DT2,RobDT2,Rudin's2,BBM2,DT3,RobDT3,Rudin's3,BBM3
adult,$414.20 \pm 5.66$,$287.90 \pm 35.66$,$14.90 \pm 1.46$,$\mathbf{6.00 \pm .60}$,$\mathbf{0.83 \pm .00}$,$\mathbf{0.83 \pm .00}$,$0.82 \pm .00$,$0.81 \pm .00$,$\mathbf{0.50 \pm .00}$,$\mathbf{0.50 \pm .00}$,$0.12 \pm .02$,$\mathbf{0.50 \pm .00}$
bank,$30.70 \pm .15$,$26.80 \pm .20$,$8.90 \pm .66$,$\mathbf{8.00 \pm 1.41}$,$\mathbf{0.90 \pm .00}$,$\mathbf{0.90 \pm .00}$,$\mathbf{0.90 \pm .00}$,$\mathbf{0.90 \pm .00}$,$\mathbf{0.50 \pm .00}$,$\mathbf{0.50 \pm .00}$,$0.20 \pm .03$,$\mathbf{0.50 \pm .00}$
bank_2,$30.00 \pm .30$,$30.70 \pm .15$,$13.80 \pm 1.54$,$\mathbf{4.50 \pm 1.34}$,$\mathbf{0.91 \pm .00}$,$0.90 \pm .00$,$0.90 \pm .00$,$0.90 \pm .00$,$0.12 \pm .01$,$0.18 \pm .02$,$0.10 \pm .01$,$\mathbf{0.50 \pm .00}$
breastcancer,$15.20 \pm 1.25$,$7.40 \pm .60$,$\mathbf{6.00 \pm .00}$,$11.00 \pm .89$,$0.94 \pm .00$,$0.94 \pm .01$,$\mathbf{0.96 \pm .00}$,$\mathbf{0.96 \pm .01}$,$0.23 \pm .01$,$\mathbf{0.29 \pm .01}$,$0.28 \pm .00$,$0.27 \pm .01$
careval,$59.30 \pm 2.22$,$28.20 \pm .65$,$10.10 \pm .97$,$\mathbf{8.70 \pm .47}$,$\mathbf{0.97 \pm .00}$,$0.96 \pm .00$,$0.91 \pm .01$,$0.77 \pm .00$,$\mathbf{0.50 \pm .00}$,$\mathbf{0.50 \pm .00}$,$0.19 \pm .02$,$\mathbf{0.50 \pm .00}$
compasbin,$67.80 \pm 13.01$,$33.70 \pm 3.05$,$\mathbf{5.40 \pm .22}$,$7.60 \pm .16$,$\mathbf{0.67 \pm .00}$,$\mathbf{0.67 \pm .00}$,$0.65 \pm .00$,$0.66 \pm .00$,$\mathbf{0.50 \pm .00}$,$\mathbf{0.50 \pm .00}$,$0.15 \pm .01$,$0.33 \pm .01$
diabetes,$31.20 \pm 6.96$,$27.90 \pm 2.95$,$6.00 \pm .00$,$\mathbf{2.10 \pm .53}$,$0.74 \pm .01$,$0.73 \pm .01$,$\mathbf{0.76 \pm .01}$,$0.65 \pm .01$,$0.08 \pm .01$,$0.08 \pm .00$,$0.09 \pm .00$,$\mathbf{0.15 \pm .05}$
ficobin,$30.60 \pm .22$,$59.60 \pm 29.82$,$\mathbf{6.40 \pm .16}$,$11.80 \pm .65$,$0.71 \pm .00$,$0.71 \pm .00$,$0.71 \pm .00$,$\mathbf{0.72 \pm .00}$,$\mathbf{0.50 \pm .00}$,$\mathbf{0.50 \pm .00}$,$0.22 \pm .01$,$\mathbf{0.50 \pm .00}$
heart,$20.30 \pm 1.60$,$13.60 \pm .88$,$11.90 \pm 1.46$,$\mathbf{9.50 \pm .82}$,$0.76 \pm .01$,$0.79 \pm .01$,$\mathbf{0.82 \pm .01}$,$\mathbf{0.82 \pm .01}$,$0.23 \pm .02$,$0.31 \pm .02$,$0.14 \pm .01$,$\mathbf{0.32 \pm .02}$
ionosphere,$11.30 \pm .98$,$8.60 \pm .76$,$17.90 \pm 3.14$,$\mathbf{6.80 \pm 1.96}$,$0.89 \pm .01$,$\mathbf{0.92 \pm .01}$,$0.88 \pm .01$,$0.86 \pm .01$,$0.15 \pm .01$,$0.25 \pm .01$,$0.07 \pm .01$,$\mathbf{0.28 \pm .01}$


In [20]:
def bold_best(df, max_is_best=True):
    """Reduce every cell to its mean and set the best value(s) of each row in bold.

    NOTE: this redefinition shadows the earlier `bold_best` in this notebook.
    Unlike that version it drops the " \\pm <err>" part of every cell, so the
    output holds "$<mean>$" or "$\\mathbf{<mean>}$".

    Each cell must be a string of the form "$<mean> \\pm <err>$". Rows are
    compared on the float parsed from <mean>, so entries that are equal after
    formatting are all set in bold.

    The result is written into a copy through positional `.iat` access. The
    earlier version assigned into the row Series yielded by `iterrows()`,
    which pandas does not guarantee to be a view of the frame, and indexed a
    string-labelled Series with integers, which is deprecated.

    Args:
        df: DataFrame whose cells all follow the format above.
        max_is_best: If True the largest value of a row is best, otherwise the
            smallest.

    Returns:
        A new DataFrame with the same index and columns; `df` is not modified.
    """
    out = df.copy()
    n_rows, n_cols = out.shape
    for row_pos in range(n_rows):
        # cell[1:-1] removes the surrounding "$"; keep only the text before "\pm".
        mean_texts = [
            out.iat[row_pos, col_pos][1:-1].split(" \\pm ")[0]
            for col_pos in range(n_cols)
        ]
        values = [float(text) for text in mean_texts]
        if not values:
            continue
        best_number = np.max(values) if max_is_best else np.min(values)
        for col_pos, (text, value) in enumerate(zip(mean_texts, values)):
            if value == best_number:
                out.iat[row_pos, col_pos] = f"$\\mathbf{{{text}}}$"
            else:
                out.iat[row_pos, col_pos] = f"${text}$"
    return out

# Same table as before, now produced with the bold_best variant defined in
# this cell (cells reduced to the mean, without the \pm term).
# The smallest value wins in the first group, the largest in the other two.
d1 = bold_best(ddff2[["DT", "RobDT", "Rudin's", "BBM"]], max_is_best=False)
d2 = bold_best(ddff2[["DT2", "RobDT2", "Rudin's2", "BBM2"]], max_is_best=True)
d3 = bold_best(ddff2[["DT3", "RobDT3", "Rudin's3", "BBM3"]], max_is_best=True)
ddff3 = pd.concat((d1, d2, d3), axis=1)
print(ddff3.to_latex(escape=False, column_format="lcccccccccccc"))
ddff3

\begin{tabular}{lcccccccccccc}
\toprule
{} &        DT &            RobDT &          Rudin's &              BBM &              DT2 &           RobDT2 &         Rudin's2 &             BBM2 &              DT3 &           RobDT3 & Rudin's3 &             BBM3 \\
\midrule
adult        &  $414.20$ &         $287.90$ &          $14.90$ &  $\mathbf{6.00}$ &  $\mathbf{0.83}$ &  $\mathbf{0.83}$ &           $0.82$ &           $0.81$ &  $\mathbf{0.50}$ &  $\mathbf{0.50}$ &   $0.12$ &  $\mathbf{0.50}$ \\
bank         &   $30.70$ &          $26.80$ &           $8.90$ &  $\mathbf{8.00}$ &  $\mathbf{0.90}$ &  $\mathbf{0.90}$ &  $\mathbf{0.90}$ &  $\mathbf{0.90}$ &  $\mathbf{0.50}$ &  $\mathbf{0.50}$ &   $0.20$ &  $\mathbf{0.50}$ \\
bank_2       &   $30.00$ &          $30.70$ &          $13.80$ &  $\mathbf{4.50}$ &  $\mathbf{0.91}$ &           $0.90$ &           $0.90$ &           $0.90$ &           $0.12$ &           $0.18$ &   $0.10$ &  $\mathbf{0.50}$ \\
breastcancer &   $15.20$ &           $7.40$ &

Unnamed: 0,DT,RobDT,Rudin's,BBM,DT2,RobDT2,Rudin's2,BBM2,DT3,RobDT3,Rudin's3,BBM3
adult,$414.20$,$287.90$,$14.90$,$\mathbf{6.00}$,$\mathbf{0.83}$,$\mathbf{0.83}$,$0.82$,$0.81$,$\mathbf{0.50}$,$\mathbf{0.50}$,$0.12$,$\mathbf{0.50}$
bank,$30.70$,$26.80$,$8.90$,$\mathbf{8.00}$,$\mathbf{0.90}$,$\mathbf{0.90}$,$\mathbf{0.90}$,$\mathbf{0.90}$,$\mathbf{0.50}$,$\mathbf{0.50}$,$0.20$,$\mathbf{0.50}$
bank_2,$30.00$,$30.70$,$13.80$,$\mathbf{4.50}$,$\mathbf{0.91}$,$0.90$,$0.90$,$0.90$,$0.12$,$0.18$,$0.10$,$\mathbf{0.50}$
breastcancer,$15.20$,$7.40$,$\mathbf{6.00}$,$11.00$,$0.94$,$0.94$,$\mathbf{0.96}$,$\mathbf{0.96}$,$0.23$,$\mathbf{0.29}$,$0.28$,$0.27$
careval,$59.30$,$28.20$,$10.10$,$\mathbf{8.70}$,$\mathbf{0.97}$,$0.96$,$0.91$,$0.77$,$\mathbf{0.50}$,$\mathbf{0.50}$,$0.19$,$\mathbf{0.50}$
compasbin,$67.80$,$33.70$,$\mathbf{5.40}$,$7.60$,$\mathbf{0.67}$,$\mathbf{0.67}$,$0.65$,$0.66$,$\mathbf{0.50}$,$\mathbf{0.50}$,$0.15$,$0.33$
diabetes,$31.20$,$27.90$,$6.00$,$\mathbf{2.10}$,$0.74$,$0.73$,$\mathbf{0.76}$,$0.65$,$0.08$,$0.08$,$0.09$,$\mathbf{0.15}$
ficobin,$30.60$,$59.60$,$\mathbf{6.40}$,$11.80$,$0.71$,$0.71$,$0.71$,$\mathbf{0.72}$,$\mathbf{0.50}$,$\mathbf{0.50}$,$0.22$,$\mathbf{0.50}$
heart,$20.30$,$13.60$,$11.90$,$\mathbf{9.50}$,$0.76$,$0.79$,$\mathbf{0.82}$,$\mathbf{0.82}$,$0.23$,$0.31$,$0.14$,$\mathbf{0.32}$
ionosphere,$11.30$,$8.60$,$17.90$,$\mathbf{6.80}$,$0.89$,$\mathbf{0.92}$,$0.88$,$0.86$,$0.15$,$0.25$,$0.07$,$\mathbf{0.28}$


In [66]:
# Ad-hoc inspection of the raw 'er dist' array stored in one BBM result file.
# NOTE: joblib.load unpickles the file; only load result files you trust.
res = joblib.load("../results/lin_sep_bbm_rob_3/risk_compasbin-rminmax-4.pkl")
res['bbm_results']['er dist']

array([0.50000001, 0.50000001, 0.10010001, 0.50000001, 0.50000001,
       0.50010001, 0.10010001, 0.50000001, 0.10010001, 0.50000001,
       0.10010001, 0.10010001, 0.10010001, 0.50010001, 0.50010001,
       0.10010001, 0.50000001, 0.50000001, 0.10010001, 0.50000001,
       0.50000001, 0.50000001, 0.10010001, 0.10010001, 0.50010001,
       0.50000001, 0.10010001, 0.10010001, 0.50000001, 0.10010001,
       0.50010001, 0.50000001, 0.50000001, 0.10010001, 0.10010001,
       0.50000001, 0.10010001, 0.10010001, 0.50000001, 0.50000001,
       0.50000001, 0.50000001, 0.50000001, 0.10010001, 0.50000001,
       0.50000001, 0.50000001, 0.50000001, 0.10010001, 0.50000001,
       0.10010001, 0.10010001, 0.50000001, 0.50000001, 0.50000001,
       0.50000001, 0.10010001, 0.10010001, 0.50000001, 0.50000001,
       0.50000001, 0.10010001, 0.50000001, 0.10010001, 0.10010001,
       0.10010001, 0.10010001, 0.50000001, 0.50000001, 0.50000001,
       0.10010001, 0.50000001, 0.50000001, 0.10010001, 0.50000

In [28]:
# Ad-hoc check: number of non-zero weights in the first 'linsep_results' entry
# of one RiskSlim result file (the quantity proc_riskslim_result reports as 'depth').
# NOTE: joblib.load unpickles the file; only load result files you trust.
res = joblib.load("../results/risk_slim_3/risk_bank_2-rminmax-0.pkl")
(res['linsep_results'][0]['w'] != 0).sum()

14

In [None]:
# Ad-hoc filter on one dataset name. `df` is whatever frame was last bound to
# that name in the kernel; this cell has no execution count in the saved notebook.
df[df['dataset'] == 'risk_twogauss_cont-2000-1.0']

In [None]:
# Collect, per dataset, test-accuracy curves taken from the 'dif_depth' lists
# of two 'linsep_results' entries of every row:
#   dset_curve     - the entry at index n_samples[1:].argmax()
#   dset_ori_curve - the entry at index 0
# Every curve has length 100 and is padded with its last value.
dset_curve = {}
dset_ori_curve = {}
# NOTE(review): `LinSepBBMExperiments` is not among the names imported from
# `params` in the import cell; unless it is defined elsewhere this raises
# NameError on a fresh kernel - confirm.
df = display_results(LinSepBBMExperiments, '../results/lin_sep_bbm', ['n_samples', 'linsep_results'])
for i, d in df.iterrows():
    cs = [None, 1e-6, 1e-4, 1e-2, 1e-0, 1e2, 1e4, 1e6, 1e8, 1e10]
    c_n_examples = {}
    for c in cs:
        # Find the result entry for this value of c. If no entry matches, the
        # loop ends without `break` and `ent` is simply the last entry, whose
        # 'n_trn_samples' is then recorded for c.
        for ent in d['linsep_results']:
            if ent['c'] == c:
                break
        c_n_examples.setdefault(c, []).append(ent['n_trn_samples'])

    # One training-sample count per c, flattened to a 1-D array in `cs` order.
    n_samples = []
    for c in cs:
        n_samples.append(c_n_examples[c])
    n_samples = np.asarray(n_samples).reshape(-1)

    # n_samples[1:].argmax() is a position within cs[1:] (the first entry is
    # skipped), but it is used directly as an index into d["linsep_results"].
    # NOTE(review): if 'linsep_results' is ordered like `cs`, this index is off
    # by one - confirm the intended entry.
    if 'dif_depth' in d["linsep_results"][n_samples[1:].argmax()]:
        # Falsy 'dif_depth' items are skipped. The padding below needs at least
        # one and at most 100 remaining items.
        temp = np.asarray([data['tst acc'] for data in d["linsep_results"][n_samples[1:].argmax()]['dif_depth'] if data])
        curve = np.zeros(100)
        curve[:len(temp)] = temp
        curve[len(temp):] = temp[-1]
        dset_curve.setdefault(d['dataset'], []).append(curve)

        # Same construction for the entry at index 0; it is only checked above
        # that the other entry has 'dif_depth'.
        temp = np.asarray([data['tst acc'] for data in d["linsep_results"][0]['dif_depth'] if data])
        curve = np.zeros(100)
        curve[:len(temp)] = temp
        curve[len(temp):] = temp[-1]
        dset_ori_curve.setdefault(d['dataset'], []).append(curve)

        #d["linsep_results"][n_samples[1:].argmax()]['learned_model']


In [11]:
# One figure per dataset: mean of the `dset_ori_curve` curves over the 100
# positions, with the standard error across curves as error bars.
# `v` (the `dset_curve` entry) is only referenced by the commented-out plot call.
for k, v in dset_curve.items():
    plt.title(k, fontsize=fontsize)
    #plt.plot(np.mean(v, axis=0), label="lin sep")
    #plt.plot(np.mean(dset_ori_curve[k], axis=0), label="ori")
    plt.errorbar(x=np.arange(100), y=np.mean(dset_ori_curve[k], axis=0), yerr=scipy.stats.sem(dset_ori_curve[k], axis=0), label="ori")
    plt.xlabel('# weak learners', fontsize=fontsize)
    plt.ylabel('test accuracy', fontsize=fontsize)
    # The legend is disabled, so the label="ori" above is not shown.
    #plt.legend(fontsize=fontsize)
    plt.tight_layout()
    # Assumes the directory ./figs/bbm_acc_number/ already exists.
    plt.savefig(f"./figs/bbm_acc_number/{k}.png")
    # Close the current figure so the next dataset starts from a clean one.
    plt.close()

In [40]:
# Leftover debugging probe. `n_samples` is not defined in this cell; it is the
# value left in the kernel by the loop of the curve-collection cell (its last
# iteration). The expression result is discarded, and the recorded run of this
# cell ended with KeyError: 'learned_model'.
for i, d in df.iterrows():
    d["linsep_results"][n_samples[1:].argmax()]['learned_model']


KeyError: 'learned_model'