In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

In [2]:
from utils import generate_table, Cell, to_latex, merge_cols

In [12]:
# Load the balanced-evaluation results CSV for the Adult experiments.
data = pd.read_csv("../../results/adult/balanced-eval.csv")

In [13]:
# Sanity check: number of rows per experiment group.
data["Group"].value_counts()

perfect-cluster.old.on_recon.balanced_eval.miss_s     30
perfect-cluster.old.on_recon.balanced_eval.partial    30
kmeans-fdm.old.on_recon.balanced_eval.miss_s          30
no-cluster-fdm.old.on_recon.balanced_eval.miss_s      30
ranking-fdm.old.on_recon.balanced_eval.miss_s         30
no-cluster-fdm.old.on_recon.balanced_eval.partial     30
kmeans-fdm.old.on_recon.balanced_eval.partial         30
ranking-fdm.old.on_recon.balanced_eval.partial        30
Name: Group, dtype: int64

In [14]:
# List the available columns; the metric names used in the tables are picked from here.
data.columns

Index(['Name', 'Created', 'Runtime', 'Group', 'bias.log_dataset',
       'bias.missing_s', 'clust.method', 'fdm.balanced_context',
       'misc.log_method', 'Accuracy (LRCV)',
       'Accuracy (Logistic Regression (C=1.0))',
       'Accuracy (pytorch_classifier)', 'Accuracy Discriminator (zy)',
       'Accuracy_sex_Male_0.0 (LRCV)',
       'Accuracy_sex_Male_0.0 (Logistic Regression (C=1.0))',
       'Accuracy_sex_Male_0.0-sex_Male_1.0 (LRCV)',
       'Accuracy_sex_Male_0.0-sex_Male_1.0 (Logistic Regression (C=1.0))',
       'Accuracy_sex_Male_0.0-sex_Male_1.0 (pytorch_classifier)',
       'Accuracy_sex_Male_0.0÷sex_Male_1.0 (LRCV)',
       'Accuracy_sex_Male_0.0÷sex_Male_1.0 (Logistic Regression (C=1.0))',
       'Accuracy_sex_Male_0.0÷sex_Male_1.0 (pytorch_classifier)',
       'Accuracy_sex_Male_1.0 (LRCV)',
       'Accuracy_sex_Male_1.0 (Logistic Regression (C=1.0))',
       'Accuracy_sex_Male_1.0 (pytorch_classifier)', 'Clust/Context ARI',
       'Clust/Context Acc y=0 s=0', 'Clust

In [15]:
# Row counts per raw `misc.log_method` label (these labels are mapped to display names below).
data["misc.log_method"].value_counts()

no-cluster-fdm     60
ranking-fdm        60
kmeans-fdm         60
perfect-cluster    60
Name: misc.log_method, dtype: int64

In [16]:
# Map the raw `misc.log_method` identifiers to the display names used in the tables.
# Only the `misc.log_method` column is touched; `replace` returns a new frame.
method_display_names = {
    "kmeans-fdm": "Kmeans",
    "ranking-fdm": "Ranking",
    "no-cluster-fdm": "no cluster",
    "perfect-cluster": "groundtruth cluster",
}
data = data.replace({"misc.log_method": method_display_names})

In [17]:
def table(data, filter_, cols):
    """Build a per-group summary table for a subset of runs.

    Parameters
    ----------
    data : pandas.DataFrame
        Results frame; must contain a "Group" column and every key of `cols`.
    filter_ : str
        Expression passed to `DataFrame.query` to select the rows to summarise.
    cols : dict
        Maps source column names to the headers they should get in the output.
        Only these columns are kept, in the dict's key order.

    Returns
    -------
    pandas.DataFrame
        One row per "Group" value, one column per entry of `cols`, with each
        cell produced by the `Cell(round_to=3)` aggregator.
        NOTE(review): `Cell` comes from `utils`; it presumably renders
        "mean $\\pm$ std" strings - confirm against its definition.
    """
    selected_rows = data.query(filter_)
    # Aggregate every column first, then keep only the requested ones.
    aggregated = selected_rows.groupby("Group").agg(Cell(round_to=3))
    wanted = list(cols)
    return aggregated[wanted].rename(columns=cols)

In [18]:
# Metric columns to report, mapped to the short headers used in the tables.
# The classifier-based metrics all carry the same column-name suffix, so it is
# spelled once here.
clf_suffix = " (Logistic Regression (C=1.0))"
cols = {
    # clustering metrics
    "Clust/Context Accuracy": "Clust Acc",
    "Clust/Context NMI": "Clust NMI",
    "Clust/Context ARI": "Clust ARI",
    # classifier metrics
    "Accuracy" + clf_suffix: "Accuracy",
    "prob_pos_sex_Male_0.0÷sex_Male_1.0" + clf_suffix: "AR ratio",
    "Renyi preds and s" + clf_suffix: "Renyi corr",
    "TPR_sex_Male_0.0÷sex_Male_1.0" + clf_suffix: "TPR ratio",
    "TNR_sex_Male_0.0÷sex_Male_1.0" + clf_suffix: "TNR ratio",
}

In [19]:
# Summarise the runs whose `bias.log_dataset` contains 'miss_s': one row per
# method, one column per reported metric, each cell built by `Cell(round_to=3)`.
miss_s_runs = data.query("`bias.log_dataset`.str.contains('miss_s')")
per_method = (
    miss_s_runs[["misc.log_method", *cols]]
    .groupby(["misc.log_method"])
    .agg(Cell(round_to=3))
)
res = (
    per_method
    .rename(columns=cols)
    # Turn the group key back into an ordinary column and give it a table header.
    .reset_index(level=["misc.log_method"])
    .rename(columns={"misc.log_method": "Method"})
)
res

Unnamed: 0,Method,Clust Acc,Clust NMI,Clust ARI,Accuracy,AR ratio,Renyi corr,TPR ratio,TNR ratio
0,Kmeans,0.373 $\pm$ 0.035,0.025 $\pm$ 0.014,0.017 $\pm$ 0.01,0.777 $\pm$ 0.028,0.843 $\pm$ 0.105,0.076 $\pm$ 0.053,0.885 $\pm$ 0.101,0.912 $\pm$ 0.046
1,Ranking,0.606 $\pm$ 0.01,0.259 $\pm$ 0.008,0.23 $\pm$ 0.005,0.775 $\pm$ 0.015,0.801 $\pm$ 0.082,0.091 $\pm$ 0.039,0.868 $\pm$ 0.086,0.911 $\pm$ 0.037
2,groundtruth cluster,,,,0.767 $\pm$ 0.015,0.81 $\pm$ 0.063,0.081 $\pm$ 0.029,0.872 $\pm$ 0.06,0.923 $\pm$ 0.032
3,no cluster,,,,0.768 $\pm$ 0.024,0.805 $\pm$ 0.124,0.095 $\pm$ 0.061,0.851 $\pm$ 0.131,0.912 $\pm$ 0.035


In [25]:
# Emit the table as LaTeX source. `index=False` drops the row numbers;
# `escape=False` keeps the `$\pm$` inside the cells as raw LaTeX instead of escaping it.
print(res.to_latex(index=False, escape=False))

\begin{tabular}{lllllllll}
\toprule
              Method &          Clust Acc &          Clust NMI &         Clust ARI &           Accuracy &           AR ratio &         Renyi corr &          TPR ratio &          TNR ratio \\
\midrule
              Kmeans &  0.373 $\pm$ 0.035 &  0.025 $\pm$ 0.014 &  0.017 $\pm$ 0.01 &  0.777 $\pm$ 0.028 &  0.843 $\pm$ 0.105 &  0.076 $\pm$ 0.053 &  0.885 $\pm$ 0.101 &  0.912 $\pm$ 0.046 \\
             Ranking &   0.606 $\pm$ 0.01 &  0.259 $\pm$ 0.008 &  0.23 $\pm$ 0.005 &  0.775 $\pm$ 0.015 &  0.801 $\pm$ 0.082 &  0.091 $\pm$ 0.039 &  0.868 $\pm$ 0.086 &  0.911 $\pm$ 0.037 \\
 groundtruth cluster &                N/A &                N/A &               N/A &  0.767 $\pm$ 0.015 &   0.81 $\pm$ 0.063 &  0.081 $\pm$ 0.029 &   0.872 $\pm$ 0.06 &  0.923 $\pm$ 0.032 \\
          no cluster &                N/A &                N/A &               N/A &  0.768 $\pm$ 0.024 &  0.805 $\pm$ 0.124 &  0.095 $\pm$ 0.061 &  0.851 $\pm$ 0.131 &  0.912 $\pm$ 0.035 \\


In [3]:
# Baseline result files, keyed by the method label shown in the tables.
missing_demo = {
    "Kamiran": "adult_Kamiran & Calders LR_lr_baselines.csv",
    "LRCV": "adult_LRCV_lr_baselines.csv",
    "LR": "adult_Logistic Regression (C=1.0)_lr_baselines.csv",
    "Majority": "adult_Majority_lr_baselines.csv",
    "SVM (lin)": "adult_SVM (linear)_lr_baselines.csv",
    "SVM": "adult_SVM_lr_baselines.csv",
    "MLP": "adult_cnn_baseline_60epochs.csv",
    "DRO": "adult_dro_baseline_60epochs.csv",
}
# The partial-outcome experiment uses exactly the same file names. Keep it as an
# independent copy so either mapping can be edited without affecting the other.
partial_outcome = dict(missing_demo)

In [4]:
def collate(file_dict, exp_group, dir_, root="../../results/adult"):
    """Read several baseline CSV files and stack them into one frame.

    Parameters
    ----------
    file_dict : dict
        Maps a method label to the CSV file name holding that method's results.
    exp_group : str
        Value written to the "exp_group" column of every row.
    dir_ : str or Path
        Sub-directory of `root` that contains the files.
    root : str or Path, optional
        Directory the results live under. Defaults to the location this
        notebook has always read from.

    Returns
    -------
    pandas.DataFrame
        All rows of all files with a fresh 0..n-1 index. "log_method" and
        "exp_group" are inserted as the first two columns of each file's frame.
        Rows from the "DRO" file get their "eta" value appended to the label,
        e.g. "DRO (0.1)". An empty `file_dict` yields an empty frame.
    """
    frames = []
    for log_method, filename in file_dict.items():
        df = pd.read_csv(Path(root) / dir_ / filename)
        df.insert(0, "exp_group", exp_group)
        df.insert(0, "log_method", log_method)
        if log_method == "DRO":
            # The DRO file holds runs for several eta values; make each its own method.
            df["log_method"] = df["log_method"] + " (" + df["eta"].astype(str) + ")"
        frames.append(df)
    if not frames:
        # pd.concat raises on an empty list; keep the original empty-frame result.
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(frames, axis="index", ignore_index=True, sort=False)

In [5]:
# Stack the baselines of both experiments; the "exp_group" column tells them apart.
baseline_frames = [
    collate(missing_demo, "miss_s", "missing_demo"),
    collate(partial_outcome, "partial", "partial_outcome"),
]
baselines = pd.concat(baseline_frames)

In [6]:
# The same ratio metrics appear under `sex_Male_*` and `sens_Label_*` column
# names; hand each pair to `merge_cols`.
# NOTE(review): `merge_cols` comes from `utils`; it presumably folds the second
# column into the first in place - confirm against its definition.
ratio_column_pairs = [
    ('prob_pos_sex_Male_0.0÷sex_Male_1.0', 'prob_pos_sens_Label_0.0÷sens_Label_1.0'),
    ('TPR_sex_Male_0.0÷sex_Male_1.0', 'TPR_sens_Label_0.0÷sens_Label_1.0'),
    ('TNR_sex_Male_0.0÷sex_Male_1.0', 'TNR_sens_Label_0.0÷sens_Label_1.0'),
]
for first_col, second_col in ratio_column_pairs:
    merge_cols(baselines, first_col, second_col)

In [9]:
# Baseline metric columns mapped to the headers used in the tables.
# Each source column is listed exactly once. A dict literal silently keeps only
# the last value of a repeated key, so the former extra "Renyi" entry was dead.
bl_cols = {
    "Accuracy": "Accuracy",
    "Renyi preds and s": "Renyi corr",
    "prob_pos_sex_Male_0.0÷sex_Male_1.0": "AR ratio",
    "TPR_sex_Male_0.0÷sex_Male_1.0": "TPR ratio",
    "TNR_sex_Male_0.0÷sex_Male_1.0": "TNR ratio",
}

In [10]:
# Summarise the 'miss_s' baselines: one row per method, one column per
# reported metric, each cell built by `Cell(round_to=3)`.
miss_s_baselines = baselines.query("exp_group == 'miss_s'")
per_baseline = (
    miss_s_baselines[["log_method", *bl_cols]]
    .groupby(["log_method"])
    .agg(Cell(round_to=3))
)
res2 = (
    per_baseline
    .rename(columns=bl_cols)
    # Turn the group key back into an ordinary column and give it a table header.
    .reset_index(level=["log_method"])
    .rename(columns={"log_method": "Method"})
)
res2

Unnamed: 0,Method,Accuracy,Renyi corr,AR ratio,TPR ratio,TNR ratio
0,DRO (0.01),0.802 $\pm$ 0.018,0.133 $\pm$ 0.057,0.752 $\pm$ 0.102,0.854 $\pm$ 0.097,0.839 $\pm$ 0.038
1,DRO (0.1),0.801 $\pm$ 0.018,0.134 $\pm$ 0.058,0.748 $\pm$ 0.104,0.852 $\pm$ 0.1,0.839 $\pm$ 0.037
2,DRO (0.3),0.801 $\pm$ 0.02,0.142 $\pm$ 0.057,0.735 $\pm$ 0.103,0.838 $\pm$ 0.104,0.839 $\pm$ 0.036
3,DRO (1.0),0.5 $\pm$ 0.0,1.0 $\pm$ 0.0,1.0 $\pm$ 0.0,1.0 $\pm$ 0.0,1.0 $\pm$ 0.0
4,Kamiran,0.735 $\pm$ 0.015,0.153 $\pm$ 0.034,0.62 $\pm$ 0.075,0.685 $\pm$ 0.078,0.919 $\pm$ 0.019
5,LR,0.735 $\pm$ 0.015,0.153 $\pm$ 0.034,0.62 $\pm$ 0.075,0.685 $\pm$ 0.078,0.919 $\pm$ 0.019
6,LRCV,0.732 $\pm$ 0.023,0.161 $\pm$ 0.047,0.601 $\pm$ 0.112,0.662 $\pm$ 0.11,0.92 $\pm$ 0.023
7,MLP,0.801 $\pm$ 0.018,0.136 $\pm$ 0.057,0.746 $\pm$ 0.101,0.846 $\pm$ 0.095,0.841 $\pm$ 0.039
8,Majority,0.5 $\pm$ 0.0,1.0 $\pm$ 0.0,,,1.0 $\pm$ 0.0
9,SVM,0.714 $\pm$ 0.012,0.171 $\pm$ 0.02,0.554 $\pm$ 0.043,0.617 $\pm$ 0.046,0.925 $\pm$ 0.014


In [21]:
# Put our methods and the baselines in one table with a fresh 0..n-1 index.
# Columns missing from one side (the clustering metrics) are left empty there.
pd.concat([res, res2], ignore_index=True)

Unnamed: 0,Method,Clust Acc,Clust NMI,Clust ARI,Accuracy,AR ratio,Renyi corr,TPR ratio,TNR ratio
0,Kmeans,0.373 $\pm$ 0.035,0.025 $\pm$ 0.014,0.017 $\pm$ 0.01,0.777 $\pm$ 0.028,0.843 $\pm$ 0.105,0.076 $\pm$ 0.053,0.885 $\pm$ 0.101,0.912 $\pm$ 0.046
1,Ranking,0.606 $\pm$ 0.01,0.259 $\pm$ 0.008,0.23 $\pm$ 0.005,0.775 $\pm$ 0.015,0.801 $\pm$ 0.082,0.091 $\pm$ 0.039,0.868 $\pm$ 0.086,0.911 $\pm$ 0.037
2,groundtruth cluster,,,,0.767 $\pm$ 0.015,0.81 $\pm$ 0.063,0.081 $\pm$ 0.029,0.872 $\pm$ 0.06,0.923 $\pm$ 0.032
3,no cluster,,,,0.768 $\pm$ 0.024,0.805 $\pm$ 0.124,0.095 $\pm$ 0.061,0.851 $\pm$ 0.131,0.912 $\pm$ 0.035
4,DRO (0.01),,,,0.802 $\pm$ 0.018,0.752 $\pm$ 0.102,0.133 $\pm$ 0.057,0.854 $\pm$ 0.097,0.839 $\pm$ 0.038
5,DRO (0.1),,,,0.801 $\pm$ 0.018,0.748 $\pm$ 0.104,0.134 $\pm$ 0.058,0.852 $\pm$ 0.1,0.839 $\pm$ 0.037
6,DRO (0.3),,,,0.801 $\pm$ 0.02,0.735 $\pm$ 0.103,0.142 $\pm$ 0.057,0.838 $\pm$ 0.104,0.839 $\pm$ 0.036
7,DRO (1.0),,,,0.5 $\pm$ 0.0,1.0 $\pm$ 0.0,1.0 $\pm$ 0.0,1.0 $\pm$ 0.0,1.0 $\pm$ 0.0
8,Kamiran,,,,0.735 $\pm$ 0.015,0.62 $\pm$ 0.075,0.153 $\pm$ 0.034,0.685 $\pm$ 0.078,0.919 $\pm$ 0.019
9,LR,,,,0.735 $\pm$ 0.015,0.62 $\pm$ 0.075,0.153 $\pm$ 0.034,0.685 $\pm$ 0.078,0.919 $\pm$ 0.019
