Table maker

In [7]:
import json
from pathlib import Path

import pandas as pd
from matplotlib import pyplot as plt

# Directory that is scanned for `*.json` result files.
results_dir = Path(".", "results_minimal")

# Position of the scaler name among the "_"-separated fields of a result
# file's stem (the file name without directory and extension).
SCALER_FIELD = 1

# Parse the file stem rather than the full path string: the directory name
# "results_minimal" contains an underscore itself, so splitting the whole path
# would shift every field index and silently break if the directory were
# renamed or given as an absolute path.
scaling_transformations = {
    file.stem.split("_")[SCALER_FIELD] for file in results_dir.glob("*.json")
}
scaling_transformations

{'MaxAbsScaler',
 'MinMaxScaler',
 'QuantileTransformer',
 'RobustScaler',
 'StandardScaler'}

Table with results for each scaler

In [8]:
# NOTE(review): nothing in this cell appends to `pandas_tables`; the name is
# kept unchanged only in case another cell refers to it.
pandas_tables = []

# Flip to True to draw, for every result file, a histogram of the per-seed
# (leakage - correct) differences. Off by default, so no figure is produced.
PLOT_DIFF_HISTOGRAMS = False

# For each scaler, summarise every matching result file in one row and write
# the rows to `scaler_<scaler>.csv` in the current working directory.
for scaler in scaling_transformations:
    scaler_rows = []

    # Only JSON files are parsed; sorted() makes the CSV row order independent
    # of the order in which the directory happens to be listed.
    for result in sorted(results_dir.glob(f"*{scaler}*.json")):
        # Keep the file open only for as long as it takes to parse it.
        with open(result, "r", encoding="utf-8") as f:
            clf_results = json.load(f)

        # Top-level keys are seeds (parsed as int); each entry is read at
        # ["bcc"]["correct"] and ["bcc"]["leakage"].
        # NOTE(review): presumably these are scores of a correct pipeline vs.
        # one with data leakage -- confirm against the code writing the JSON.
        pd_data = pd.DataFrame(
            {
                "seed": [int(seed) for seed in clf_results],
                "bcc_correct": [run["bcc"]["correct"] for run in clf_results.values()],
                "bcc_leakage": [run["bcc"]["leakage"] for run in clf_results.values()],
            }
        )

        # Per-seed difference: leakage score minus correct score.
        diff = pd_data["bcc_leakage"] - pd_data["bcc_correct"]

        if PLOT_DIFF_HISTOGRAMS:
            plt.figure()
            plt.hist(diff, bins=100)
            plt.title(f"{result.stem} - {scaler}")

        scaler_rows.append(
            {
                # Fifth "_"-separated field of the file stem.
                "classifier": result.stem.split("_")[4],
                "maxdiff": diff.max(),
                "mindiff": diff.min(),
                "meandiff": diff.mean(),
                "stddiff": diff.std(),
                # Best leakage score minus best correct score; the two maxima
                # may come from different seeds.
                "bestdiff": pd_data["bcc_leakage"].max() - pd_data["bcc_correct"].max(),
                "bestcorrect": pd_data["bcc_correct"].max(),
                "meancorrect": pd_data["bcc_correct"].mean(),
                "worstcorrect": pd_data["bcc_correct"].min(),
            }
        )

    # Write the table once per scaler, after all of its files were processed
    # (rather than rewriting the CSV after every single file). As before, no
    # CSV is produced when no file matched.
    if scaler_rows:
        pd.DataFrame(scaler_rows).to_csv(f"scaler_{scaler}.csv")

# Renders the histograms when PLOT_DIFF_HISTOGRAMS is enabled; with no open
# figures this call does nothing.
plt.show()