In [None]:
# Environment setup for a fresh runtime: the LaTeX packages that matplotlib's usetex mode
# relies on, plus Plotly with Kaleido and a Chrome build for static figure export.
# `-y` answers apt's confirmation prompt so the cell can run unattended (Run All).
!apt-get update
!apt-get install -y dvipng texlive-latex-extra texlive-fonts-recommended cm-super
# %pip installs into the environment of the running kernel.
%pip install -U plotly kaleido
!plotly_get_chrome -y

Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Hit:2 https://cli.github.com/packages stable InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease [24.3 kB]
Get:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ Packages [80.2 kB]
Get:13 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64

Installing Chrome for Plotly...
Chrome installed successfully.
The Chrome executable is now located at: /usr/local/lib/python3.12/dist-packages/choreographer/cli/browser_exe/chrome-linux64/chrome


In [None]:
# warning filters
# These are installed ahead of the third-party imports that follow, so they are already
# in effect while those modules load.
import warnings
# `message` is a regular expression matched against the start of the warning text.
warnings.filterwarnings("ignore", message="Pandas requires version")
warnings.filterwarnings("ignore", message="A NumPy version >=")
warnings.filterwarnings("ignore", message="invalid value encountered in subtract")
# Parentheses are escaped because the pattern is a regex; this filter is limited to UserWarning.
warnings.filterwarnings("ignore", category=UserWarning, message="set_ticklabels\\(\\) should only be used with a fixed number of ticks")

import re

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.io as pio
import plotly.graph_objects as go

# Text rendering for matplotlib figures: the "cm" math font set, LaTeX rendering for
# text (usetex), and a serif font family.
plt.rcParams["mathtext.fontset"] = "cm"
plt.rc('text', usetex=True)
plt.rc('font', family='serif')


In [None]:
# Matplotlib text settings; these repeat the settings applied right after the imports.
plt.rcParams["mathtext.fontset"] = "cm"
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# Load the metrics CSV for one experiment. The file name is assembled from `synth`,
# `dataset`, and the optional `extra` suffix (skipped when `extra` is empty); with the
# values below it is "metrics_mst_BoD_Combo_6.csv".
dataset = "BoD"
synth="mst"
extra = "Combo_6"
df = pd.read_csv(f"metrics_{synth}_{dataset}{'_' + extra if extra else ''}.csv")
# Second distinct value of the "DP-Method" column; used in the figure titles.
# NOTE(review): index 1 presumably skips a leading NaN coming from rows without a DP
# method - confirm against the CSV. Raises IndexError when the column has fewer than
# two distinct values.
dp_method = df["DP-Method"].unique()[1]

# Remove rows with any 'inf' value
def remove_inf_rows(df):
    """Return the rows of ``df`` in which no cell holds an infinite value.

    A cell counts as infinite when its string form, ignoring case and surrounding
    whitespace, is ``inf`` or ``infinity`` with an optional sign. That covers float
    infinities as well as textual ones read from a file, while rows whose text merely
    contains the letters "inf" inside a longer word are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.

    Returns
    -------
    pandas.DataFrame
        The rows without any infinite cell, with their original index labels.
    """
    inf_token = re.compile(r'[+-]?inf(?:inity)?', re.IGNORECASE)

    def is_inf(value):
        return inf_token.fullmatch(str(value).strip()) is not None

    # DataFrame.map is the element-wise method on recent pandas; older releases only
    # provide applymap. Checking the class avoids being fooled by a column named "map".
    elementwise = df.map if hasattr(type(df), "map") else df.applymap
    mask = elementwise(is_inf)
    return df[~mask.any(axis=1)]

# Rebind `df` to the frame returned by remove_inf_rows; later statements work on that result.
df = remove_inf_rows(df)

def smart_capitalize(text):
    """Upper-case the first character of every word in ``text``.

    Words are the pieces between spaces, underscores, and dashes. Only the leading
    character of a word is changed; the rest of the word and the delimiters themselves
    come back exactly as given.
    """
    # The capturing group makes re.split return the delimiters alongside the words.
    pieces = re.split(r'([ _-])', text)

    rebuilt = []
    for piece in pieces:
        if piece in ' _-':
            # A delimiter, or the empty string produced next to one: keep as-is.
            rebuilt.append(piece)
        else:
            rebuilt.append(piece[0].upper() + piece[1:])
    return ''.join(rebuilt)


# Categorize each method
def categorize_method(epsilon, fair_method, dp_method):
    """Build the display label for one experiment row.

    Parameters
    ----------
    epsilon
        Privacy budget; a non-null value marks the row as differentially private.
    fair_method
        Fairness method name; null or blank means no fairness method was applied.
    dp_method
        Accepted for the caller's convenience but not used when building the label.

    Returns
    -------
    str
        ``'Baseline'`` when neither DP nor a fairness method applies, ``'DP'`` for DP
        alone, the fairness code (``'RW'``, ``'EqOdds'``, otherwise the upper-cased
        name) for fairness alone, and ``'DP[<code>]'`` when both apply.
    """
    uses_dp = pd.notnull(epsilon)
    uses_fair = pd.notnull(fair_method) and str(fair_method).strip() != ''

    if not uses_fair:
        return "DP" if uses_dp else 'Baseline'

    # Two methods have dedicated short codes; any other name is simply upper-cased.
    short_codes = {'reweigh': 'RW', 'eqodds': 'EqOdds'}
    key = str(fair_method).strip().lower()
    code = short_codes.get(key, key.upper())

    return f"DP[{code}]" if uses_dp else code

# Label every row with its method category, computed row by row from the
# Epsilon / Fair-Method / DP-Method columns.
df["method"] = df.apply(lambda row: categorize_method(row['Epsilon'], row['Fair-Method'], row['DP-Method']), axis=1)
# Epsilon rounded to two decimals; the plotting loop selects rows by comparing against this column.
df["eps"] = df["Epsilon"].round(2)
# Independent copy of the labelled rows. categorize_method returns a string on every
# path, so the notnull() filter is not expected to drop any row.
df_filtered = df[df["method"].notnull()].copy()

# --- Bar weight formatting ---
from itertools import zip_longest

def text_label_weight(a, b, c=None):
    """Choose a font weight per position so the smallest magnitude stands out.

    The inputs are compared position by position on absolute value. At each position
    the series holding the smallest magnitude gets ``'bold'`` (ties are all bold) and
    the others get ``'normal'``. Missing entries - ``None``, NaN, or positions past the
    end of a shorter series - never win, and a position where every entry is missing is
    ``'normal'`` throughout.

    Parameters
    ----------
    a, b : iterable of numbers or None
        Series to compare.
    c : iterable of numbers or None, optional
        Optional third series.

    Returns
    -------
    tuple of list of str
        One list of weights per supplied series (two or three lists), each as long as
        the longest input.
    """
    missing = float('inf')

    def magnitude(value):
        # NaN is the only value unequal to itself. Ranking it as +inf, like None,
        # keeps it from poisoning min() and turning every label at that position bold.
        if value is None or value != value:
            return missing
        return abs(value)

    series = [a, b] if c is None else [a, b, c]
    magnitudes = [[magnitude(value) for value in values] for values in series]

    weights = tuple([] for _ in magnitudes)
    # Shorter series are padded with +inf so they simply lose at the extra positions.
    for row in zip_longest(*magnitudes, fillvalue=missing):
        smallest = min(row)
        for bucket, value in zip(weights, row):
            is_best = smallest != missing and value <= smallest
            bucket.append('bold' if is_best else 'normal')
    return weights


def text_label_color(a, b, c=None, base=None):
    """Choose a label colour per position by comparing magnitudes with ``base``.

    An entry is ``'black'`` when its absolute value is strictly below ``abs(base)`` and
    ``'red'`` otherwise. ``None`` entries, and positions past the end of a shorter
    series, count as +inf and therefore come out ``'red'``.

    Parameters
    ----------
    a, b : iterable of numbers or None
        Series to colour.
    c : iterable of numbers or None, optional
        Optional third series.
    base : number
        Reference value. ``abs(base)`` is evaluated for every entry, so it must be a
        number whenever the inputs are non-empty, despite the ``None`` default.

    Returns
    -------
    tuple of list of str
        One list of colours per supplied series (two or three lists), each as long as
        the longest input.
    """
    infinity = float('inf')

    def magnitudes(values):
        return [infinity if value is None else abs(value) for value in values]

    columns = [magnitudes(a), magnitudes(b)]
    if c is not None:
        columns.append(magnitudes(c))

    colours = tuple([] for _ in columns)
    # Pad shorter series with +inf so every output list has the same length.
    for row in zip_longest(*columns, fillvalue=infinity):
        for bucket, value in zip(colours, row):
            bucket.append('black' if abs(value) < abs(base) else 'red')
    return colours

# --- Define method groups and labels ---
# Method labels in plotting order: the nine non-DP labels first, then "DP" and the
# DP-wrapped form of each fairness method.
groups = [
    "Baseline", "RW", "DIR", "LFR", "EGR", "GSR", "ROC", "EqOdds", "CEOP",
    "DP", "DP[RW]", "DP[DIR]", "DP[LFR]", "DP[EGR]", "DP[GSR]", "DP[ROC]", "DP[EqOdds]", "DP[CEOP]",
]

In [None]:
# Common settings
# --- Define epsilon values and metric to plot ---
# Epsilon values to plot (one figure each) and the metric codes shown in every figure.
epsilons = [0.25, 0.5, 0.75, 1, 5, 10, 15, 20]
metrics = ["MAD", "SPD", "EOD", "ACC", "PREC", "REC"]

# Long display names keyed by metric code.
metric_labels = {
    "ACC": "Accuracy",
    "PREC": "Precision",
    "REC": "Recall",
    "SPD": "Statistical Parity Difference (SPD)",
    "MAD": "Model Accuracy Difference (MAD)",
    "EOD": "Equal Opportunity Difference (EOD)",
}

# Nine bar colours: one base colour, then yellow, green, and blue ramps (light to dark).
colors = [
    "#A0342B",  # base / original
    "#F1E3A1",  # yellow, light
    "#E5D16C",  # yellow, mid
    "#C9AA3E",  # yellow, dark
    "#73BA9B",  # green, light
    "#003E1F",  # green, dark
    "#A3C2E5",  # blue, light
    "#5A6FD1",  # blue, mid
    "#1E2AA0",  # blue, dark
]

# The palette repeated once: 18 entries, so the i-th and (i+9)-th bars share a colour.
mapped_colors = colors * 2

for eps in epsilons:
    # Rows recorded at this epsilon, followed by every row that has no epsilon at all.
    eps_mask = df_filtered["eps"] == round(eps, 2)
    sub = pd.concat(
        [df_filtered[eps_mask], df_filtered[df_filtered["eps"].isnull()]],
        ignore_index=True,
    )

    methods_present = set(sub["method"].unique())
    dp_sub = df_filtered[eps_mask & (df_filtered["method"] == "DP")]

    # 3 x 2 grid with one panel per metric, filled left to right, then top to bottom.
    fig = make_subplots(
        rows=3,
        cols=2,
        subplot_titles=metrics,
        horizontal_spacing=0.05,
        vertical_spacing=0.08,
    )

    palette = sns.color_palette("tab10", 10)

    for panel_index, metric in enumerate(metrics):
        # divmod gives the zero-based (row, column) of the panel in the two-column grid.
        panel_row, panel_col = divmod(panel_index, 2)

        # Mean of the metric per method label; labels absent from `sub` get None.
        y = [
            sub.loc[sub["method"] == m, metric].mean() if m and m in methods_present else None
            for m in groups
        ]
        bars = go.Bar(x=groups, y=y, textangle=0, marker=dict(color=mapped_colors))
        fig.add_trace(bars, row=panel_row + 1, col=panel_col + 1)

    # Centred bold title naming the DP method, the epsilon, and the dataset.
    title_spec = {
        "text": f"{dp_method.upper()} ε = {eps} | {dataset.upper()}",
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
        "font": {
            "size": 24,
            "family": "Arial, sans-serif",
            "color": "black",
            "weight": "bold",
        },
    }

    # One layout update covers all six panels.
    fig.update_layout(
        title=title_spec,
        barmode="group",
        barcornerradius=8,
        showlegend=False,
        height=1200,  # more space for 6 subplots
        margin=dict(l=60, r=60, t=80, b=50),
        autosize=True,
    )

    # Export the figure as a PDF named after the experiment settings, then display it inline.
    out_file = f'fig_{synth}_eps_{eps}_{dataset}{"_"+extra if extra else ""}.pdf'
    pio.write_image(fig, out_file, format='pdf', height=1080, width=1920)
    fig.show()

In [None]:
from google.colab import files

# Gather the exported PDFs into pdf/, zip the folder, and send the archive to the browser.
# -p keeps mkdir from failing when pdf/ already exists, e.g. when this cell is re-run.
!mkdir -p pdf
!mv *.pdf pdf
!zip pdf.zip pdf/*
files.download('pdf.zip')

  adding: pdf-bod/fig_aim_eps_0.25_BoD_Combo_1.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.25_BoD_Combo_2.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.25_BoD_Combo_3.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.25_BoD_Combo_4.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.25_BoD_Combo_5.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.25_BoD_Combo_6.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.5_BoD_Combo_1.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.5_BoD_Combo_2.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.5_BoD_Combo_3.pdf (deflated 4%)
  adding: pdf-bod/fig_aim_eps_0.5_BoD_Combo_4.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.5_BoD_Combo_5.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.5_BoD_Combo_6.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.75_BoD_Combo_1.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.75_BoD_Combo_2.pdf (deflated 3%)
  adding: pdf-bod/fig_aim_eps_0.75_BoD_Combo_3.pdf (deflated 4%)
  adding: pdf-bod/fig_aim_eps_0

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>