In [1]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from src.constant import MAIN_DIR
from src.database import DB

# Directory holding the archived phase-2 runs; run databases are looked up
# under PHASE2_DIR / <n_train> / <policy>.
PHASE2_DIR = MAIN_DIR / "archive" / "phase2"
# n_train values as strings: they are used as directory names and later
# compared as strings against the "n_train" column.
N_TRAIN_LIST = ["25", "100"]
# Policy names; each one is also the name of a sub-directory of an n_train directory.
POLICY_LIST = ["baseline", "ea", "eb", "ec", "ia", "ib"]

In [None]:
# Fixed seed so that the values drawn from `rng` are the same after every
# Restart & Run All. numpy is imported in the notebook's top import cell.
SEED = 42
rng = np.random.default_rng(SEED)



383329927

In [49]:
# Draw a single integer from the seeded generator. `high` is exclusive by
# default, so the largest possible value is 2**32 - 2.
rng.integers(low=0, high=2**32 - 1)

395720737

In [2]:
def summarize_run(results):
    """Reduce one run's results table to the metrics reported per database.

    Parameters
    ----------
    results : pd.DataFrame
        Table returned by ``DB.get_results()``. The columns read here are
        ``prefix``, ``instance_id``, ``solver_id``, ``cost``, ``time``,
        ``cached`` and ``surrogate``.

    Returns
    -------
    dict or None
        ``None`` when no row has a ``prefix`` starting with ``"test"``.
        Otherwise a dict with the keys ``cost``, ``cpu_time``,
        ``surrogate_pct`` and ``real_pct`` (in that order).
    """
    results_test = results.loc[results["prefix"].str.startswith("test")]
    if results_test.empty:
        return None

    cost = (
        results_test.groupby(["instance_id", "prefix"])["cost"]
        .min()  # min for every problem (of 2 solvers)
        .reset_index()
        .groupby("instance_id")["cost"]
        .median()  # median over 5 runs
        .mean()  # total mean score
    )

    # All remaining metrics are computed on the "config*" rows only, so the
    # filter is evaluated once and reused.
    results_config = results.loc[results["prefix"].str.startswith("config")]

    # Only rows that were neither cached nor surrogate count towards the time
    # total. The division by 3600 presumably converts seconds to hours --
    # TODO confirm the unit of the "time" column.
    is_real_run = results_config["cached"].eq(0) & results_config["surrogate"].eq(0)
    cpu_time = results_config.loc[is_real_run, "time"].sum() / 3600

    surrogate_by_pair = results_config.groupby(["solver_id", "instance_id"])["surrogate"]
    # Mean over (solver, instance) pairs of the maximum "surrogate" value.
    surrogate_pct = surrogate_by_pair.max().mean()
    # Share of (solver, instance) pairs whose minimum "surrogate" value is 0.
    real_pct = surrogate_by_pair.min().eq(0).mean()

    return {
        "cost": cost,
        "cpu_time": cpu_time,
        "surrogate_pct": surrogate_pct,
        "real_pct": real_pct,
    }


records = []
total_iterations = len(N_TRAIN_LIST) * len(POLICY_LIST)

# The context manager closes the progress bar even if a database fails to load.
with tqdm(total=total_iterations, desc="Processing") as progress_bar:
    for n_train in N_TRAIN_LIST:
        for policy in POLICY_LIST:
            # Update description to show current n_train and policy
            progress_bar.set_description(f"n_train={n_train}, policy={policy}")

            # glob() yields paths in arbitrary order; sorting keeps the row
            # order of `df` stable between runs and machines.
            db_path_list = sorted((PHASE2_DIR / n_train / policy).glob("run-policy-*.db"))

            for db_path in db_path_list:
                db = DB(db_path)
                summary = summarize_run(db.get_results())
                if summary is None:
                    print(f"No results {db_path}")
                    continue
                records.append(
                    {
                        "db_path": "/".join(db_path.parts[-3:]),
                        "n_train": n_train,
                        "policy": policy,
                        **summary,
                    }
                )
            progress_bar.update(1)

df = pd.DataFrame(records)
df

Processing:   0%|          | 0/12 [00:00<?, ?it/s]

Unnamed: 0,db_path,n_train,policy,cost,cpu_time,surrogate_pct,real_pct
0,25/baseline/run-policy-baseline-25-1012821.db,25,baseline,0.20404,1.085407,0.000000,1.000000
1,25/baseline/run-policy-baseline-25-1012829.db,25,baseline,0.18832,1.231176,0.000000,1.000000
2,25/baseline/run-policy-baseline-25-1012830.db,25,baseline,0.73652,1.443054,0.000000,1.000000
3,25/baseline/run-policy-baseline-25-1012832.db,25,baseline,0.30916,0.832994,0.000000,1.000000
4,25/baseline/run-policy-baseline-25-1013020.db,25,baseline,0.20116,1.118927,0.000000,1.000000
...,...,...,...,...,...,...,...
92,100/ib/run-policy-ib-100-1013189.db,100,ib,0.16168,1.515501,0.905660,0.433962
93,100/ib/run-policy-ib-100-1013195.db,100,ib,0.29312,1.845759,0.918033,0.475410
94,100/ib/run-policy-ib-100-1013261.db,100,ib,0.54144,2.299865,0.907407,0.685185
95,100/ib/run-policy-ib-100-1013267.db,100,ib,0.15760,1.967770,0.939024,0.402439


In [3]:
# Number of rows with a non-null cpu_time per (policy, n_train) combination.
# n_train holds strings, so the columns sort lexicographically ("100" before "25").
pd.pivot_table(
    df,
    values="cpu_time",
    index="policy",
    columns="n_train",
    aggfunc="count",
)

n_train,100,25
policy,Unnamed: 1_level_1,Unnamed: 2_level_1
baseline,8,9
ea,8,9
eb,8,9
ec,5,7
ia,8,9
ib,8,9


In [5]:
# Mean cpu_time over the run databases of each (policy, n_train) combination.
pd.pivot_table(
    df,
    values="cpu_time",
    index="policy",
    columns="n_train",
    aggfunc="mean",
)

n_train,100,25
policy,Unnamed: 1_level_1,Unnamed: 2_level_1
baseline,4.408675,1.070346
ea,3.207086,0.816572
eb,2.123313,0.587298
ec,11.848141,1.64492
ia,3.053472,0.74271
ib,1.729091,0.410029


In [6]:
# Mean cost over the run databases of each (policy, n_train) combination.
pd.pivot_table(
    df,
    values="cost",
    index="policy",
    columns="n_train",
    aggfunc="mean",
)

n_train,100,25
policy,Unnamed: 1_level_1,Unnamed: 2_level_1
baseline,0.273695,0.32416
ea,0.33009,0.349618
eb,0.356395,0.361996
ec,0.264384,0.198086
ia,0.23379,0.24392
ib,0.3547,0.354658


In [23]:
def agg(x):
    """Average the per-run metrics by policy and relate them to the baseline.

    Parameters
    ----------
    x : pd.DataFrame
        Per-run table with the columns ``policy``, ``cost``, ``cpu_time``,
        ``surrogate_pct`` and ``real_pct``. It must contain at least one row
        whose ``policy`` is ``"baseline"``; otherwise the baseline lookup
        raises ``KeyError``.

    Returns
    -------
    pd.DataFrame
        One row per policy, rounded to 4 decimals, with the columns ``cost``,
        ``cost_ratio_to_baseline``, ``cpu_time``,
        ``cpu_time_ratio_to_baseline``, ``surrogate_pct`` and ``real_pct``.
    """
    metric_cols = ["cost", "cpu_time", "surrogate_pct", "real_pct"]
    per_policy = x.groupby("policy").agg(**{col: (col, "mean") for col in metric_cols})

    # Ratios are computed from the unrounded means; rounding happens afterwards.
    baseline_row = per_policy.loc["baseline"]
    with_ratios = per_policy.assign(
        cost_ratio_to_baseline=per_policy["cost"] / baseline_row["cost"],
        cpu_time_ratio_to_baseline=per_policy["cpu_time"] / baseline_row["cpu_time"],
    )

    output_cols = [
        "cost",
        "cost_ratio_to_baseline",
        "cpu_time",
        "cpu_time_ratio_to_baseline",
        "surrogate_pct",
        "real_pct",
    ]
    return with_ratios.round(4)[output_cols]

# Split the per-run table by n_train (stored as strings) and aggregate each
# subset per policy.
df25 = df[df["n_train"].eq("25")].copy()
df100 = df[df["n_train"].eq("100")].copy()

df25_agg, df100_agg = agg(df25), agg(df100)

In [None]:
# Show the per-policy summary for n_train == "25".
df25_agg

Unnamed: 0_level_0,cost,cost_ratio_to_baseline,cpu_time,cpu_time_ratio_to_baseline,surrogate_pct,real_pct
policy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
baseline,0.3242,1.0,1.0703,1.0,0.0,1.0
ea,0.3496,1.0785,0.8166,0.7629,0.4641,0.6681
eb,0.362,1.1167,0.5873,0.5487,0.9258,0.6856
ec,0.222,0.6849,1.7958,1.6778,0.9145,1.0
ia,0.2439,0.7525,0.7427,0.6939,0.4432,0.709
ib,0.3547,1.0941,0.41,0.3831,0.9196,0.356


In [27]:
# Show the per-policy summary for n_train == "100".
df100_agg

Unnamed: 0_level_0,cost,cost_ratio_to_baseline,cpu_time,cpu_time_ratio_to_baseline,surrogate_pct,real_pct
policy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
baseline,0.2737,1.0,4.4087,1.0,0.0,1.0
ea,0.3301,1.2061,3.2071,0.7274,0.4649,0.669
eb,0.3564,1.3022,2.1233,0.4816,0.9299,0.6667
ec,0.2644,0.966,11.8481,2.6875,0.9238,1.0
ia,0.2338,0.8542,3.0535,0.6926,0.4441,0.7091
ib,0.3547,1.296,1.7291,0.3922,0.9226,0.3946


In [66]:
# Load the results table of one specific run (policy "eb", n_train "25") for
# closer inspection. This rebinds `db` and `results`.
db = DB(PHASE2_DIR.joinpath("25/eb/run-policy-eb-25-1012840.db"))
results = db.get_results()