In [5]:
import numpy as np
import pandas as pd

from src.constant import DATABASE_DIR
from src.database import DB
from src.database.queries import *
# Widen text columns so long values (e.g. full database paths) are not truncated when displayed.
pd.set_option("display.max_colwidth", 200)

## Loading results from the run databases

In [6]:
# Cost value that marks a cut-off test run: `_cost_skip_cutoff` drops costs at or
# above it and `_test_pct_cutoff` counts costs exactly equal to it.
# NOTE(review): presumably the penalised (10 x cut-off) cost — confirm.
CUTOFF_COST = 1000.0
# Value reported by `_cost_skip_cutoff` when every run of an instance was cut off.
# NOTE(review): presumably the raw cut-off time — confirm.
ALL_CUTOFF_FALLBACK = 100.0


def _cost_skip_cutoff(x):
    """Upper median of the costs in `x`, never reporting a cut-off value.

    The median position is computed from the full sample size
    (``len(x) // 2``), but it is looked up among the sorted costs that are
    strictly below ``CUTOFF_COST``.

    Returns:
        * ``ALL_CUTOFF_FALLBACK`` if no cost is below the cut-off;
        * the largest non-cut-off cost if the median position falls past the
          end of the non-cut-off costs;
        * otherwise the cost at the median position.
    """
    count = x.shape[0]
    x = x[x < CUTOFF_COST].sort_values()
    idx = count // 2
    if x.shape[0] == 0:
        return ALL_CUTOFF_FALLBACK
    if x.shape[0] <= idx:
        return x.iloc[-1]
    return x.iloc[idx]


def _par10(x):
    """Upper median of `x`: the element at position ``len(x) // 2`` after an ascending sort.

    NOTE(review): the name suggests PAR-10; that only holds if the stored
    costs already include the penalty — confirm.
    """
    return x.sort_values().iloc[x.shape[0] // 2]


def _test_pct_cutoff(x):
    """Fraction (0.0-1.0, not a percentage) of values in `x` exactly equal to ``CUTOFF_COST``."""
    return (x == CUTOFF_COST).mean()


records = []

n = 30
for sur in ["plain", "sur-50"]:
    # `prefix` is a glob pattern selecting every run database of this variant.
    if sur == "plain":
        prefix = f"run-plain-{n}-*.db"
    else:
        prefix = f"run-{n}-{sur}-*.db"

    # glob() yields entries in an unspecified order; sort so the rows of `df`
    # come out in the same order on every run.
    databases = sorted(DATABASE_DIR.glob(prefix))
    if not databases:
        # Make a missing variant visible instead of silently producing no rows.
        print(f"no databases matched {prefix!r} in {DATABASE_DIR}")

    for db_path in databases:
        print(db_path)
        db = DB(db_path)
        # NOTE(review): this reaches into the private `_conn` attribute and the
        # connection is not closed here — confirm whether DB exposes a public
        # connection / close API.
        results = pd.read_sql_query("SELECT * FROM results", db._conn)

        # test_costs: for every (instance, test prefix) keep the minimum cost,
        # then group those minima per instance.
        test_costs = (
            results.loc[results["prefix"].str.startswith("test")]
            .groupby(["instance_id", "prefix"])["cost"]
            .min()
            .reset_index()
            .groupby("instance_id")["cost"]
        )

        # cost skip cut-offs (best effort: a failure is reported and stored as NaN)
        try:
            cost_skip_cutoff = test_costs.agg(_cost_skip_cutoff).mean()
        except Exception as e:
            print(db_path, e)
            cost_skip_cutoff = np.nan

        # par-10: per-instance upper median, averaged over instances
        par10 = test_costs.agg(_par10).mean()

        # % of cut-offs: per-instance fraction, averaged over instances
        test_pct_cutoff = test_costs.agg(_test_pct_cutoff).mean()

        # total cpu time: only configuration runs that were neither cached nor
        # answered by the surrogate; the summed "time" is divided by 3600
        # (presumably seconds -> hours — confirm the unit).
        config = results.loc[
            results["prefix"].str.startswith("config")
            & results["cached"].eq(0)
            & results["surrogate"].eq(0)
        ]
        cpu_time = config["time"].sum() / 3600

        # record
        record = {
            "n": n,
            "sur": sur,
            "db_path": db_path,
            "cost_skip_cutoff": cost_skip_cutoff,
            "par10": par10,
            "test_pct_cutoff": test_pct_cutoff,
            "cpu_time": cpu_time,
        }
        records.append(record)

# One row per run database.
df = pd.DataFrame(records)
#

C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027416.db
C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027417.db
C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027418.db
C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027419.db
C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027420.db
C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027421.db
C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027422.db
C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027423.db
C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-102

In [7]:
# Display the summary table built in the loading cell: one row per run database.
df

Unnamed: 0,n,sur,db_path,cost_skip_cutoff,par10,test_pct_cutoff,cpu_time
0,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027416.db,5.007522,5.007522,0.0,7.158777
1,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027417.db,5.337729,5.337729,0.0,7.6518
2,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027418.db,2.04322,2.04322,0.0,7.128663
3,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027419.db,5.143077,5.143077,0.0,7.424674
4,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027420.db,1.919854,1.919854,0.0,7.544879
5,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027421.db,5.060848,5.060848,0.0,7.181167
6,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027422.db,4.841837,4.841837,0.0,7.661298
7,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027423.db,3.862588,3.862588,0.0,7.309754
8,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027424.db,5.028222,5.028222,0.0,7.729512
9,30,plain,C:\Users\grzegorzzakrzewski\Documents\DataScience\raw-algorithm-portfolios-v2b\database\run-plain-30-1027425.db,3.207572,3.207572,0.0,7.196363


In [8]:
# Average the headline metrics over all run databases that share the same (n, sur) setting.
group_keys = ["n", "sur"]
metric_columns = ["cost_skip_cutoff", "cpu_time"]
df.groupby(group_keys)[metric_columns].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,cost_skip_cutoff,cpu_time
n,sur,Unnamed: 2_level_1,Unnamed: 3_level_1
30,plain,4.145247,7.398689
