In [1]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file

In [2]:
# Generator (sub-directory) names, listed in the column order used by the
# result tables further down.
generators = [
    "cluster_netgen",
    "compression",
    "expansion",
    "explosion",
    "grid",
    "cluster",
    "implosion",
    "linearprojection",
    "rotation",
    "uniform_portgen",
]

# Load every benchmark instance referenced by the CEPS index file.
index_path = DATA_DIR / "TSP" / "CEPS_benchmark" / "index.json"
instances = TSP_from_index_file(filepath=index_path)

# Keep the instances at positions 0, 50, ..., 450.
# NOTE(review): presumably the index stores 50 consecutive instances per
# generator, so this picks one representative per generator -- confirm.
test_instances = InstanceList()
for position in range(0, 500, 50):
    test_instances.append(instances[position])

# Instance id -> name of the directory that directly contains the instance file.
id_to_generator_name = dict(
    (instance.id(), instance.filepath.parts[-2]) for instance in test_instances
)

In [3]:
PREFIX = "run-245-sur-50"

# Path.glob yields matches in an arbitrary, filesystem-dependent order. Sorting
# makes the run index `idx` (used as the column label below) refer to the same
# database on every execution.
db_paths = sorted(DATABASE_DIR.glob(f"{PREFIX}-*.db"))
if not db_paths:
    # Without this check pd.concat fails later with the far less helpful
    # "No objects to concatenate" (also a ValueError).
    raise ValueError(f"No databases matching {PREFIX}-*.db found in {DATABASE_DIR}")

# One Series per database: index = instance_id, name = run index.
frames = []

for idx, db_path in enumerate(db_paths):
    db = DB(db_path)
    results = pd.read_sql_query("SELECT * FROM results", db._conn)
    series = (
        # Only rows whose prefix starts with "test".
        results.loc[results["prefix"].str.startswith("test")]
        # Lowest cost within each (instance_id, prefix) group ...
        .groupby(["instance_id", "prefix"])["cost"]
        .min()
        .reset_index()
        # ... then the lower median of those minima for each instance.
        .groupby("instance_id")["cost"]
        .quantile(0.5, interpolation="lower")
        # .mean()  # alternative aggregation: average instead of lower median
        .rename(idx)
    )
    frames.append(series)

# Average the per-run values across all runs and label the column with PREFIX.
df = (
    pd.concat(frames, axis=1)
    .mean(axis=1)
    # .std(axis=1)  # alternative: spread across runs instead of the average
    .round(2)
    .rename(PREFIX)
    .to_frame()
)
# Replace instance ids by generator names, then show one column per generator.
df.index = df.index.map(id_to_generator_name)
df.loc[generators].T

instance_id,cluster_netgen,compression,expansion,explosion,grid,cluster,implosion,linearprojection,rotation,uniform_portgen
run-245-sur-50,1.54,2.63,22.37,41.36,25.69,23.11,1.78,0.91,2.63,21.68


In [4]:
# Column-wise mean of the table currently bound to `df`, rounded to two decimals.
df.mean(axis=0).round(decimals=2)

run-245-sur-50    14.37
dtype: float64

In [5]:
# db_path = list(DATABASE_DIR.glob(f"{PREFIX}-*.db"))[1]
# db = DB(db_path)
# results = pd.read_sql_query("SELECT * FROM results", db._conn)
# (
#     results.loc[results["prefix"].str.startswith("test")]
#     .groupby(["instance_id", "prefix"])["cost"]
#     .min()
#     .reset_index()
#     .loc[lambda x: x["instance_id"] == "1765803814609198146", "cost"].value_counts()
#     # .groupby("instance_id")["cost"]
#     # .quantile(0.5, interpolation="lower")
#     # .rename(idx)
# )

In [6]:
# For every instance, count in how many runs the aggregated cost collected in
# `frames` is exactly 100.
# NOTE(review): 100 is presumably the penalty cost of a failed / timed-out run -- confirm.
# NOTE(review): this rebinds `df`; re-running the `df.mean()` cell afterwards
# would operate on these counts instead of the cost table.
per_run_costs = pd.concat(frames, axis=1)
hits_per_instance = per_run_costs.eq(100).sum(axis=1)

df = hits_per_instance.rename(PREFIX).to_frame()
df.index = df.index.map(id_to_generator_name)
df.loc[generators].T

instance_id,cluster_netgen,compression,expansion,explosion,grid,cluster,implosion,linearprojection,rotation,uniform_portgen
run-245-sur-50,0,0,1,2,1,1,0,0,0,1


In [7]:
# Run index -> summed `time` of the selected "config" rows of that run's database.
times = {}

# Sorted so that `idx` refers to the same database on every execution
# (Path.glob yields matches in an arbitrary, filesystem-dependent order).
for idx, db_path in enumerate(sorted(DATABASE_DIR.glob(f"{PREFIX}-*.db"))):
    db = DB(db_path)
    results = pd.read_sql_query("SELECT * FROM results", db._conn)
    # Rows whose prefix starts with "config" and whose `cached` and `surrogate`
    # flags are both 0.
    # NOTE(review): presumably these are the runs that were really executed
    # (neither served from cache nor predicted by the surrogate) -- confirm.
    selected_rows = (
        results["prefix"].str.startswith("config")
        & results["cached"].eq(0)
        & results["surrogate"].eq(0)
    )
    times[idx] = results.loc[selected_rows, "time"].sum()

# Average of the per-run totals, rounded to two decimals.
np.round(pd.Series(times).mean(), 2)

252389.29

In [8]:
# Same average as in the previous cell, divided by 3600.
# NOTE(review): this reads as seconds -> hours; confirm that `time` is stored in seconds.
mean_total_time = pd.Series(times).mean()
mean_total_time / 3600

70.10813496191714