In [1]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file
from src.solver.TSP_LKH_Solver import TSP_LKH_Solver

In [2]:
# Generator (sub-directory) names in the row order used for the result table.
generators = [
    "cluster_netgen",
    "compression",
    "expansion",
    "explosion",
    "grid",
    "cluster",
    "implosion",
    "linearprojection",
    "rotation",
    "uniform_portgen",
]

# Test instances are listed in an index file below DATA_DIR.
test_index_file = DATA_DIR / "TSP" / "TEST" / "index.json"
test_instances = TSP_from_index_file(filepath=test_index_file)

# Two lookups keyed by instance id, both derived from the instance's file path:
#   id_to_generator_name -> second-to-last path component (the containing directory)
#   id_to_name           -> last path component (the file name)
id_to_generator_name = {}
id_to_name = {}
for instance in test_instances:
    path_parts = instance.filepath.parts
    instance_id = instance.id()
    id_to_generator_name[instance_id] = path_parts[-2]
    id_to_name[instance_id] = path_parts[-1]

In [3]:
# Run-name prefix selecting which result databases are analysed: the analysis cells glob
# DATABASE_DIR for f"{PREFIX}-*.db".
# NOTE(review): the outputs saved in this notebook list "run-30-sur-50-*.db" files, i.e. they
# were produced with the alternative prefix below, not with the active one. Restart the kernel
# and run all cells after switching so that code and outputs agree.
PREFIX = "run-plain-30"
# PREFIX = "run-30-sur-50"

In [4]:
# Per-database summary for every run matching PREFIX: the share of configuration-phase results
# that hit the cost cutoff, errored, or came from the surrogate, plus the cutoff and error
# shares of the test-phase results. One table row per database file.
rows = []
# sorted(): glob() makes no promise about the order in which matches are yielded (assuming
# DATABASE_DIR is a pathlib.Path), so sorting keeps the row order of the table stable between
# runs and machines. The loop index previously produced by enumerate() was never used.
for db_path in sorted(DATABASE_DIR.glob(f"{PREFIX}-*.db")):
    db = DB(db_path)
    # NOTE(review): this reads through DB's private `_conn` attribute and never closes the
    # connection; switch to a public accessor / close method if DB offers one.
    results = pd.read_sql_query("SELECT * FROM results", db._conn)

    # Configuration phase: rows whose prefix starts with "config".
    results_config = results.loc[results["prefix"].str.startswith("config"), :]
    # Test phase: rows whose prefix starts with "test".
    results_test = results.loc[results["prefix"].str.startswith("test"), :]

    # The mean of a boolean mask is the fraction of rows for which it is True
    # (NaN when the phase has no rows at all).
    rows.append({
        "name": db_path.name,
        "config_cutoff_ratio": (results_config["cost"] == TSP_LKH_Solver.MAX_COST).mean(),
        "config_error_ratio": (results_config["error"] == 1).mean(),
        "config_surrogate_ratio": (results_config["surrogate"] == 1).mean(),
        "test_cutoff_ratio": (results_test["cost"] == TSP_LKH_Solver.MAX_COST).mean(),
        "test_error_ratio": (results_test["error"] == 1).mean(),
    })

pd.DataFrame(rows).style.format(precision=2)

Unnamed: 0,name,config_cutoff_ratio,config_error_ratio,config_surrogate_ratio,test_cutoff_ratio,test_error_ratio
0,run-30-sur-50-951415.db,0.52,0.08,0.26,0.41,0.0
1,run-30-sur-50-951427.db,0.5,0.09,0.26,0.02,0.0
2,run-30-sur-50-951428.db,0.53,0.06,0.26,0.16,0.01
3,run-30-sur-50-951429.db,0.44,0.06,0.26,0.13,0.0
4,run-30-sur-50-951430.db,0.47,0.06,0.26,0.43,0.05
5,run-30-sur-50-951431.db,0.5,0.07,0.26,0.5,0.0
6,run-30-sur-50-951432.db,0.52,0.07,0.26,0.5,0.01
7,run-30-sur-50-951433.db,0.47,0.06,0.26,0.0,0.0
8,run-30-sur-50-951434.db,0.44,0.05,0.26,0.03,0.0
9,run-30-sur-50-951435.db,0.47,0.06,0.26,0.03,0.03


In [18]:
frames = []

def agg_cost(x, max_cost=None):
    """Pick one representative cost from a group of costs, ignoring cutoff values.

    Values greater than or equal to the cutoff are discarded. From the remaining values the
    one at position ``len(x) // 2`` (counted on the *unfiltered* length) is returned; when
    fewer values remain than that position requires, the last remaining value is returned.

    Parameters
    ----------
    x : pandas.Series
        Costs of one group. Values are used in the order in which they appear; nothing is
        sorted here.
        NOTE(review): if this is meant to be a median, the caller has to pass the costs
        sorted in ascending order - confirm the intent.
    max_cost : float, optional
        Cutoff cost. Defaults to ``TSP_LKH_Solver.MAX_COST`` (looked up at call time), which
        is what the function used unconditionally before.

    Returns
    -------
    float
        The selected cost, or NaN when no value is below the cutoff. Previously that case
        raised ``IndexError`` from ``iloc[-1]`` on an empty series; NaN is skipped by pandas'
        default mean aggregations.
    """
    if max_cost is None:
        max_cost = TSP_LKH_Solver.MAX_COST

    count = x.shape[0]
    below_cutoff = x[x < max_cost]
    if below_cutoff.empty:
        # Every value hit the cutoff: there is nothing to select from.
        return float("nan")

    idx = count // 2
    if below_cutoff.shape[0] <= idx:
        # Too few values survived the filter to reach the middle position.
        return below_cutoff.iloc[-1]
    return below_cutoff.iloc[idx]
    

# For every run database matching PREFIX, reduce the test-phase results to one cost per
# instance and collect that series in `frames`.
for idx, db_path in enumerate(DATABASE_DIR.glob(f"{PREFIX}-*.db")):
    db = DB(db_path)
    results = pd.read_sql_query("SELECT * FROM results", db._conn)

    # Test-phase rows only.
    is_test_row = results["prefix"].str.startswith("test")
    test_results = results.loc[is_test_row]

    # Lowest cost for each (instance, prefix) pair ...
    best_per_prefix = (
        test_results.groupby(["instance_id", "prefix"])["cost"].min().reset_index()
    )
    # ... then a single value per instance, chosen by agg_cost.
    series = best_per_prefix.groupby("instance_id")["cost"].agg(agg_cost)
    frames.append(series)


# One column per run; average across runs, round, and label the result with PREFIX.
cost_per_run = pd.concat(frames, axis=1)
mean_cost = cost_per_run.mean(axis=1).round(2).rename(PREFIX)

# Attach generator and file name to each instance id.
labelled = mean_cost.to_frame().assign(
    generator=lambda frame: frame.index.map(id_to_generator_name),
    name=lambda frame: frame.index.map(id_to_name),
)

# Rows: generators (in the order of `generators`); columns: instance file names.
pivoted = labelled.pivot_table(index="generator", columns="name", values=PREFIX)
df = pivoted.loc[generators, :]
df["mean"] = df.mean(axis=1)

In [19]:
# Display the per-generator table: one row per generator (in the order of `generators`), one
# column per test-instance file name, plus the row-wise "mean" column.
df

name,000.tsp,001.tsp,002.tsp,003.tsp,004.tsp,mean
generator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cluster_netgen,0.16,0.76,0.86,0.49,1.4,0.734
compression,0.42,0.34,1.6,0.31,0.19,0.572
expansion,0.82,0.31,0.72,0.55,0.19,0.518
explosion,0.58,0.24,0.12,0.52,0.42,0.376
grid,0.38,0.39,0.32,0.82,0.19,0.42
cluster,0.51,0.19,0.35,0.24,0.14,0.286
implosion,0.08,0.31,1.61,0.58,0.27,0.57
linearprojection,0.23,0.6,0.33,0.19,0.39,0.348
rotation,0.41,2.33,0.16,0.23,0.27,0.68
uniform_portgen,1.28,1.03,0.16,0.66,0.23,0.672


In [53]:
# Overall mean cost over every generator/instance cell of the table, shown as a one-element
# Series labelled with PREFIX.
# `df` is the pivot table at this point, so a plain `df.mean()` would print one mean per column
# (including the helper "mean" column) instead of a single number. The "mean" column is dropped
# so it is not averaged in a second time, and NaN cells are ignored.
pd.Series(
    {PREFIX: np.nanmean(df.drop(columns="mean", errors="ignore").to_numpy(dtype=float))}
).round(2)

run-30-sur-50    0.52
dtype: float64

In [54]:
# Total `time` per run database, counting only configuration-phase results that were neither
# served from the cache nor produced by the surrogate. Keys are the enumeration index of the
# database file.
times = {}

for idx, db_path in enumerate(DATABASE_DIR.glob(f"{PREFIX}-*.db")):
    db = DB(db_path)
    results = pd.read_sql_query("SELECT * FROM results", db._conn)

    is_config = results["prefix"].str.startswith("config")
    not_cached = results["cached"] == 0
    not_surrogate = results["surrogate"] == 0
    counted = is_config & not_cached & not_surrogate

    times[idx] = results.loc[counted, "time"].sum()

# Average of the per-database totals, rounded to two decimals.
mean_total_time = pd.Series(times).mean()
np.round(mean_total_time, 2)

20640.65

In [55]:
# Mean of the per-database time totals divided by 3600, i.e. converted from seconds to hours
# (assumes the `time` column is recorded in seconds - TODO confirm).
pd.Series(times).mean() / 3600

5.73351327659766