In [1]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file
from src.solver.TSP_LKH_Solver import TSP_LKH_Solver

In [2]:
# Generator names; this list also fixes the order in which the result
# tables further down are displayed.
generators = [
    "cluster_netgen", "compression", "expansion", "explosion", "grid",
    "cluster", "implosion", "linearprojection", "rotation", "uniform_portgen",
]

# Load the benchmark index and keep the instances at positions 0, 50, ..., 450.
index_file = DATA_DIR / "TSP" / "CEPS_benchmark" / "index.json"
instances = TSP_from_index_file(filepath=index_file)

test_instances = InstanceList()
for position in range(0, 500, 50):
    test_instances.append(instances[position])

# Map each picked instance's id to the second-to-last component of its file path.
# NOTE(review): presumably that directory is named after the generator - confirm.
id_to_generator_name = {}
for picked in test_instances:
    picked_id = picked.id()
    id_to_generator_name[picked_id] = picked.filepath.parts[-2]

In [3]:
# Run-name prefix to analyse. Exactly one assignment below should be active;
# the database files are looked up with the glob pattern f"{PREFIX}-*.db".
# PREFIX = "run-plain-245"
PREFIX = "run-245-sur-50"
# PREFIX = "run-plain-500"
# PREFIX = "run-500-sur-50"
# PREFIX = "run-90-sur-50"
# PREFIX = "run-90-sur-70"
# PREFIX = "run-90-sur-90"

# Receives one Series of per-instance aggregated costs for each matching database.
frames = []

def agg_cost(x):
    """Pick a single representative cost from a Series of costs.

    Costs of 100 or more are discarded first
    (NOTE(review): presumably these are timeout/penalty values - confirm).
    From the remaining values, the one at position ``len(x) // 2`` is
    returned, where the position is computed from the *unfiltered* length.
    If too few values remain to reach that position, the last remaining
    value is returned instead.

    NOTE(review): values are taken in their incoming order; nothing here
    sorts them. If a median is intended, the input needs to be sorted by
    value first - confirm the intent before changing this.

    Parameters
    ----------
    x : pandas.Series
        Numeric costs.

    Returns
    -------
    scalar
        The selected cost, or NaN when no cost is below 100 (previously this
        case raised IndexError from ``iloc[-1]`` on an empty Series).
    """
    total = x.shape[0]
    valid = x[x < 100]
    if valid.empty:
        # Nothing usable: report a missing value instead of crashing the
        # whole groupby aggregation.
        return float("nan")
    middle = total // 2
    if valid.shape[0] <= middle:
        return valid.iloc[-1]
    return valid.iloc[middle]
    

# For every run database matching PREFIX, compute one aggregated test cost per
# instance and collect the resulting Series in `frames`.
# sorted() makes the processing order (and hence the column order after the
# later concat) independent of the order in which the file system lists files.
for db_path in sorted(DATABASE_DIR.glob(f"{PREFIX}-*.db")):
    db = DB(db_path)
    # NOTE(review): this reads the private `_conn` attribute and never closes
    # the connection explicitly; DB's public API is not visible here.
    results = pd.read_sql_query("SELECT * FROM results", db._conn)

    # Keep only rows whose prefix starts with "test", take the lowest cost per
    # (instance, prefix) pair, then reduce each instance's costs with agg_cost.
    is_test_row = results["prefix"].str.startswith("test")
    best_per_prefix = (
        results.loc[is_test_row]
        .groupby(["instance_id", "prefix"])["cost"]
        .min()
        .reset_index()
    )
    series = best_per_prefix.groupby("instance_id")["cost"].agg(agg_cost)
    frames.append(series)

# Fail with a clear message instead of the opaque "No objects to concatenate"
# that pd.concat would raise on an empty list.
if not frames:
    raise FileNotFoundError(
        f"no database matching '{PREFIX}-*.db' found in {DATABASE_DIR}"
    )
    

# Place the per-database Series side by side (one column per database) and
# average across the columns for every instance.
per_run_costs = pd.concat(frames, axis=1)
mean_costs = per_run_costs.mean(axis=1)  # alternative: per_run_costs.std(axis=1)
df = mean_costs.round(2).rename(PREFIX).to_frame()

# Relabel the rows through id_to_generator_name, then display a single row
# with one column per generator, in the order given by `generators`.
df.index = df.index.map(id_to_generator_name)
df.loc[generators].T

instance_id,cluster_netgen,compression,expansion,explosion,grid,cluster,implosion,linearprojection,rotation,uniform_portgen
run-245-sur-50,2.35,3.16,4.16,2.31,3.63,5.6,2.03,2.28,2.35,2.53


In [4]:
# Column mean over the rows selected by `generators`, shown with two decimals.
overall_mean = df.loc[generators].mean()
overall_mean.round(2)

run-245-sur-50    3.04
dtype: float64

In [6]:
# Summed "time" per database (keyed by enumeration index), counting only rows
# whose prefix starts with "config" and whose `cached` and `surrogate` columns
# are both 0.
times = {}

for run_idx, db_file in enumerate(DATABASE_DIR.glob(f"{PREFIX}-*.db")):
    db = DB(db_file)
    results = pd.read_sql_query("SELECT * FROM results", db._conn)

    is_config = results["prefix"].str.startswith("config")
    not_cached = results["cached"] == 0
    not_surrogate = results["surrogate"] == 0

    selected_times = results.loc[is_config & not_cached & not_surrogate, "time"]
    times[run_idx] = selected_times.sum()

# Mean of the per-database totals, rounded to two decimals.
mean_total_time = pd.Series(times).mean()
np.round(mean_total_time, 2)

888050.07

In [7]:
# Mean per-database total divided by 3600.
# NOTE(review): presumably a seconds-to-hours conversion - the unit of the
# `time` column is not visible here; confirm.
mean_time = pd.Series(times).mean()
mean_time / 3600

246.68057431587908