In [1]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file
from src.solver.TSP_LKH_Solver import TSP_LKH_Solver

In [2]:
generators = [
    "cluster_netgen",
    "compression",
    "expansion",
    "explosion",
    "grid",
    "cluster",
    "implosion",
    "linearprojection",
    "rotation",
    "uniform_portgen",
]

test_instances = TSP_from_index_file(filepath=DATA_DIR / "TSP" / "TEST" / "index.json")

id_to_generator_name = {
    instance.id(): instance.filepath.parts[-2] for instance in test_instances
}

id_to_name = {
    instance.id(): instance.filepath.parts[-1] for instance in test_instances
}

In [60]:
def agg_prefix(prefix):

    frames = []

    def agg_cost(x):
        count = x.shape[0]
        x = x[x < TSP_LKH_Solver.MAX_COST]
        idx = count // 2
        if x.shape[0] <= idx:
            return x.iloc[-1]
        return x.iloc[idx]
        

    for idx, db_path in enumerate(DATABASE_DIR.glob(f"{prefix}-*.db")):
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        series = (
            results.loc[results["prefix"].str.startswith("test")]
            .groupby(["instance_id", "prefix"])["cost"]
            .min()
            .reset_index()
            .groupby("instance_id")["cost"]
            .agg(agg_cost)
        )
        frames.append(series)
        

    df = (
        pd.concat(frames, axis=1)
        .mean(axis=1)
        .round(2)
        .rename(prefix)
        .to_frame()
    )

    df["generator"] = df.index.map(id_to_generator_name)
    df["name"] = df.index.map(id_to_name)
    df = df.pivot_table(index="generator", columns="name", values=prefix).loc[generators, :]
    return df

n = 500
plain = agg_prefix(f"run-plain-{n}")
sur = agg_prefix(f"run-{n}-sur-50")

sur = sur.assign(generator="surrogate-50").set_index("generator", append=True)
plain = plain.assign(generator=f"plain-{n}").set_index("generator", append=True)

In [61]:
df = pd.concat([plain, sur], axis=0).sort_index(level=0).loc[generators]   
df["mean"] = df.mean(axis=1)

In [62]:
plain_total_mean = plain.mean(axis=1).mean(axis=0)
sur_total_mean = sur.mean(axis=1).mean(axis=0)
print(f"{plain_total_mean=:.2f}", f"{sur_total_mean=:.2f}")

plain_total_mean=0.30 sur_total_mean=0.30


In [63]:
plain_training = plain.iloc[:5].mean(axis=1).mean(axis=0)
sur_training = sur.iloc[:5].mean(axis=1).mean(axis=0)

plain_not_training = plain.iloc[5:].mean(axis=1).mean(axis=0)
sur_not_training = sur.iloc[5:].mean(axis=1).mean(axis=0)

print(f"{plain_training=:.2f}", f"{sur_training=:.2f}")
print(f"{plain_not_training=:.2f}", f"{sur_not_training=:.2f}")

plain_training=0.30 sur_training=0.29
plain_not_training=0.31 sur_not_training=0.32


In [65]:
sur

Unnamed: 0_level_0,name,000.tsp,001.tsp,002.tsp,003.tsp,004.tsp
generator,generator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cluster_netgen,surrogate-50,0.11,0.2,0.64,0.3,0.61
compression,surrogate-50,0.14,0.13,1.19,0.32,0.1
expansion,surrogate-50,0.3,0.14,0.39,0.42,0.2
explosion,surrogate-50,0.54,0.09,0.19,0.11,0.22
grid,surrogate-50,0.35,0.09,0.23,0.18,0.1
cluster,surrogate-50,0.16,0.15,0.15,0.16,0.15
implosion,surrogate-50,0.04,0.14,0.27,0.28,0.1
linearprojection,surrogate-50,0.19,0.35,0.14,0.09,0.16
rotation,surrogate-50,0.64,3.12,0.1,0.16,0.12
uniform_portgen,surrogate-50,0.3,0.27,0.06,0.56,0.06


In [7]:
# 
df.to_excel("tmp.xlsx")

In [53]:
df.mean().round(2)

run-30-sur-50    0.52
dtype: float64

In [54]:
times = {}

for idx, db_path in enumerate(DATABASE_DIR.glob(f"{PREFIX}-*.db")):
    db = DB(db_path)
    results = pd.read_sql_query("SELECT * FROM results", db._conn)
    times[idx] = results.loc[
        results["prefix"].str.startswith("config")
        & results["cached"].eq(0)
        & results["surrogate"].eq(0),
        "time",
    ].sum()
    
np.round(pd.Series(times).mean(), 2)

20640.65

In [55]:
pd.Series(times).mean() / 3600

5.73351327659766