In [10]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file
from src.solver.TSP_LKH_Solver import TSP_LKH_Solver

In [11]:
# Generator names, listed in the row order used by the result tables.
generators = [
    "cluster_netgen",
    "compression",
    "expansion",
    "explosion",
    "grid",
    "cluster",
    "implosion",
    "linearprojection",
    "rotation",
    "uniform_portgen",
]

# Test instances described by the TEST_600 index file.
test_instances = TSP_from_index_file(
    filepath=DATA_DIR / "TSP" / "TEST_600" / "index.json"
)

# instance id -> second-to-last component of the instance path
# (used as the generator name).
id_to_generator_name = dict(
    (inst.id(), inst.filepath.parts[-2]) for inst in test_instances
)

# instance id -> last component of the instance path (the file name).
id_to_name = dict(
    (inst.id(), inst.filepath.parts[-1]) for inst in test_instances
)

In [18]:
def agg_prefix(prefix):
    """Aggregate the costs of one result database into a generator x instance table.

    The first database (in sorted order) matching ``{prefix}-*.db`` inside
    ``DATABASE_DIR`` is read. The part of the ``prefix`` column before the
    first ``;`` identifies a run. Within each run, the minimum cost per
    ``(instance_id, prefix)`` pair is taken, those minima are reduced to a
    single value per instance by ``agg_cost``, and the per-run values are
    then averaged across runs and rounded to three decimals.

    Parameters
    ----------
    prefix : str
        File-name prefix of the database to load.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the module-level ``generators`` list (in that
        order) and one column per instance file name. A cell is NaN when no
        run produced a cost below ``TSP_LKH_Solver.MAX_COST`` for it.

    Raises
    ------
    FileNotFoundError
        If no database file matches ``prefix``.
    """

    def agg_cost(x):
        """Median-like reduction that ranks costs >= MAX_COST above all others.

        The median position is computed on the full sample size, but only
        costs below ``MAX_COST`` are eligible; if the position falls beyond
        them, the largest eligible cost is returned instead.
        """
        count = x.shape[0]
        valid = x[x < TSP_LKH_Solver.MAX_COST].sort_values()
        if valid.empty:
            # No cost below MAX_COST at all: report "no data" instead of
            # failing with an out-of-bounds positional lookup.
            return np.nan
        # Clamp the median position to the last eligible value.
        return valid.iloc[min(count // 2, valid.shape[0] - 1)]

    # Sort so the choice is deterministic when several files match.
    matches = sorted(DATABASE_DIR.glob(f"{prefix}-*.db"))
    if not matches:
        raise FileNotFoundError(
            f"no database matching '{prefix}-*.db' found in {DATABASE_DIR}"
        )

    # NOTE(review): the connection held by DB is not released here; confirm
    # whether DB exposes a close method that should be called.
    db = DB(matches[0])
    results = pd.read_sql_query("SELECT * FROM results", db._conn)
    results["run_id"] = results["prefix"].str.split(";", expand=True)[0]

    # One Series (indexed by instance_id) per run, in first-appearance order.
    frames = [
        (
            results_run
            .groupby(["instance_id", "prefix"])["cost"]
            .min()
            .reset_index()
            .groupby("instance_id")["cost"]
            .agg(agg_cost)
        )
        for _, results_run in results.groupby("run_id", sort=False)
    ]

    df = (
        pd.concat(frames, axis=1)
        .mean(axis=1)
        .round(3)
        .to_frame()
    )
    df["generator"] = df.index.map(id_to_generator_name)
    df["name"] = df.index.map(id_to_name)
    # to_frame() on the unnamed Series yields a single column labelled 0.
    df = df.pivot_table(index="generator", columns="name", values=0).loc[generators, :]
    return df

# Values interpolated into the database prefixes below.
n = 500
from_ = 400

# Load each table and tag it with its configuration label, appended to the
# index as a second level that is also named "generator".
plain = (
    agg_prefix(f"run-plain-{n}-{from_}")
    .assign(generator=f"plain-{n}")
    .set_index("generator", append=True)
)
sur25 = (
    agg_prefix(f"run-{n}-sur-25-{from_}")
    .assign(generator="surrogate-25")
    .set_index("generator", append=True)
)
sur50 = (
    agg_prefix(f"run-{n}-sur-50-{from_}")
    .assign(generator="surrogate-50")
    .set_index("generator", append=True)
)
sur75 = (
    agg_prefix(f"run-{n}-sur-75-{from_}")
    .assign(generator="surrogate-75")
    .set_index("generator", append=True)
)

In [19]:
# Stack the four tables, group the rows of each generator together and
# restore the generator order from `generators`.
df = (
    pd.concat([plain, sur25, sur50, sur75], axis=0)
    .sort_index(level=0)
    .loc[generators]
)
# Row-wise average over the instance columns.
df = df.assign(mean=df.mean(axis=1))

In [20]:
# Shade cells on a fixed 0-5 red scale and show three decimals.
(
    df.style
    .background_gradient(cmap="Reds", vmin=0, vmax=5)
    .format("{:.3f}")
)

Unnamed: 0_level_0,name,000.tsp,001.tsp,002.tsp,003.tsp,004.tsp,mean
generator,generator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cluster_netgen,plain-500,0.697,1.335,0.927,0.954,0.527,0.888
cluster_netgen,surrogate-25,0.938,1.506,1.442,0.97,0.698,1.111
cluster_netgen,surrogate-50,0.616,1.488,1.384,0.963,0.556,1.001
cluster_netgen,surrogate-75,0.969,1.648,1.704,1.194,0.676,1.238
compression,plain-500,0.242,0.52,0.626,0.708,0.513,0.522
compression,surrogate-25,0.258,0.506,0.6,0.644,0.602,0.522
compression,surrogate-50,0.294,0.553,0.642,0.764,0.612,0.573
compression,surrogate-75,0.66,0.599,1.07,1.14,0.7,0.834
expansion,plain-500,0.475,1.554,0.635,0.433,1.149,0.849
expansion,surrogate-25,0.51,1.24,0.558,0.39,1.244,0.788


In [21]:
# Mean of the per-row means for every configuration.
plain_total_mean, sur25_total_mean, sur50_total_mean, sur75_total_mean = (
    table.mean(axis=1).mean(axis=0) for table in (plain, sur25, sur50, sur75)
)
print(
    f"{plain_total_mean=:.3f}",
    f"{sur25_total_mean=:.3f}",
    f"{sur50_total_mean=:.3f}",
    f"{sur75_total_mean=:.3f}",
    sep="\n",
)

plain_total_mean=0.659
sur25_total_mean=0.677
sur50_total_mean=0.702
sur75_total_mean=0.879


In [22]:
# Mean of the per-row means over the first five rows of each table ...
plain_training, sur25_training, sur50_training, sur75_training = (
    table.iloc[:5].mean(axis=1).mean(axis=0)
    for table in (plain, sur25, sur50, sur75)
)

# ... and over all remaining rows.
plain_not_training, sur25_not_training, sur50_not_training, sur75_not_training = (
    table.iloc[5:].mean(axis=1).mean(axis=0)
    for table in (plain, sur25, sur50, sur75)
)

print(f"{plain_training=:.3f}", f"{plain_not_training=:.3f}")
print(f"{sur25_training=:.3f}", f"{sur25_not_training=:.3f}")
print(f"{sur50_training=:.3f}", f"{sur50_not_training=:.3f}")
print(f"{sur75_training=:.3f}", f"{sur75_not_training=:.3f}")

plain_training=0.641 plain_not_training=0.676
sur25_training=0.683 sur25_not_training=0.671
sur50_training=0.687 sur50_not_training=0.716
sur75_training=0.855 sur75_not_training=0.903


In [23]:
# Write the combined table to an Excel file in the working directory.
df.to_excel(excel_writer="tmp.xlsx")