In [8]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file
from src.solver.TSP_LKH_Solver import TSP_LKH_Solver

In [9]:
generators = [
    "cluster_netgen",
    "compression",
    "expansion",
    "explosion",
    "grid",
    "cluster",
    "implosion",
    "linearprojection",
    "rotation",
    "uniform_portgen",
]

test_instances = TSP_from_index_file(filepath=DATA_DIR / "TSP" / "TEST" / "index.json")

id_to_generator_name = {
    instance.id(): instance.filepath.parts[-2] for instance in test_instances
}

id_to_name = {
    instance.id(): instance.filepath.parts[-1] for instance in test_instances
}

In [14]:
def agg_prefix(prefix):

    frames = []

    def agg_cost(x):
        count = x.shape[0]
        x = x[x < TSP_LKH_Solver.MAX_COST]
        idx = count // 2
        if x.shape[0] <= idx:
            return x.iloc[-1]
        return x.iloc[idx]
        

    for idx, db_path in enumerate(DATABASE_DIR.glob(f"{prefix}-*.db")):
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        series = (
            results.loc[results["prefix"].str.startswith("test")]
            .groupby(["instance_id", "prefix"])["cost"]
            .min()
            .reset_index()
            .groupby("instance_id")["cost"]
            .agg(agg_cost)
        )
        frames.append(series)
        

    df = (
        pd.concat(frames, axis=1)
        .mean(axis=1)
        .round(3)
        .rename(prefix)
        .to_frame()
    )

    df["generator"] = df.index.map(id_to_generator_name)
    df["name"] = df.index.map(id_to_name)
    df = df.pivot_table(index="generator", columns="name", values=prefix).loc[generators, :]
    return df

n = 500
plain = agg_prefix(f"run-plain-{n}")
sur25 = agg_prefix(f"run-{n}-sur-25")
sur50 = agg_prefix(f"run-{n}-sur-50")
sur75 = agg_prefix(f"run-{n}-sur-75")

plain = plain.assign(generator=f"plain-{n}").set_index("generator", append=True)
sur25 = sur25.assign(generator="surrogate-25").set_index("generator", append=True)
sur50 = sur50.assign(generator="surrogate-50").set_index("generator", append=True)
sur75 = sur75.assign(generator="surrogate-75").set_index("generator", append=True)

In [15]:
df = pd.concat([plain, sur25, sur50, sur75], axis=0).sort_index(level=0).loc[generators]   
df["mean"] = df.mean(axis=1)

In [16]:
plain_total_mean = plain.mean(axis=1).mean(axis=0)
sur25_total_mean = sur25.mean(axis=1).mean(axis=0)
sur50_total_mean = sur50.mean(axis=1).mean(axis=0)
sur75_total_mean = sur75.mean(axis=1).mean(axis=0)
print(f"{plain_total_mean=:.3f}")
print(f"{sur25_total_mean=:.3f}")
print(f"{sur50_total_mean=:.3f}")
print(f"{sur75_total_mean=:.3f}")

plain_total_mean=0.304
sur25_total_mean=0.303
sur50_total_mean=0.304
sur75_total_mean=0.363


In [17]:
plain_training = plain.iloc[:5].mean(axis=1).mean(axis=0)
sur25_training = sur25.iloc[:5].mean(axis=1).mean(axis=0)
sur50_training = sur50.iloc[:5].mean(axis=1).mean(axis=0)
sur75_training = sur75.iloc[:5].mean(axis=1).mean(axis=0)

plain_not_training = plain.iloc[5:].mean(axis=1).mean(axis=0)
sur25_not_training = sur25.iloc[5:].mean(axis=1).mean(axis=0)
sur50_not_training = sur50.iloc[5:].mean(axis=1).mean(axis=0)
sur75_not_training = sur75.iloc[5:].mean(axis=1).mean(axis=0)

print(f"{plain_training=:.3f}", f"{plain_not_training=:.3f}")
print(f"{sur25_training=:.3f}", f"{sur25_not_training=:.3f}")
print(f"{sur50_training=:.3f}", f"{sur50_not_training=:.3f}")
print(f"{sur75_training=:.3f}", f"{sur75_not_training=:.3f}")

plain_training=0.303 plain_not_training=0.306
sur25_training=0.320 sur25_not_training=0.287
sur50_training=0.291 sur50_not_training=0.316
sur75_training=0.356 sur75_not_training=0.370


In [18]:
# 
df.to_excel("tmp.xlsx")

In [54]:
times = {}

for idx, db_path in enumerate(DATABASE_DIR.glob(f"{PREFIX}-*.db")):
    db = DB(db_path)
    results = pd.read_sql_query("SELECT * FROM results", db._conn)
    times[idx] = results.loc[
        results["prefix"].str.startswith("config")
        & results["cached"].eq(0)
        & results["surrogate"].eq(0),
        "time",
    ].sum()
    
np.round(pd.Series(times).mean(), 2)

20640.65

In [55]:
pd.Series(times).mean() / 3600

5.73351327659766

In [None]:
# def agg_cost(x):
#     count = x.shape[0]
#     x = x[x < TSP_LKH_Solver.MAX_COST]
#     idx = count // 2
#     if x.shape[0] <= idx:
#         return x.iloc[-1]
#     return x.iloc[idx]

# for idx, db_path in enumerate(DATABASE_DIR.glob(f"run-500-sur-25-*.db")):
#     db = DB(db_path)
#     results = pd.read_sql_query("SELECT * FROM results", db._conn)
#     series = (
#         results.loc[results["prefix"].str.startswith("test")]
#         .groupby(["instance_id", "prefix"])["cost"]
#         .min()
#         .reset_index()
#         .groupby("instance_id")["cost"]
#         .agg(agg_cost)
#     )
#     print(db_path, series.mean())