In [1]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR, MAIN_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file
from src.solver.TSP_LKH_Solver import TSP_LKH_Solver

In [2]:
# Cost threshold: the cells below treat costs below MAX_COST as finished runs
# and costs equal to MAX_COST as cut-offs.
MAX_COST = 8.9
# NOTE(review): MAX_TIME is not read by any visible cell -- presumably the
# time limit that belongs to MAX_COST; confirm before relying on it.
MAX_TIME = 0.89
# Directory that is searched for the "<prefix>-*.db" result databases.
DIR = DATABASE_DIR
# Alternative settings pointing at the archived 2025-01-27 experiment:
# MAX_COST = 100.0
# MAX_TIME = 10.0
# DIR = (MAIN_DIR / "_archive" / "experiments" / "2025-01-27" / "600" / "database")

# Row order of the result tables. It is passed to ``.loc``, so it must stay a list.
generators = [
    "cluster_netgen", "compression", "expansion", "explosion", "grid",
    "cluster", "implosion", "linearprojection", "rotation", "uniform_portgen",
]

test_instances = TSP_from_index_file(
    filepath=DATA_DIR / "TSP" / "TEST_600" / "index.json",
)

# instance id -> second-to-last component of the instance file path
id_to_generator_name = dict(
    (inst.id(), inst.filepath.parts[-2]) for inst in test_instances
)

# instance id -> last component of the instance file path (the file name)
id_to_name = dict(
    (inst.id(), inst.filepath.parts[-1]) for inst in test_instances
)

In [3]:
def agg_prefix(prefix):
    """Build a generator x instance table of aggregated test costs for one run prefix.

    Every database matching ``{prefix}-*.db`` in ``DIR`` is processed the same
    way: rows of its ``results`` table whose ``prefix`` column starts with
    ``"test"`` are reduced to the minimum ``cost`` per ``(instance_id, prefix)``
    pair, and those minima are reduced to one value per instance with a
    cut-off-aware median (see ``agg_cost`` below). The per-database values are
    then averaged per instance, rounded to three decimals and pivoted with the
    help of ``id_to_generator_name`` and ``id_to_name``.

    Parameters
    ----------
    prefix : str
        File-name prefix of the databases to aggregate. It is also used as
        the name of the intermediate value column.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``generators`` (in that order), one column per
        instance name, holding the averaged cost.

    Raises
    ------
    ValueError
        If no database in ``DIR`` matches ``{prefix}-*.db``.
    """

    def agg_cost(costs):
        """Median of ``costs`` where values >= MAX_COST count as the largest entries.

        The median position is taken on the full group. If it falls among the
        cut-offs, the largest finished cost is returned instead; if there is
        no finished cost at all, the result is NaN.
        """
        median_pos = costs.shape[0] // 2
        finished = costs[costs < MAX_COST].sort_values()
        if finished.empty:
            # Every run was a cut-off: nothing to index into, so report the
            # instance as missing instead of raising IndexError.
            return np.nan
        return finished.iloc[min(median_pos, finished.shape[0] - 1)]

    # Path.glob yields files in arbitrary order; sort for reproducible output.
    db_paths = sorted(DIR.glob(f"{prefix}-*.db"))
    if not db_paths:
        raise ValueError(f"no database matching '{prefix}-*.db' found in {DIR}")

    frames = []
    for db_path in db_paths:
        print(db_path)
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        series = (
            results.loc[results["prefix"].str.startswith("test")]
            .groupby(["instance_id", "prefix"])["cost"]
            .min()
            .reset_index()
            .groupby("instance_id")["cost"]
            .agg(agg_cost)
        )
        frames.append(series)

    # One column per database -> mean over databases (NaN entries are skipped).
    df = (
        pd.concat(frames, axis=1)
        .mean(axis=1)
        .round(3)
        .rename(prefix)
        .to_frame()
    )

    df["generator"] = df.index.map(id_to_generator_name)
    df["name"] = df.index.map(id_to_name)
    df = df.pivot_table(index="generator", columns="name", values=prefix).loc[generators, :]
    return df

# Number embedded in the database file-name prefix ("run-plain-<n>").
n = 500
# Aggregate the matching databases and append a constant "plain-<n>" level
# to the row index.
plain = (
    agg_prefix(f"run-plain-{n}")
    .assign(generator=f"plain-{n}")
    .set_index("generator", append=True)
)

C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-980929.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-980930.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-980931.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-980932.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-980933.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-980934.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-980936.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-980937.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\r

In [5]:
# def agg_cost(x):
#     count = x.shape[0]
#     x = x[x < MAX_COST]
#     x = x.sort_values()
#     idx = count // 2
#     if x.shape[0] <= idx:
#         return x.iloc[-1]
#     return x.iloc[idx]

# db_path = DIR / f"run-plain-500-980935.db"
# db = DB(db_path)
# results = pd.read_sql_query("SELECT * FROM results", db._conn)
# series = (
#     results.loc[results["prefix"].str.startswith("test")]
#     .groupby(["instance_id", "prefix"])["cost"]
#     .min()
#     .reset_index()
#     .groupby("instance_id")["cost"]
#     .agg(agg_cost)
# )
# frame = series.to_frame()
# frame["generator"] = frame.index.map(id_to_generator_name)
# frame["name"] = frame.index.map(id_to_name)
# frame = frame.pivot_table(index="generator", columns="name", values="cost").loc[generators, :]
# print(frame.mean().mean())
# frame

In [6]:
# Grand mean: average every row first, then average the row means.
plain_total_mean = plain.mean(axis="columns").mean()
print(f"plain_total_mean={plain_total_mean:.3f}")

plain_total_mean=1.285


In [10]:
# Sort the index, restore the row order given by ``generators`` and append a
# row-wise "mean" column. ``plain`` itself is left untouched.
df = (
    plain
    .sort_index(level=0)
    .loc[generators]
    .assign(mean=lambda table: table.mean(axis=1))
)

In [11]:
# Render the table with red shading scaled to the 0-5 range and three
# decimals per cell.
(
    df.style
    .background_gradient(cmap="Reds", vmin=0, vmax=5)
    .format("{:.3f}")
)

Unnamed: 0_level_0,name,000.tsp,001.tsp,002.tsp,003.tsp,004.tsp,mean
generator,generator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cluster_netgen,plain-500,1.892,1.832,1.888,2.03,0.647,1.658
compression,plain-500,0.639,0.878,0.866,0.99,0.864,0.847
expansion,plain-500,1.398,2.723,1.279,0.914,3.195,1.902
explosion,plain-500,0.482,0.559,2.641,2.111,1.12,1.383
grid,plain-500,0.227,2.181,3.066,1.098,1.925,1.699
cluster,plain-500,0.934,2.168,1.901,2.215,3.495,2.143
implosion,plain-500,1.829,0.725,0.704,1.923,4.882,2.013
linearprojection,plain-500,1.386,0.466,1.836,1.257,1.113,1.212
rotation,plain-500,2.143,2.335,2.814,2.222,2.054,2.314
uniform_portgen,plain-500,1.27,2.398,1.388,1.175,2.242,1.695


In [13]:
# Export the table (including the "mean" column) to a workbook in the
# current working directory.
df.to_excel(excel_writer="tmp.xlsx")

In [None]:
def agg_prefix(prefix):
    """Average, over all ``{prefix}-*.db`` databases, the summed ``time`` divided by 3600.

    Only rows of the ``results`` table whose ``prefix`` starts with
    ``"config"`` and whose ``cached`` and ``surrogate`` columns equal 0 are
    summed. The caller prints the value with an "h" suffix, so ``time`` is
    presumably stored in seconds -- TODO confirm.

    Parameters
    ----------
    prefix : str
        File-name prefix of the databases in ``DIR``.

    Returns
    -------
    float
        Mean of the per-database totals, rounded to two decimals.
    """

    def summed_time(db_path):
        # Total ``time`` of the selected rows of one database, divided by 3600.
        db = DB(db_path)  # keep a reference while the connection is in use
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        is_config = results["prefix"].str.startswith("config")
        not_cached = results["cached"].eq(0)
        not_surrogate = results["surrogate"].eq(0)
        selected = results.loc[is_config & not_cached & not_surrogate, "time"]
        return selected.sum() / 3600

    per_db = {
        position: summed_time(db_path)
        for position, db_path in enumerate(DIR.glob(f"{prefix}-*.db"))
    }
    return np.round(pd.Series(per_db).mean(), 2)

# Bind the scalar to its own name: ``plain`` holds the cost table that the
# earlier cells read, and overwriting it with a float breaks re-running them.
plain_hours = agg_prefix(f"run-plain-{n}")
print(f"plain: {plain_hours:.2f} h")

plain: 153.99 h


In [49]:
# skip cut-offs
def agg_prefix(prefix):
    """Mean ``time`` of the runs below the cost cut-off, averaged over databases.

    For every ``{prefix}-*.db`` database in ``DIR`` the rows of ``results``
    are kept when ``prefix`` starts with ``"config"``, ``cached`` and
    ``surrogate`` equal 0 and ``cost`` is below ``MAX_COST``. The mean
    ``time`` of those rows is computed per database and the per-database
    means are averaged. Each database path is printed while it is processed.

    Parameters
    ----------
    prefix : str
        File-name prefix of the databases in ``DIR``.

    Returns
    -------
    float
        Mean of the per-database mean times, rounded to two decimals.
    """
    per_db_mean = []
    for db_path in DIR.glob(f"{prefix}-*.db"):
        db = DB(db_path)
        print(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        is_config = results["prefix"].str.startswith("config")
        not_cached = results["cached"].eq(0)
        not_surrogate = results["surrogate"].eq(0)
        below_cutoff = results["cost"].lt(MAX_COST)
        keep = is_config & not_cached & not_surrogate & below_cutoff
        per_db_mean.append(results.loc[keep, "time"].mean())
    # Key the values by their position, then average across databases.
    return np.round(pd.Series(dict(enumerate(per_db_mean))).mean(), 2)

# Bind the scalar to its own name: ``plain`` holds the cost table that the
# earlier cells read, and overwriting it with a float breaks re-running them.
plain_mean_time = agg_prefix(f"run-plain-{n}")
print(f"plain: {plain_mean_time:.2f}")

C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-01-27\600\database\run-plain-500-975016.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-01-27\600\database\run-plain-500-975017.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-01-27\600\database\run-plain-500-975018.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-01-27\600\database\run-plain-500-975019.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-01-27\600\database\run-plain-500-975020.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-01-27\600\database\run-plain-500-975021.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfol

In [50]:
# fraction of cut-offs
def agg_prefix(prefix):
    """Fraction of runs that reached the cost cut-off, averaged over databases.

    For every ``{prefix}-*.db`` database in ``DIR`` the rows of ``results``
    are kept when ``prefix`` starts with ``"config"`` and ``cached`` and
    ``surrogate`` equal 0. The share of those rows whose ``cost`` is at or
    above ``MAX_COST`` is computed per database and the shares are averaged.

    Parameters
    ----------
    prefix : str
        File-name prefix of the databases in ``DIR``.

    Returns
    -------
    float
        Mean cut-off fraction (0..1, not a percentage), rounded to two decimals.
    """
    rates = {}
    for idx, db_path in enumerate(DIR.glob(f"{prefix}-*.db")):
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        costs = results.loc[
            results["prefix"].str.startswith("config")
            & results["cached"].eq(0)
            & results["surrogate"].eq(0),
            "cost",
        ]
        # ``>=`` is the exact complement of the ``< MAX_COST`` test that the
        # other cells of this notebook use for finished runs; an exact float
        # equality would silently miss any cost stored above the threshold.
        rates[idx] = costs.ge(MAX_COST).mean()
    return np.round(pd.Series(rates).mean(), 2)

# Bind the scalar to its own name: ``plain`` holds the cost table that the
# earlier cells read, and overwriting it with a float breaks re-running them.
plain_cutoff_rate = agg_prefix(f"run-plain-{n}")
print(f"plain: {plain_cutoff_rate:.2f}")

plain: 0.63
