In [12]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR, MAIN_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file
from src.solver.TSP_LKH_Solver import TSP_LKH_Solver

In [2]:
# --- Experiment configuration ------------------------------------------------
# Exactly one (MAX_COST, MAX_TIME, DIR) triple is active; the other two are
# kept commented out as alternative settings.
# MAX_COST is the threshold the later cells compare costs against
# (`cost < MAX_COST` rows are kept, the rest are treated as cut-offs).

# MAX_COST = 8.9
# MAX_TIME = 0.89
# DIR = DATABASE_DIR

MAX_COST, MAX_TIME = 41.0, 4.1
DIR = DATABASE_DIR

# MAX_COST = 100.0
# MAX_TIME = 10.0
# DIR = (MAIN_DIR / "_archive" / "experiments" / "2025-01-27" / "600" / "database")

# Row order of the result tables. Must remain a list: it is used as a `.loc`
# key, where a tuple would be interpreted as a MultiIndex key instead.
generators = [
    "cluster_netgen", "compression", "expansion", "explosion", "grid",
    "cluster", "implosion", "linearprojection", "rotation", "uniform_portgen",
]

test_instances = TSP_from_index_file(
    filepath=DATA_DIR / "TSP" / "TEST_600" / "index.json"
)

# instance id -> second-to-last path component of the instance file
# (presumably the generator directory - confirm against the data layout).
id_to_generator_name = dict(
    (instance.id(), instance.filepath.parts[-2]) for instance in test_instances
)

# instance id -> last path component of the instance file (the file name).
id_to_name = dict(
    (instance.id(), instance.filepath.parts[-1]) for instance in test_instances
)

In [3]:
def agg_prefix(prefix):
    """Build a generator x instance table of aggregated test costs.

    Every database in ``DIR`` matching ``{prefix}-*.db`` is read. Within each
    database, rows whose ``prefix`` column starts with ``"test"`` are reduced
    to the minimum cost per ``(instance_id, prefix)`` pair, and those minima
    are collapsed per instance with ``agg_cost`` (a cut-off-aware median).
    The per-database values are then averaged per instance, rounded to three
    decimals and pivoted so that rows are generators (ordered as in
    ``generators``) and columns are instance file names.

    Parameters
    ----------
    prefix : str
        Leading part of the database file names to aggregate.

    Returns
    -------
    pandas.DataFrame
        Indexed by generator, one column per instance name.

    Raises
    ------
    ValueError
        If no database file matches ``{prefix}-*.db`` in ``DIR``.
    """

    def agg_cost(x):
        """Return the entry at position ``len(x) // 2`` of the sorted valid costs.

        Costs that are not below ``MAX_COST`` are dropped before indexing, but
        the position is still derived from the unfiltered length. If too few
        valid costs remain to reach that position, the largest valid cost is
        returned; if none remain at all, the result is NaN.
        """
        count = x.shape[0]
        valid = x[x < MAX_COST].sort_values()
        if valid.empty:
            # Every run of this instance hit the cut-off. Without this guard
            # `valid.iloc[-1]` below raises IndexError. NaN is skipped by the
            # mean across databases further down.
            return np.nan
        idx = count // 2
        if valid.shape[0] <= idx:
            return valid.iloc[-1]
        return valid.iloc[idx]

    frames = []
    for db_path in DIR.glob(f"{prefix}-*.db"):
        print(db_path)
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        series = (
            results.loc[results["prefix"].str.startswith("test")]
            .groupby(["instance_id", "prefix"])["cost"]
            .min()
            .reset_index()
            .groupby("instance_id")["cost"]
            .agg(agg_cost)
        )
        frames.append(series)

    if not frames:
        # pd.concat would raise an uninformative "No objects to concatenate".
        raise ValueError(f"No databases matching '{prefix}-*.db' found in {DIR}")

    # One column per database -> mean over databases for every instance.
    df = (
        pd.concat(frames, axis=1)
        .mean(axis=1)
        .round(3)
        .rename(prefix)
        .to_frame()
    )

    df["generator"] = df.index.map(id_to_generator_name)
    df["name"] = df.index.map(id_to_name)
    df = df.pivot_table(index="generator", columns="name", values=prefix).loc[generators, :]
    return df

n = 500
# Aggregate the "plain" runs and tag every row with the variant label by
# appending it as a second index level (also named "generator").
plain = (
    agg_prefix(f"run-plain-{n}")
    .assign(generator=f"plain-{n}")
    .set_index("generator", append=True)
)

C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981056.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981057.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981058.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981059.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981060.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981061.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981062.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981063.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\r

In [4]:
# Disabled debugging cell: runs the same test-cost aggregation on a single
# database file (run-plain-500-980935.db) and prints the mean of the
# per-instance column means. Everything below is intentionally commented out.
# def agg_cost(x):
#     count = x.shape[0]
#     x = x[x < MAX_COST]
#     x = x.sort_values()
#     idx = count // 2
#     if x.shape[0] <= idx:
#         return x.iloc[-1]
#     return x.iloc[idx]

# db_path = DIR / f"run-plain-500-980935.db"
# db = DB(db_path)
# results = pd.read_sql_query("SELECT * FROM results", db._conn)
# series = (
#     results.loc[results["prefix"].str.startswith("test")]
#     .groupby(["instance_id", "prefix"])["cost"]
#     .min()
#     .reset_index()
#     .groupby("instance_id")["cost"]
#     .agg(agg_cost)
# )
# frame = series.to_frame()
# frame["generator"] = frame.index.map(id_to_generator_name)
# frame["name"] = frame.index.map(id_to_name)
# frame = frame.pivot_table(index="generator", columns="name", values="cost").loc[generators, :]
# print(frame.mean().mean())
# frame

In [5]:
# Grand mean: average each row across its instance columns first, then
# average the resulting row means.
plain_total_mean = plain.mean(axis="columns").mean(axis="index")
print(f"{plain_total_mean=:.3f}")

plain_total_mean=0.739


In [6]:
# Work on a copy of the cost table, restore the configured generator order and
# append the row-wise mean over all instance columns as a "mean" column.
df = (
    plain.copy()
    .sort_index(level=0)
    .loc[generators]
    .assign(mean=lambda frame: frame.mean(axis=1))
)

In [7]:
# Heat-map view of the table: red shading on a fixed 0..5 scale, values shown
# with three decimals. The parenthesised expression is the cell's output.
(
    df.style
    .background_gradient(cmap="Reds", vmin=0, vmax=5)
    .format("{:.3f}")
)

Unnamed: 0_level_0,name,000.tsp,001.tsp,002.tsp,003.tsp,004.tsp,mean
generator,generator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cluster_netgen,plain-500,0.763,1.444,1.481,1.514,0.585,1.157
compression,plain-500,0.294,0.62,0.624,0.546,0.716,0.56
expansion,plain-500,0.47,1.638,0.659,0.489,1.298,0.911
explosion,plain-500,0.187,0.346,1.063,0.367,0.868,0.566
grid,plain-500,0.132,0.394,1.14,0.263,0.419,0.47
cluster,plain-500,0.591,0.805,0.408,0.727,1.041,0.714
implosion,plain-500,0.601,0.342,0.293,0.897,4.083,1.243
linearprojection,plain-500,0.322,0.307,0.394,0.21,0.883,0.423
rotation,plain-500,0.987,0.554,0.923,0.709,0.647,0.764
uniform_portgen,plain-500,0.644,0.404,0.261,0.6,0.995,0.581


In [8]:
# Export the (unstyled) table to an Excel file in the current working directory.
df.to_excel(excel_writer="tmp.xlsx")

In [9]:
def agg_prefix(prefix):
    """Mean total ``time`` per database, divided by 3600, for ``{prefix}-*.db``.

    NOTE: this definition replaces the earlier ``agg_prefix`` in the notebook.

    For every matching database in ``DIR`` the ``time`` column is summed over
    the rows whose ``prefix`` starts with ``"config"`` and whose ``cached`` and
    ``surrogate`` columns both equal 0. Each sum is divided by 3600
    (presumably seconds -> hours; confirm the unit of ``time``).

    Parameters
    ----------
    prefix : str
        Leading part of the database file names to aggregate.

    Returns
    -------
    numpy.float64
        Mean of the per-database totals, rounded to two decimals.
    """
    hours_by_run = {}
    for run_no, db_file in enumerate(DIR.glob(f"{prefix}-*.db")):
        db = DB(db_file)
        records = pd.read_sql_query("SELECT * FROM results", db._conn)
        is_config = records["prefix"].str.startswith("config")
        not_cached = records["cached"].eq(0)
        not_surrogate = records["surrogate"].eq(0)
        total_time = records.loc[is_config & not_cached & not_surrogate, "time"].sum()
        hours_by_run[run_no] = total_time / 3600
    return np.round(pd.Series(hours_by_run).mean(), 2)

# Mean total configuration time of the plain runs, printed with an "h" unit.
# NOTE: this rebinds `plain` (previously the cost table) to a scalar.
plain = agg_prefix(prefix=f"run-plain-{n}")
print(f"plain: {plain:.2f} h")

plain: 60.10 h


In [10]:
# skip cut-offs
def agg_prefix(prefix):
    """Mean ``time`` of non-cut-off configuration rows, averaged over databases.

    NOTE: this definition replaces the earlier ``agg_prefix`` in the notebook.

    For every database in ``DIR`` matching ``{prefix}-*.db`` (each path is
    printed as it is processed), the ``time`` column is averaged over the rows
    whose ``prefix`` starts with ``"config"``, whose ``cached`` and
    ``surrogate`` columns equal 0, and whose ``cost`` is below ``MAX_COST``.

    Parameters
    ----------
    prefix : str
        Leading part of the database file names to aggregate.

    Returns
    -------
    numpy.float64
        Mean of the per-database averages, rounded to two decimals.
    """
    mean_time_by_run = {}
    for run_no, db_file in enumerate(DIR.glob(f"{prefix}-*.db")):
        db = DB(db_file)
        print(db_file)
        rows = pd.read_sql_query("SELECT * FROM results", db._conn)
        # Build the row filter one condition at a time.
        keep = rows["prefix"].str.startswith("config")
        keep = keep & rows["cached"].eq(0)
        keep = keep & rows["surrogate"].eq(0)
        keep = keep & rows["cost"].lt(MAX_COST)
        mean_time_by_run[run_no] = rows.loc[keep, "time"].mean()
    return np.round(pd.Series(mean_time_by_run).mean(), 2)

# Mean time of the non-cut-off configuration rows for the plain runs.
# NOTE: `plain` is rebound to a scalar here.
plain = agg_prefix(prefix=f"run-plain-{n}")
print(f"plain: {plain:.2f}")

C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981056.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981057.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981058.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981059.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981060.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981061.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981062.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-500-981063.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\r

In [11]:
# fraction of cut-offs
def agg_prefix(prefix):
    """Mean fraction of cut-off configuration rows, averaged over databases.

    NOTE: this definition replaces the earlier ``agg_prefix`` in the notebook.

    For every database in ``DIR`` matching ``{prefix}-*.db`` the rows whose
    ``prefix`` starts with ``"config"`` and whose ``cached`` and ``surrogate``
    columns equal 0 are selected, and the share of those rows whose ``cost``
    is at least ``MAX_COST`` is computed.

    Parameters
    ----------
    prefix : str
        Leading part of the database file names to aggregate.

    Returns
    -------
    numpy.float64
        Mean of the per-database cut-off fractions (0..1), rounded to two
        decimals.
    """
    cutoff_fractions = {}
    for idx, db_path in enumerate(DIR.glob(f"{prefix}-*.db")):
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        costs = results.loc[
            results["prefix"].str.startswith("config")
            & results["cached"].eq(0)
            & results["surrogate"].eq(0),
            "cost",
        ]
        # `>=` rather than `==`: this is the exact complement of the
        # `cost < MAX_COST` success filter used by the other cells, and it does
        # not depend on an exact floating-point match with the stored cost.
        cutoff_fractions[idx] = costs.ge(MAX_COST).mean()
    return np.round(pd.Series(cutoff_fractions).mean(), 2)

# Share of cut-off configuration rows for the plain runs (a fraction in 0..1).
# NOTE: `plain` is rebound to a scalar here.
plain = agg_prefix(prefix=f"run-plain-{n}")
print(f"plain: {plain:.2f}")

plain: 0.58
