In [1]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR, MAIN_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file
from src.solver.TSP_LKH_Solver import TSP_LKH_Solver

In [2]:
# Active experiment settings. Later cells compare result costs against
# MAX_COST to recognise cut-off runs and read the run databases from DIR.
MAX_COST = 100.0
MAX_TIME = 10.0
DIR = DATABASE_DIR
# DIR = (MAIN_DIR / "_archive" / "experiments" / "2025-01-27" / "600" / "database")

# Alternative settings kept for reference; to use one group, copy its values
# over the active assignments above.
#
# MAX_COST = 8.9
# MAX_TIME = 0.89
# DIR = DATABASE_DIR
# DIR = (MAIN_DIR / "_archive" / "experiments" / "2025-03-05" / "200" / "database")
#
# MAX_COST = 41.0
# MAX_TIME = 4.1
# DIR = (MAIN_DIR / "_archive" / "experiments" / "2025-03-05" / "400" / "database")

# Generator names in the row order used by the result tables below.
generators = [
    "cluster_netgen", "compression", "expansion", "explosion", "grid",
    "cluster", "implosion", "linearprojection", "rotation", "uniform_portgen",
]

test_index_path = DATA_DIR / "TSP" / "TEST_600" / "index.json"
test_instances = TSP_from_index_file(filepath=test_index_path)

# Instance id -> name of the directory that contains the instance file.
id_to_generator_name = dict(
    (instance.id(), instance.filepath.parts[-2]) for instance in test_instances
)

# Instance id -> file name of the instance.
id_to_name = dict(
    (instance.id(), instance.filepath.parts[-1]) for instance in test_instances
)

In [18]:
def agg_prefix(prefix):
    """Build a generator x instance table of test costs averaged over runs.

    Every database in ``DIR`` whose file name matches ``{prefix}-*.db`` is
    read. Within one database, the rows whose ``prefix`` column starts with
    ``"test"`` are reduced to the minimum cost per (instance_id, prefix)
    pair, and those minima are collapsed to one value per instance by the
    nested ``agg_cost``. The per-database values are averaged across
    databases, rounded to 3 decimals and pivoted into a table with one row
    per generator (ordered as in ``generators``) and one column per instance
    file name.

    Parameters
    ----------
    prefix : str
        File-name prefix of the run databases; also used as the name of the
        intermediate value column.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by generator name, columns by instance file name.

    Raises
    ------
    ValueError
        If no database in ``DIR`` matches the prefix.
    KeyError
        If a name listed in ``generators`` is missing from the pivoted table.
    """

    def agg_cost(x):
        # Element at position ``count // 2`` of the sorted costs, where
        # cut-off entries (cost >= MAX_COST) count towards ``count`` but are
        # never returned: when that position falls among the cut-offs, the
        # largest non-cut-off cost is used instead.
        count = x.shape[0]
        finished = x[x < MAX_COST].sort_values()
        if finished.empty:
            # Every entry was cut off (or there were none); there is no cost
            # to report, so mark the instance as missing instead of failing.
            return np.nan
        idx = count // 2
        if finished.shape[0] <= idx:
            return finished.iloc[-1]
        return finished.iloc[idx]

    frames = []
    # Sorted so the printed log has a stable order between executions.
    for db_path in sorted(DIR.glob(f"{prefix}-*.db")):
        print(db_path)
        # NOTE(review): the DB object is never closed explicitly; its API is
        # not visible from this notebook, so confirm whether it needs to be.
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        series = (
            results.loc[results["prefix"].str.startswith("test")]
            .groupby(["instance_id", "prefix"])["cost"]
            .min()
            .reset_index()
            .groupby("instance_id")["cost"]
            .agg(agg_cost)
        )
        frames.append(series)

    if not frames:
        raise ValueError(f"no databases matching '{prefix}-*.db' found in {DIR}")

    df = (
        pd.concat(frames, axis=1)
        .mean(axis=1)
        .round(3)
        .rename(prefix)
        .to_frame()
    )

    df["generator"] = df.index.map(id_to_generator_name)
    df["name"] = df.index.map(id_to_name)
    df = df.pivot_table(index="generator", columns="name", values=prefix).loc[generators, :]
    return df

n = 250
# Cost table for the "plain" runs, with a constant "plain-<n>" label appended
# as an extra index level.
plain = (
    agg_prefix(f"run-plain-{n}")
    .assign(generator=f"plain-{n}")
    .set_index("generator", append=True)
)

C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-250-scaling-981437.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-250-scaling-981438.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-250-scaling-981439.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-250-scaling-981440.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-250-scaling-981441.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-250-scaling-981442.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-250-scaling-981443.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-250-scaling-98

In [None]:
def agg_cost(x, max_cost=None):
    """Return the middle cost of ``x`` while never returning a cut-off value.

    Entries with a cost of ``max_cost`` or more are treated as cut-offs. They
    count towards the number of entries when the middle position
    (``len(x) // 2``) is chosen, but are removed before the value is picked:
    if the middle position falls among the cut-offs, the largest remaining
    cost is returned instead.

    Parameters
    ----------
    x : pandas.Series
        Costs to aggregate.
    max_cost : float, optional
        Cut-off threshold. Defaults to the notebook-level ``MAX_COST``.

    Returns
    -------
    float
        The selected cost, or NaN when ``x`` is empty or contains only
        cut-off entries.
    """
    if max_cost is None:
        max_cost = MAX_COST
    count = x.shape[0]
    finished = x[x < max_cost].sort_values()
    if finished.empty:
        # Nothing below the threshold: report "missing" rather than raising
        # IndexError from an empty positional lookup.
        return np.nan
    idx = count // 2
    if finished.shape[0] <= idx:
        return finished.iloc[-1]
    return finished.iloc[idx]

# Same per-instance aggregation as the cost-table cell, for one database only.
db_path = DIR / "run-plain-30-980890.db"
db = DB(db_path)
results = pd.read_sql_query("SELECT * FROM results", db._conn)

is_test_row = results["prefix"].str.startswith("test")
# Minimum cost per (instance, prefix) pair.
best_per_prefix = (
    results.loc[is_test_row]
    .groupby(["instance_id", "prefix"])["cost"]
    .min()
    .reset_index()
)
series = best_per_prefix.groupby("instance_id")["cost"].agg(agg_cost)
series

# frame = series.to_frame()
# frame["generator"] = frame.index.map(id_to_generator_name)
# frame["name"] = frame.index.map(id_to_name)
# frame = frame.pivot_table(index="generator", columns="name", values="cost").loc[generators, :]
# print(frame.mean().mean())
# # frame

# # 980931 --> 2.1948
# # 980935 --> 5.2959
# # 980937 --> 3.8926

In [14]:
# Grand mean of the cost table: `plain.mean()` averages each column, and the
# second `.mean()` averages those column means (assumes `plain` is still the
# DataFrame built by the cost-table cell).
print(f"mean: {plain.mean().mean():.3f}")

mean: 1.845


In [15]:
# Rows ordered as in `generators`, plus a "mean" column holding each row's
# average over the instance columns.
df = (
    plain.copy()
    .sort_index(level=0)
    .loc[generators]
    .assign(mean=lambda table: table.mean(axis=1))
)

In [16]:
# Shade cells on a red scale clamped to the range 0..5 and show 3 decimals.
gradient_options = {"cmap": "Reds", "vmin": 0, "vmax": 5}
df.style.background_gradient(**gradient_options).format("{:.3f}")

Unnamed: 0_level_0,name,000.tsp,001.tsp,002.tsp,003.tsp,004.tsp,mean
generator,generator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cluster_netgen,plain-500,3.648,4.616,3.958,1.763,1.008,2.999
compression,plain-500,0.792,3.328,1.37,1.783,1.639,1.782
expansion,plain-500,1.485,2.615,1.26,0.979,2.414,1.751
explosion,plain-500,0.463,0.591,1.622,1.129,1.469,1.055
grid,plain-500,0.274,1.225,4.029,0.768,1.28,1.515
cluster,plain-500,1.0,1.835,1.213,1.421,5.374,2.169
implosion,plain-500,4.015,0.91,0.672,3.22,6.139,2.991
linearprojection,plain-500,0.702,0.953,1.729,0.622,2.883,1.378
rotation,plain-500,2.757,1.402,1.311,1.528,0.997,1.599
uniform_portgen,plain-500,1.459,1.092,0.611,0.882,2.023,1.213


In [7]:
# Export `df` to an Excel workbook; the relative file name is resolved against
# the current working directory.
df.to_excel("tmp.xlsx")

In [19]:
def agg_prefix(prefix):
    """Average total configuration time per run database, divided by 3600.

    For every database in ``DIR`` matching ``{prefix}-*.db``, the ``time``
    column is summed over the rows whose ``prefix`` starts with ``"config"``
    and whose ``cached`` and ``surrogate`` columns both equal 0. Each sum is
    divided by 3600 (presumably seconds -> hours; confirm the unit of
    ``time``), and the mean over all databases is returned rounded to
    2 decimals.
    """

    def run_total(db_path):
        # Summed time of the selected rows of one database, scaled by 1/3600.
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        selected = (
            results["prefix"].str.startswith("config")
            & results["cached"].eq(0)
            & results["surrogate"].eq(0)
        )
        return results.loc[selected, "time"].sum() / 3600

    times = {
        idx: run_total(db_path)
        for idx, db_path in enumerate(DIR.glob(f"{prefix}-*.db"))
    }
    return np.round(pd.Series(times).mean(), 2)

# Report the averaged total time for the "plain" runs.
run_prefix = f"run-plain-{n}"
plain = agg_prefix(run_prefix)
print(f"plain: {plain:.2f} h")

plain: 38.50 h


In [20]:
# skip cut-offs
def agg_prefix(prefix):
    """Mean time of configuration runs below the cost cut-off, averaged over databases.

    For every database in ``DIR`` matching ``{prefix}-*.db`` (each path is
    printed as it is processed), the ``time`` column is averaged over the
    rows whose ``prefix`` starts with ``"config"``, whose ``cached`` and
    ``surrogate`` columns both equal 0, and whose ``cost`` is below
    ``MAX_COST``. The mean of those per-database averages is returned,
    rounded to 2 decimals.
    """

    def run_mean_time(db_path):
        # Average time of the selected rows of one database.
        db = DB(db_path)
        print(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        selected = (
            results["prefix"].str.startswith("config")
            & results["cached"].eq(0)
            & results["surrogate"].eq(0)
            & results["cost"].lt(MAX_COST)
        )
        return results.loc[selected, "time"].mean()

    times = {
        idx: run_mean_time(db_path)
        for idx, db_path in enumerate(DIR.glob(f"{prefix}-*.db"))
    }
    return np.round(pd.Series(times).mean(), 2)

# Report the averaged per-run time for the "plain" runs.
run_prefix = f"run-plain-{n}"
plain = agg_prefix(run_prefix)
print(f"plain: {plain:.2f}")

C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-03-05\200\database\run-plain-500-980929.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-03-05\200\database\run-plain-500-980930.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-03-05\200\database\run-plain-500-980931.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-03-05\200\database\run-plain-500-980932.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-03-05\200\database\run-plain-500-980933.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\_archive\experiments\2025-03-05\200\database\run-plain-500-980934.db
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfol

In [21]:
# fraction of cut-offs
def agg_prefix(prefix):
    """Fraction of configuration runs that were cut off, averaged over databases.

    For every database in ``DIR`` matching ``{prefix}-*.db``, the rows whose
    ``prefix`` starts with ``"config"`` and whose ``cached`` and
    ``surrogate`` columns both equal 0 are selected, and the share of them
    with a cost of at least ``MAX_COST`` is computed. The mean of those
    shares is returned, rounded to 2 decimals (a fraction between 0 and 1,
    not a percentage).
    """
    cutoff_share = {}
    for idx, db_path in enumerate(DIR.glob(f"{prefix}-*.db")):
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        selected = (
            results["prefix"].str.startswith("config")
            & results["cached"].eq(0)
            & results["surrogate"].eq(0)
        )
        # A run counts as cut off when its cost is not below MAX_COST. This
        # is the exact complement of the `cost < MAX_COST` test the timing
        # cell uses to skip cut-offs, and it does not depend on exact
        # floating-point equality with MAX_COST.
        cutoff_share[idx] = results.loc[selected, "cost"].ge(MAX_COST).mean()
    return np.round(pd.Series(cutoff_share).mean(), 2)

# Report the averaged cut-off share for the "plain" runs.
run_prefix = f"run-plain-{n}"
plain = agg_prefix(run_prefix)
print(f"plain: {plain:.2f}")

plain: 0.51
