In [1]:
import numpy as np
import pandas as pd

from src.constant import DATA_DIR, DATABASE_DIR
from src.database import DB
from src.database.queries import *
from src.instance.InstanceList import InstanceList
from src.instance.TSP_Instance import TSP_from_index_file
from src.solver.TSP_LKH_Solver import TSP_LKH_Solver

In [4]:
MAX_COST = 8.9
MAX_TIME = 0.89

generators = [
    "cluster_netgen",
    "compression",
    "expansion",
    "explosion",
    "grid",
    "cluster",
    "implosion",
    "linearprojection",
    "rotation",
    "uniform_portgen",
]

test_instances = TSP_from_index_file(filepath=DATA_DIR / "TSP" / "TEST_600" / "index.json")

id_to_generator_name = {
    instance.id(): instance.filepath.parts[-2] for instance in test_instances
}

id_to_name = {
    instance.id(): instance.filepath.parts[-1] for instance in test_instances
}

In [27]:
def agg_cost(x):
    count = x.shape[0]
    x = x[x < MAX_COST]
    x = x.sort_values()
    idx = count // 2
    if x.shape[0] == 0:
        return MAX_TIME
    if x.shape[0] <= idx:
        return x.iloc[-1]
    return x.iloc[idx]
        

db_path = DATABASE_DIR / "run-plain-30-980863.db"
db = DB(db_path)
results = pd.read_sql_query("SELECT * FROM results", db._conn)
series = (
    results.loc[results["prefix"].str.startswith("test")]
    .groupby(["instance_id", "prefix"])["cost"]
    .min()
    .reset_index()
    .groupby("instance_id")["cost"]
    .agg(agg_cost)
)
series

instance_id
1010497343408354633    0.84
1025694743545219321    0.89
1029779678527399606    0.88
1033334384195915531    0.84
1143605267971830603    0.72
1149436184441925046    0.87
1243460845889619478    0.89
1282737481889693780    0.88
1299453198094859446    0.83
1321239327924670684    0.88
1369516250743366905    0.85
1375004723134567192    0.89
1488905002894265172    0.72
167305539516809641     0.88
1681077104121529789    0.88
1688935472143186984    0.87
1773516854761867851    0.73
1775480472424010466    0.68
1785824271338802418    0.78
1826544890693893177    0.86
1829532370807232624    0.88
1845525838569682312    0.86
1861016368352127022    0.80
1885940596331070073    0.88
2062957692107227897    0.79
2063193798411162182    0.88
2100663487041686966    0.88
2158590443837813332    0.85
2220848900251283179    0.88
2233975135233362402    0.76
2251427101580533757    0.88
229164915310212350     0.87
257376688141377312     0.56
297704127069533155     0.88
303409038746208311     0.89
36216750

In [42]:
def agg_prefix(prefix):

    frames = []

    def agg_cost(x):
        count = x.shape[0]
        x = x[x < MAX_COST]
        x = x.sort_values()
        idx = count // 2
        print(x.shape[0])
        if x.shape[0] == 0:
            return np.nan
        if x.shape[0] <= idx:
            return x.iloc[-1]
        return x.iloc[idx]
        

    for idx, db_path in enumerate(DATABASE_DIR.glob(f"{prefix}-*.db")):
        print(db_path)
        db = DB(db_path)
        results = pd.read_sql_query("SELECT * FROM results", db._conn)
        series = (
            results.loc[results["prefix"].str.startswith("test")]
            .groupby(["instance_id", "prefix"])["cost"]
            .min()
            .reset_index()
            .groupby("instance_id")["cost"]
            .agg(agg_cost)
        )
        frames.append(series)
        

    df = (
        pd.concat(frames, axis=1)
        .mean(axis=1)
        .round(3)
        .rename(prefix)
        .to_frame()
    )

    df["generator"] = df.index.map(id_to_generator_name)
    df["name"] = df.index.map(id_to_name)
    df = df.pivot_table(index="generator", columns="name", values=prefix).loc[generators, :]
    return df

n = 30
plain = agg_prefix(f"run-plain-{n}")
plain = plain.assign(generator=f"plain-{n}").set_index("generator", append=True)

C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-30-980860.db
37
10
18
44
32
68
51
61
57
25
3
25
36
9
50
32
78
28
10
11
66
43
61
48
5
13
65
96
28
41
48
27
18
29
58
46
39
95
41
18
71
40
27
31
29
22
94
19
59
66
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-30-980861.db
45
19
49
22
38
84
58
56
76
72
17
48
88
70
98
84
97
69
68
62
90
70
39
61
47
17
86
97
50
53
47
48
32
73
34
73
89
100
44
54
51
34
36
66
87
69
98
90
96
97
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-30-980862.db
99
61
94
98
73
88
96
84
62
63
14
90
100
86
100
98
100
86
59
99
92
99
28
74
78
32
99
89
63
55
81
92
50
100
75
75
77
100
65
70
68
52
60
79
84
92
97
85
88
100
C:\Users\zakrz\Documents\DataScience\praca magisterska\raw-algorithm-portfolios-main\database\run-plain-30-980863.db
24
0
14
5
4
23
0
15
10
24
2
0
62
18
48
19
63
3
18
17
41
4
1
8
7
11
25
1
17
63


In [38]:
df = plain.copy().sort_index(level=0).loc[generators]   
df["mean"] = df.mean(axis=1)

In [39]:
df.style.background_gradient(cmap="Reds", vmin=0, vmax=5).format("{:.3f}")

Unnamed: 0_level_0,name,000.tsp,001.tsp,002.tsp,003.tsp,004.tsp,mean
generator,generator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cluster_netgen,plain-30,0.709,0.82,0.839,0.779,0.516,0.733
compression,plain-30,0.269,0.581,0.663,0.7,0.675,0.578
expansion,plain-30,0.675,0.776,0.803,0.559,0.729,0.708
explosion,plain-30,0.352,0.43,0.767,0.57,0.758,0.575
grid,plain-30,0.215,0.47,0.739,0.427,0.604,0.491
cluster,plain-30,0.562,0.606,0.626,0.564,0.756,0.623
implosion,plain-30,0.618,0.598,0.418,0.69,0.823,0.629
linearprojection,plain-30,0.528,0.547,0.595,0.382,0.723,0.555
rotation,plain-30,0.768,0.758,0.775,0.773,0.648,0.744
uniform_portgen,plain-30,0.652,0.688,0.438,0.661,0.833,0.654


In [40]:
plain_total_mean = plain.mean(axis=1).mean(axis=0)
print(f"{plain_total_mean=:.3f}")

plain_total_mean=0.629
