In [33]:
from pathlib import Path
import tomllib
import pandas as pd

with (Path().cwd()/".."/"config.toml").open("rb") as f:
    config = tomllib.load(f)
    config_exp = config[Path().cwd().name]
    config_input = config_exp["input"]

In [34]:
instance_infos = pd.read_json(Path("..")/config_input["baseline_algs"])[["Instance", "#Variables", "#Clauses"]].rename(columns={"#Variables": "n", "#Clauses": "m", "Instance": "instance"}).groupby("instance").mean()
instance_infos.to_json(config["04_compare"]["output"]["instance_infos"])
instance_infos

Unnamed: 0_level_0,n,m
instance,Unnamed: 1_level_1,Unnamed: 2_level_1
APL,23.0,35.0
APL-Model,28.0,40.0
BankingSoftware,176.0,280.0
BattleofTanks,144.0,769.0
ChatClient,14.0,20.0
DMIE,366.0,627.0
E-Shop,326.0,499.0
EMBToolkit,1179.0,5414.0
FameDB,22.0,40.0
FeatureIDE,19.0,27.0


In [25]:
baseline_data = pd.read_json(Path("..")/config_input["baseline_algs"])
baseline_data.rename(columns={"Instance": "instance", "SampleSize": "upper_bound"}, inplace=True)
def name_algorithms(row):
    if "YASA" in row["Algorithm"]:
        m = row["Settings"].split("_")[1][1:]
        return f"YASA (m={m})"
    else:
        return row["Algorithm"].replace("FIDE-","")
baseline_data["algorithm"] = baseline_data.apply(name_algorithms, axis=1)
print(baseline_data["algorithm"].unique())
baseline_data["runtime"] = baseline_data["Time"]/1000
baseline_data = baseline_data[['instance', 'algorithm', 'upper_bound', "runtime"]]
baseline_data

['ICPL' 'Chvatal' 'Incling' 'YASA (m=1)' 'YASA (m=3)' 'YASA (m=5)'
 'YASA (m=10)' 'ACTS-IPOG-FT' 'ACTS-IPOG-CSP' 'ACTS-IPOF-FT'
 'ACTS-IPOF-CSP']


Unnamed: 0,instance,algorithm,upper_bound,runtime
0,PPU,ICPL,15.0,1.706
1,PPU,Chvatal,15.0,1.455
2,PPU,Incling,17.0,0.171
3,PPU,YASA (m=1),13.0,0.266
4,PPU,YASA (m=3),12.0,0.290
...,...,...,...,...
2580,eCos-3-0_i386pc,YASA (m=10),68.0,24.011
2581,eCos-3-0_i386pc,ACTS-IPOG-FT,,900.001
2582,eCos-3-0_i386pc,ACTS-IPOG-CSP,,900.008
2583,eCos-3-0_i386pc,ACTS-IPOF-FT,,900.008


In [26]:
yasa_15min_data = pd.read_json(Path("..")/config_input["yasa_15min"]).drop(columns=["path"])
yasa_15min_data["algorithm"] = "YASA (15min)"
yasa_15min_data["runtime"] = 900.0
yasa_15min_data.rename(columns={"sample_size": "upper_bound"}, inplace=True)
yasa_15min_data

Unnamed: 0,instance,upper_bound,algorithm,runtime
0,soletta_2017-03-09_21-02-40,63,YASA (15min),900.0
1,soletta_2017-03-09_21-02-40,64,YASA (15min),900.0
2,soletta_2017-03-09_21-02-40,62,YASA (15min),900.0
3,soletta_2017-03-09_21-02-40,63,YASA (15min),900.0
4,soletta_2017-03-09_21-02-40,63,YASA (15min),900.0
...,...,...,...,...
230,am31_sim,69,YASA (15min),900.0
231,am31_sim,72,YASA (15min),900.0
232,am31_sim,70,YASA (15min),900.0
233,am31_sim,72,YASA (15min),900.0


In [27]:
baseline_df = pd.concat([baseline_data, yasa_15min_data], ignore_index=True)
baseline_df

Unnamed: 0,instance,algorithm,upper_bound,runtime
0,PPU,ICPL,15.0,1.706
1,PPU,Chvatal,15.0,1.455
2,PPU,Incling,17.0,0.171
3,PPU,YASA (m=1),13.0,0.266
4,PPU,YASA (m=3),12.0,0.290
...,...,...,...,...
2815,am31_sim,YASA (15min),69.0,900.000
2816,am31_sim,YASA (15min),72.0,900.000
2817,am31_sim,YASA (15min),70.0,900.000
2818,am31_sim,YASA (15min),72.0,900.000


In [28]:
path = Path(config["04_compare"]["output"]["baseline_data"])
path.parent.mkdir(parents=True, exist_ok=True)
baseline_df.to_json(path)
baseline_df

Unnamed: 0,instance,algorithm,upper_bound,runtime
0,PPU,ICPL,15.0,1.706
1,PPU,Chvatal,15.0,1.455
2,PPU,Incling,17.0,0.171
3,PPU,YASA (m=1),13.0,0.266
4,PPU,YASA (m=3),12.0,0.290
...,...,...,...,...
2815,am31_sim,YASA (15min),69.0,900.000
2816,am31_sim,YASA (15min),72.0,900.000
2817,am31_sim,YASA (15min),70.0,900.000
2818,am31_sim,YASA (15min),72.0,900.000


In [29]:
best_upper_bound_by_baseline = baseline_df.groupby(["instance"])["upper_bound"].min().reset_index()
best_upper_bound_by_baseline

Unnamed: 0,instance,upper_bound
0,APL,9.0
1,APL-Model,10.0
2,BankingSoftware,40.0
3,BattleofTanks,451.0
4,ChatClient,7.0
5,DMIE,26.0
6,E-Shop,19.0
7,EMBToolkit,1881.0
8,FameDB,8.0
9,FeatureIDE,9.0


In [30]:
samplns_15min = pd.read_json(Path("..")/config_input["samplns_15min"])
samplns_15min["algorithm"] = "SampLNS (15min)"
samplns_15min

Unnamed: 0,instance_name,lb,ub,iteration,runtime,timestamp,hostname,time_used_by_yasa,time_of_last_update,algorithm
0,berkeleyDB1,15,15,1,127.061744,2024-02-29 22:34:46.548,algra02,0.444649,70.800291,SampLNS (15min)
1,BattleofTanks,256,324,2,905.864063,2024-02-29 23:48:03.136,algra06,0.757872,899.401329,SampLNS (15min)
2,uclibc_2008-06-05_13-46-47,505,505,4,112.978185,2024-03-01 01:59:14.695,algra04,1.563936,109.876549,SampLNS (15min)
3,APL,7,7,0,1.150905,2024-02-29 21:35:43.570,algra03,0.364628,0.782022,SampLNS (15min)
4,ChatClient,7,7,0,2.577169,2024-02-29 21:35:41.952,algra05,0.565027,2.004380,SampLNS (15min)
...,...,...,...,...,...,...,...,...,...,...
235,FreeBSD-8_0_0,29,47,0,917.751250,2024-02-29 22:23:02.911,algra03,11.911315,809.872718,SampLNS (15min)
236,lcm,6,6,2,0.314571,2024-02-29 23:36:06.011,algra03,0.273436,0.037731,SampLNS (15min)
237,integrator_arm7,28,40,2,939.704633,2024-03-01 00:22:24.083,algra02,3.890440,738.614761,SampLNS (15min)
238,fiasco_2017-09-26_11-30-56,225,226,1,905.998053,2024-02-29 22:45:22.583,algra06,1.046048,256.179602,SampLNS (15min)


In [31]:
samplns_3h = pd.read_json(Path("..")/config_input["samplns_3h"])
samplns_3h["algorithm"] = "SampLNS (3h)"
samplns_3h

Unnamed: 0,instance_name,lb,ub,iteration,runtime,timestamp,hostname,time_used_by_yasa,time_of_last_update,algorithm
0,DMIE,16,16,2,84.251726,2024-03-03 13:52:56.774,algra05,1.151671,82.275646,SampLNS (3h)
1,XSEngine,32,37,2,10835.245833,2024-03-03 19:04:44.852,algra02,3.947085,861.827745,SampLNS (3h)
2,busybox_2020-12-16_21-53-05,19,20,2,10812.507526,2024-03-03 16:21:47.983,algra01,3.722179,1315.498623,SampLNS (3h)
3,dell,31,31,1,45.378754,2024-03-03 03:18:58.480,algra04,0.430485,44.940805,SampLNS (3h)
4,aaed2000,51,52,0,10958.007358,2024-03-03 00:52:45.945,algra02,4.545260,5245.348895,SampLNS (3h)
...,...,...,...,...,...,...,...,...,...,...
230,PPU,12,12,2,4.602942,2024-03-03 11:44:34.043,algra03,0.406477,0.836773,SampLNS (3h)
231,fiasco_2017-09-26_11-30-56,225,225,4,323.909784,2024-03-04 06:45:12.567,algra04,1.074143,280.138028,SampLNS (3h)
232,ea2468,32,37,1,10894.844934,2024-03-03 10:51:18.668,algra05,4.102623,2573.002214,SampLNS (3h)
233,E-Shop,10,12,3,10841.477915,2024-03-04 17:32:52.372,algra03,1.290838,2133.181815,SampLNS (3h)


In [38]:
best_ub = samplns_3h.groupby(["instance_name"])["ub"].min()
best_lb = samplns_3h.groupby(["instance_name"])["lb"].max()
time_last_update_3h = samplns_3h.groupby(["instance_name"])["time_of_last_update"].mean()
samplns_lb = samplns_15min.groupby(["instance_name"])["lb"].mean()
samplns_ub = samplns_15min.groupby(["instance_name"])["ub"].mean()
time_last_update_15min = samplns_15min.groupby(["instance_name"])["time_of_last_update"].mean()
time_initial_solution = samplns_15min.groupby(["instance_name"])["time_used_by_yasa"].mean()
baseline_ub = best_upper_bound_by_baseline.groupby(["instance"])["upper_bound"].min()
# combine to columns
df = pd.DataFrame()
df['num_vars'] = instance_infos["n"]
df['num_clauses'] = instance_infos["m"]
df["best_ub"] = best_ub
df["best_lb"] = best_lb
df["samplns_ub"] = samplns_ub
df["samplns_lb"] = samplns_lb
df["baseline_ub"] = baseline_ub
df["time_initial_solution"] = time_initial_solution
df["time_last_update_15min"] = time_initial_solution+time_last_update_15min
df["time_last_update_3h"] = time_initial_solution+time_last_update_3h
df.sort_values(by=["num_vars", "num_clauses"], inplace=True)
df.to_json(config["04_compare"]["output"]["aggregated_table"])
df

Unnamed: 0_level_0,num_vars,num_clauses,best_ub,best_lb,samplns_ub,samplns_lb,baseline_ub,time_initial_solution,time_last_update_15min,time_last_update_3h
instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
calculate,9.0,15.0,5,5,5.0,5.0,9.0,0.349753,0.409805,0.509709
lcm,9.0,16.0,6,6,6.0,6.0,8.0,0.320516,0.362959,0.459972
email,10.0,17.0,6,6,6.0,6.0,6.0,0.320861,0.390224,0.476566
ChatClient,14.0,20.0,7,7,7.0,7.0,7.0,0.370952,1.386273,1.897152
toybox_2006-10-31_23-30-06,16.0,13.0,8,8,8.0,8.0,9.0,0.392972,1.462752,1.492623
car,16.0,33.0,5,5,5.0,5.0,6.0,0.285722,0.350269,0.417284
FeatureIDE,19.0,27.0,8,8,8.0,8.0,9.0,0.372879,270.834257,127.857561
FameDB,22.0,40.0,8,8,8.0,8.0,8.0,0.325365,1.14209,1.180048
APL,23.0,35.0,7,7,7.0,7.0,9.0,0.37691,1.216988,1.243059
SafeBali,24.0,45.0,11,11,11.0,11.0,11.0,0.289868,0.348789,0.371186


In [36]:
num_instance_with_opt_solution = (df["best_lb"]==df["best_ub"]).sum()
num_instance_with_opt_solution

30

In [37]:
num_instance_with_opt_solution/len(df)

0.6382978723404256

In [39]:
(df["samplns_lb"].round()==df["samplns_ub"].round()).sum()

28