In [18]:
import os
import sys

# Work out which environment hosts the notebook and where the repository lives.
on_colab = "COLAB_GPU" in os.environ
on_paperspace = (not on_colab) and "PAPERSPACE_CLUSTER_ID" in os.environ

if on_colab:
    from google.colab import drive
    print("Hello, Colab")
    drive.mount("/content/drive")
    ROOT_PATH = "/content/drive/MyDrive/hero"
elif on_paperspace:
    print("Hello, Paperspace")
    ROOT_PATH = "/notebooks/hero"
else:
    print("Hello, Local PC")
    ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))

# Both hosted environments point ClearML at the config file stored in the repository root.
if on_colab or on_paperspace:
    os.environ["CLEARML_CONFIG_FILE"] = f"{ROOT_PATH}/clearml.conf"

BTCNN_PATH = os.path.join(ROOT_PATH, "btcnn")
HBO_BENCH_PATH = os.path.join(ROOT_PATH, "hbo_bench")

# Prepend the project directories to the import path; hbo_bench ends up first,
# then btcnn, then the repository root.
sys.path[:0] = [HBO_BENCH_PATH, BTCNN_PATH, ROOT_PATH]

EXPERIMENT_PATH = f"{ROOT_PATH}/experiments/emulation"
ARTIFACTS_PATH = f"{EXPERIMENT_PATH}/artifacts"

Hello, Paperspace


In [19]:
import random
from collections import defaultdict
from matplotlib import pyplot as plt
from json import dump, load

import torch
import pandas as pd
import numpy as np

from hero import Hero
from wrappers import ORACLES_DICT, initialize_oracles, _get_e2e_time, _get_execution_time, _get_planning_time, _get_logical_tree
from neural_network import NN
from hbo_bench.local_search_settings import *
from neural_network import get_bt_regressor
from train_utils import load_model
from emulation import get_report, emulate_online_learning
from hbo_bench.data_config import DEFAULT_DOP, DEFAULT_HINTSET

In [20]:
# Initialise the benchmark oracles for the two workloads used below ("JOB" and "sample_queries").
# NOTE(review): presumably this populates ORACLES_DICT, which later cells index by these names — confirm in `wrappers`.
initialize_oracles(HBO_BENCH_PATH, ["JOB", "sample_queries"])

In [21]:
# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f"device is {DEVICE}")

device is cuda


In [22]:
# Query names of the two benchmarks, read from their oracles (JOB first, then sample_queries).
job_workload, sq_workload = (
    ORACLES_DICT[benchmark].get_query_names()
    for benchmark in ("JOB", "sample_queries")
)

# Static Workload

## Ideal Case (all plans are in train data)

### Default DOP

In [6]:
epochs = 500

# Train one "ideal" regressor per benchmark on ALL_SS, then reload the
# checkpoint that the fit saved to disk into a fresh regressor.
ideal_models = {}
for model_tag, queries in (("job_ideal_model", job_workload), ("sq_ideal_model", sq_workload)):
    checkpoint_path = f"{EXPERIMENT_PATH}/models/{model_tag}.pth"
    trainer = NN(
        fit_settings=ALL_SS,
        inference_settings=EMPTY_SS,
        model=get_bt_regressor(model_tag, DEVICE),
        path_to_save=checkpoint_path,
    )
    trainer.fit(queries, epochs=epochs)
    ideal_models[model_tag] = load_model(DEVICE, checkpoint_path, get_bt_regressor("none", DEVICE))

job_ideal_model = ideal_models["job_ideal_model"]
sq_ideal_model = ideal_models["sq_ideal_model"]

[500/500] MSE: 0.0308: 100%|██████████| 500/500 [13:34<00:00,  1.63s/it]
[500/500] MSE: 0.5229: 100%|██████████| 500/500 [08:45<00:00,  1.05s/it]  


In [7]:
def extend_df(df):
    """Derive the comparison metrics of a report frame and keep the columns shown in the tables.

    Args:
        df: DataFrame built from report records. It must provide the columns
            `def_ex`, `custom_ex`, `opt_ex`, `def_e2e`, `custom_e2e`, `opt_e2e`,
            `n_timeouts`, `predictions` (an iterable of `(query_name, hintset, dop)`
            triples per row) and the descriptive columns listed in `columns` below.

    Returns:
        A new DataFrame restricted to `columns`, rounded to one decimal place.
        The input frame is left unmodified.
    """
    # Work on a copy: the derived columns must not leak into the caller's frame,
    # which also keeps repeated calls on the same frame idempotent.
    df = df.copy()

    # Share of the default -> optimal gap that the custom predictions closed.
    df["ex boost (% of opt)"] = 100 * (df["def_ex"] - df["custom_ex"]) / (df["def_ex"] - df["opt_ex"])
    df["e2e boost (% of opt)"] = 100 * (df["def_e2e"] - df["custom_e2e"]) / (df["def_e2e"] - df["opt_e2e"])
    # Relative end-to-end gain over the default configuration.
    df["e2e boost (%)"] = 100 * (df["def_e2e"] - df["custom_e2e"]) / df["def_e2e"]

    columns = [
        "model",
        "searching_settings",
        "workload",
        "e2e boost (%)",
        "e2e boost (% of opt)",
        "ex boost (% of opt)",
        "n_timeouts (%)",
        "n_real_degradations (%)",
        "custom_e2e",
        "custom_ex",
        "custom_inference",
        "only_def_dop",
    ]

    def count_real_degradations(predictions):
        # A prediction counts as a degradation when its e2e time exceeds the
        # default configuration's e2e time by more than 10%.
        return sum(
            _get_e2e_time(q_n, hs, dop, False) > 1.1 * _get_e2e_time(q_n, DEFAULT_HINTSET, DEFAULT_DOP, False)
            for q_n, hs, dop in predictions
        )

    sizes = df["predictions"].apply(len)
    df["n_timeouts (%)"] = 100 * df["n_timeouts"].apply(int) / sizes
    df["n_real_degradations (%)"] = 100 * df["predictions"].apply(count_real_degradations) / sizes
    return df[columns].round(1)

In [8]:
ss_and_descrs = [
    (GREEDY_DEF_DOP_SS, "greedy"),
    (PRUNED_GREEDY_DEF_DOP_SS, "pruned greedy"),
    (LOCAL_DEF_DOP_SS, "local"),
    (PRUNED_LOCAL_DEF_DOP_SS, "pruned local"),
    (ALL_DEF_DOP_SS, "exhaustive")
]

# (queries, report label, regressor name, checkpoint file stem) per benchmark.
ideal_benchmarks = [
    (job_workload, "JOB", "ideal_job", "job_ideal_model"),
    (sq_workload, "SQ", "ideal_sq", "sq_ideal_model"),
]

# For every search setting: NN report, then Hero report, JOB before SQ.
def_dop_ideal_reports = []
for ss, ss_descr in ss_and_descrs:
    for queries, label, regressor_name, checkpoint_stem in ideal_benchmarks:
        nn_model = NN(fit_settings=EMPTY_SS, inference_settings=ss, model=get_bt_regressor(regressor_name, DEVICE))
        nn_model.model = load_model(
            device=DEVICE,
            path=f"{EXPERIMENT_PATH}/models/{checkpoint_stem}.pth",
            model=nn_model.model,
        )
        def_dop_ideal_reports.append(
            get_report(nn_model, "NN", queries, label, ss, ss_descr, only_def_dop=True)
        )

        hero_model = Hero(fit_settings=ss)
        hero_model.fit(queries)
        def_dop_ideal_reports.append(
            get_report(hero_model, "Hero", queries, label, ss, ss_descr, only_def_dop=True)
        )

with open(f"{ARTIFACTS_PATH}/def_dop_ideal_reports.json", "w") as out_file:
    dump(def_dop_ideal_reports, out_file)

In [9]:
# Read the saved default-DOP reports back and derive the comparison metrics.
with open(f"{ARTIFACTS_PATH}/def_dop_ideal_reports.json", "r") as report_file:
    def_dop_records = load(report_file)
def_dop_df = extend_df(pd.DataFrame(def_dop_records))

In [10]:
def_dop_df.loc[def_dop_df["workload"].eq("JOB")].sort_values("e2e boost (% of opt)", ascending=False)

Unnamed: 0,model,searching_settings,workload,e2e boost (%),e2e boost (% of opt),ex boost (% of opt),n_timeouts (%),n_real_degradations (%),custom_e2e,custom_ex,custom_inference,only_def_dop
9,Hero,local,JOB,46.8,97.0,96.4,0.0,0.0,291.1,266.8,24.2,True
1,Hero,greedy,JOB,45.3,94.0,91.4,0.0,0.0,298.9,276.6,22.3,True
13,Hero,pruned local,JOB,38.4,79.6,89.0,0.0,0.0,336.9,281.3,55.6,True
12,NN,pruned local,JOB,34.2,70.9,87.4,0.0,1.8,359.7,284.5,75.3,True
5,Hero,pruned greedy,JOB,28.5,59.2,61.3,0.0,0.0,390.6,335.4,55.3,True
4,NN,pruned greedy,JOB,24.7,51.2,59.2,0.0,1.8,411.8,339.6,72.2,True
8,NN,local,JOB,16.1,33.5,95.0,0.9,1.8,458.4,269.5,188.9,True
0,NN,greedy,JOB,15.0,31.1,89.1,0.0,0.9,464.6,281.2,183.5,True
16,NN,exhaustive,JOB,11.7,24.2,97.3,5.3,7.1,482.8,265.0,217.8,True
17,Hero,exhaustive,JOB,0.0,0.0,0.0,0.0,0.0,546.7,455.3,91.4,True


In [11]:
def_dop_df.loc[def_dop_df["workload"].eq("SQ")].sort_values("e2e boost (% of opt)", ascending=False)

Unnamed: 0,model,searching_settings,workload,e2e boost (%),e2e boost (% of opt),ex boost (% of opt),n_timeouts (%),n_real_degradations (%),custom_e2e,custom_ex,custom_inference,only_def_dop
11,Hero,local,SQ,64.5,93.9,96.1,0.0,0.0,273.2,240.1,33.1,True
3,Hero,greedy,SQ,63.2,92.0,93.2,0.0,0.0,283.6,254.2,29.4,True
15,Hero,pruned local,SQ,58.3,84.9,89.3,0.0,0.0,320.7,273.4,47.3,True
14,NN,pruned local,SQ,54.0,78.7,89.2,0.0,2.5,353.9,273.7,80.2,True
7,Hero,pruned greedy,SQ,53.0,77.1,80.1,0.0,0.0,361.9,318.3,43.6,True
18,NN,exhaustive,SQ,52.6,76.5,99.4,7.5,10.0,365.1,223.8,141.3,True
6,NN,pruned greedy,SQ,50.5,73.6,80.0,0.0,2.5,380.8,318.9,61.9,True
2,NN,greedy,SQ,44.9,65.4,92.1,0.0,2.5,424.1,259.5,164.6,True
10,NN,local,SQ,40.8,59.4,95.3,5.0,7.5,455.5,243.6,211.9,True
19,Hero,exhaustive,SQ,0.0,0.0,0.0,0.0,0.0,769.7,711.5,58.2,True


### All DOPs

In [12]:
ss_and_descrs = [
    (GREEDY_SS, "greedy"),
    (PRUNED_GREEDY_SS, "pruned greedy"),
    (LOCAL_SS, "local"),
    (PRUNED_LOCAL_SS, "pruned local"),
    (ALL_SS, "exhaustive")
]

# (queries, report label, regressor name, checkpoint file stem) per benchmark.
ideal_benchmarks = [
    (job_workload, "JOB", "ideal_job", "job_ideal_model"),
    (sq_workload, "SQ", "ideal_sq", "sq_ideal_model"),
]

# For every search setting: NN report, then Hero report, JOB before SQ.
all_dops_ideal_reports = []
for ss, ss_descr in ss_and_descrs:
    for queries, label, regressor_name, checkpoint_stem in ideal_benchmarks:
        nn_model = NN(fit_settings=EMPTY_SS, inference_settings=ss, model=get_bt_regressor(regressor_name, DEVICE))
        nn_model.model = load_model(
            device=DEVICE,
            path=f"{EXPERIMENT_PATH}/models/{checkpoint_stem}.pth",
            model=nn_model.model,
        )
        all_dops_ideal_reports.append(
            get_report(nn_model, "NN", queries, label, ss, ss_descr, only_def_dop=False)
        )

        hero_model = Hero(fit_settings=ss)
        hero_model.fit(queries)
        all_dops_ideal_reports.append(
            get_report(hero_model, "Hero", queries, label, ss, ss_descr, only_def_dop=False)
        )

with open(f"{ARTIFACTS_PATH}/all_dops_ideal_reports.json", "w") as out_file:
    dump(all_dops_ideal_reports, out_file)

In [13]:
# Read the saved all-DOP reports back and derive the comparison metrics.
with open(f"{ARTIFACTS_PATH}/all_dops_ideal_reports.json", "r") as report_file:
    all_dops_records = load(report_file)
all_dops_df = extend_df(pd.DataFrame(all_dops_records))

In [14]:
all_dops_df.loc[all_dops_df["workload"].eq("JOB")].sort_values("e2e boost (% of opt)", ascending=False)

Unnamed: 0,model,searching_settings,workload,e2e boost (%),e2e boost (% of opt),ex boost (% of opt),n_timeouts (%),n_real_degradations (%),custom_e2e,custom_ex,custom_inference,only_def_dop
9,Hero,local,JOB,64.6,98.4,98.9,0.0,0.0,193.4,166.7,26.7,False
13,Hero,pruned local,JOB,61.2,93.3,97.2,0.0,0.0,212.0,171.9,40.1,False
12,NN,pruned local,JOB,56.2,85.7,94.1,0.9,2.7,239.2,180.7,58.5,False
1,Hero,greedy,JOB,51.2,78.0,70.5,0.0,0.0,266.7,249.5,17.2,False
8,NN,local,JOB,37.5,57.2,97.5,4.4,5.3,341.4,170.7,170.7,False
5,Hero,pruned greedy,JOB,33.2,50.5,49.1,0.0,0.9,365.4,312.0,53.5,False
4,NN,pruned greedy,JOB,28.7,43.7,47.0,0.0,1.8,389.8,318.3,71.5,False
0,NN,greedy,JOB,20.2,30.8,68.4,1.8,2.7,436.1,255.7,180.4,False
17,Hero,exhaustive,JOB,0.0,0.0,0.0,0.0,0.0,546.7,455.3,91.4,False
16,NN,exhaustive,JOB,-17.8,-27.0,98.3,8.8,10.6,643.7,168.6,475.2,False


In [15]:
all_dops_df.loc[all_dops_df["workload"].eq("SQ")].sort_values("e2e boost (% of opt)", ascending=False)

Unnamed: 0,model,searching_settings,workload,e2e boost (%),e2e boost (% of opt),ex boost (% of opt),n_timeouts (%),n_real_degradations (%),custom_e2e,custom_ex,custom_inference,only_def_dop
11,Hero,local,SQ,69.6,94.0,94.8,0.0,0.0,234.0,208.4,25.6,False
15,Hero,pruned local,SQ,66.9,90.4,91.8,0.0,0.0,254.6,224.2,30.3,False
3,Hero,greedy,SQ,65.3,88.2,89.7,0.0,0.0,267.4,235.3,32.1,False
14,NN,pruned local,SQ,59.1,79.8,90.7,0.0,2.5,314.9,230.1,84.7,False
7,Hero,pruned greedy,SQ,54.9,74.2,76.6,0.0,0.0,347.1,305.1,42.0,False
6,NN,pruned greedy,SQ,52.3,70.7,76.7,0.0,2.5,366.9,304.6,62.3,False
10,NN,local,SQ,47.7,64.5,94.1,5.0,7.5,402.3,212.0,190.3,False
2,NN,greedy,SQ,44.3,59.9,88.0,5.0,7.5,428.6,244.2,184.4,False
18,NN,exhaustive,SQ,38.0,51.4,98.4,5.0,7.5,477.1,189.4,287.7,False
19,Hero,exhaustive,SQ,0.0,0.0,0.0,0.0,0.0,769.7,711.5,58.2,False


We can see that even in the ideal scenario, the NN loses a bit, mainly due to the longer inference time. Moreover, it sometimes leads to degradations and even `T/O` (this probably happens on small queries).

The advantage of a `PRUNED LOCAL` strategy is also evident.

## Online Scenario

Collecting train data during online optimisation.

In [23]:
epochs, iterations = 100, 5

# Search settings emulated online; the commented-out entries are currently disabled.
def_dop_online_settings = [
    (GREEDY_DEF_DOP_SS, "GREEDY"),
    #(PRUNED_GREEDY_DEF_DOP_SS, "PRUNED GREEDY"),
    (LOCAL_DEF_DOP_SS, "LOCAL"),
    (PRUNED_LOCAL_DEF_DOP_SS, "PRUNED LOCAL"),
    #(ALL_DEF_DOP_SS, "EXHAUSTIVE"),
]
all_dops_online_settings = [
    (GREEDY_SS, "GREEDY"),
    #(PRUNED_GREEDY_SS, "PRUNED GREEDY"),
    (LOCAL_SS, "LOCAL"),
    (PRUNED_LOCAL_SS, "PRUNED LOCAL"),
    #(ALL_SS, "EXHAUSTIVE"),
]

for workload, workload_name in [(job_workload, "JOB"), (sq_workload, "SQ")]:
    def_dop_list_online_reports, all_dops_list_online_reports = [], []

    # Default-DOP runs come first, then the runs over all DOPs.
    for only_def_dop, settings, collected in [
        (True, def_dop_online_settings, def_dop_list_online_reports),
        (False, all_dops_online_settings, all_dops_list_online_reports),
    ]:
        for ss, ss_descr in settings:
            collected.append(
                emulate_online_learning(
                    "NN", workload, workload_name, ss, ss_descr,
                    only_def_dop, True, epochs, iterations, None, DEVICE,
                )
            )

    # Both report lists are written only after all runs of the workload finished.
    with open(f"{ARTIFACTS_PATH}/{workload_name}_def_dop_list_online_reports.json", "w") as out_file:
        dump(def_dop_list_online_reports, out_file)
    with open(f"{ARTIFACTS_PATH}/{workload_name}_all_dops_list_online_reports.json", "w") as out_file:
        dump(all_dops_list_online_reports, out_file)

[20/100] MSE: 73.0167:  20%|██        | 20/100 [01:01<04:07,  3.09s/it]


KeyboardInterrupt: 

In [17]:
# NOTE(review): this cell repeats the previous online-emulation cell verbatim
# and writes to the same artifact files; consider keeping only one of them.
epochs, iterations = 100, 5

# Search settings emulated online; the commented-out entries are currently disabled.
def_dop_online_settings = [
    (GREEDY_DEF_DOP_SS, "GREEDY"),
    #(PRUNED_GREEDY_DEF_DOP_SS, "PRUNED GREEDY"),
    (LOCAL_DEF_DOP_SS, "LOCAL"),
    (PRUNED_LOCAL_DEF_DOP_SS, "PRUNED LOCAL"),
    #(ALL_DEF_DOP_SS, "EXHAUSTIVE"),
]
all_dops_online_settings = [
    (GREEDY_SS, "GREEDY"),
    #(PRUNED_GREEDY_SS, "PRUNED GREEDY"),
    (LOCAL_SS, "LOCAL"),
    (PRUNED_LOCAL_SS, "PRUNED LOCAL"),
    #(ALL_SS, "EXHAUSTIVE"),
]

for workload, workload_name in [(job_workload, "JOB"), (sq_workload, "SQ")]:
    def_dop_list_online_reports, all_dops_list_online_reports = [], []

    # Default-DOP runs come first, then the runs over all DOPs.
    for only_def_dop, settings, collected in [
        (True, def_dop_online_settings, def_dop_list_online_reports),
        (False, all_dops_online_settings, all_dops_list_online_reports),
    ]:
        for ss, ss_descr in settings:
            collected.append(
                emulate_online_learning(
                    "NN", workload, workload_name, ss, ss_descr,
                    only_def_dop, True, epochs, iterations, None, DEVICE,
                )
            )

    # Both report lists are written only after all runs of the workload finished.
    with open(f"{ARTIFACTS_PATH}/{workload_name}_def_dop_list_online_reports.json", "w") as out_file:
        dump(def_dop_list_online_reports, out_file)
    with open(f"{ARTIFACTS_PATH}/{workload_name}_all_dops_list_online_reports.json", "w") as out_file:
        dump(all_dops_list_online_reports, out_file)

[100/100] MSE: 0.1894: 100%|██████████| 100/100 [01:04<00:00,  1.54it/s]
[100/100] MSE: 0.5487: 100%|██████████| 100/100 [06:46<00:00,  4.06s/it]
[17/100] MSE: 12.4115:  17%|█▋        | 17/100 [00:17<01:23,  1.00s/it]


KeyboardInterrupt: 

In [None]:
def visualise(list_reports, title):
    """Plot execution and end-to-end times over the online-learning iterations.

    Args:
        list_reports: non-empty list of runs; each run is a non-empty list of
            per-iteration report dicts with the keys `custom_ex`, `custom_e2e`
            and `searching_settings`. The first dict of the first run also
            supplies `opt_ex`, `opt_e2e`, `def_ex` and `def_e2e` for the
            horizontal reference lines.
        title: benchmark description appended to the figure title.
    """
    fig, ax = plt.subplots(figsize=(16, 10))
    reference_colors = {"opt": "green", "def": "orange"}
    thin_width, thick_width = 2, 3

    n_runs = len(list_reports)
    n_iterations = len(list_reports[0])
    iteration_axis = np.arange(n_iterations)

    # One colour per run; execution time dashed and thin, end-to-end time solid and thick.
    for run_idx, report in enumerate(list_reports):
        run_color = plt.cm.viridis(run_idx / n_runs)
        setting_name = report[0]["searching_settings"]
        execution_times = [step["custom_ex"] for step in report]
        e2e_times = [step["custom_e2e"] for step in report]
        ax.plot(
            iteration_axis,
            execution_times,
            marker="o",
            linewidth=thin_width,
            linestyle='--',
            color=run_color,
            label=f'{setting_name} Ex',
        )
        ax.plot(
            iteration_axis,
            e2e_times,
            marker="o",
            linewidth=thick_width,
            markersize=10.0,
            linestyle='-',
            color=run_color,
            label=f'{setting_name} E2E',
        )

    # Horizontal reference lines for the optimal and the default configuration.
    first_step = list_reports[0][0]
    line_styles = [("ex", thin_width, "--", 0.4), ("e2e", thick_width, "-", 1.0)]
    for metric in ["opt", "def"]:
        for key, width, style, alpha in line_styles:
            level = first_step[f"{metric}_{key}"]
            ax.plot(
                [0, n_iterations - 1],
                [level, level],
                linewidth=width,
                color=reference_colors[metric],
                alpha=alpha,
                linestyle=style,
                label=f'{metric}_{key}',
            )

    # Collapse legend entries that share a label, keeping the last handle of each.
    handles, labels = ax.get_legend_handles_labels()
    unique_entries = dict(zip(labels, handles))
    ax.legend(unique_entries.values(), unique_entries.keys())

    ax.set_xticks(range(n_iterations))
    ax.set_ylim(bottom=0)

    ax.set_xlabel("iteration")
    ax.set_ylabel("time (sec)")
    ax.set_title(f"Convergence of online learning on benchmark {title}")
    plt.show()

In [None]:
# Plot the saved online-learning reports: default DOP first, then all DOPs, per workload.
for workload, workload_name in [(job_workload, "JOB"), (sq_workload, "SQ")]:
    def_dop_path = f"{ARTIFACTS_PATH}/{workload_name}_def_dop_list_online_reports.json"
    with open(def_dop_path, "r") as report_file:
        def_dop_list_online_reports = load(report_file)
    visualise(def_dop_list_online_reports, title=workload_name + " [default DOP]")

    all_dops_path = f"{ARTIFACTS_PATH}/{workload_name}_all_dops_list_online_reports.json"
    with open(all_dops_path, "r") as report_file:
        all_dops_list_online_reports = load(report_file)
    visualise(all_dops_list_online_reports, title=workload_name + " [all DOPs]")

We see that, as the search space expands, the exhaustive algorithms stop working (at least on JOB). The superiority of the local search algorithm and the pruning procedure is also evident.

Moreover, it did not always converge to the optimum even in 25 iterations - this tells us that it makes sense to take the learning procedure offline (as it is done in `Hero`).

# Dynamic Scenario

## Split by Time

In [None]:
epochs = 1
time_split_settings = [
    (GREEDY_SS, "GREEDY"),
    (PRUNED_GREEDY_SS, "PRUNED GREEDY"),
    (LOCAL_SS, "LOCAL"),
    (PRUNED_LOCAL_SS, "PRUNED LOCAL"),
    (ALL_SS, "EXHAUSTIVE"),
]

for workload, workload_name in [(job_workload, "JOB"), (sq_workload, "SQ")]:
    # Start from empty lists for every workload: the artifacts are written per
    # workload, so reports of a previous benchmark must not be carried over.
    slow_reports, fast_reports = [], []

    # Look up the e2e time of the default configuration once per query.
    default_e2e = {q_n: _get_e2e_time(q_n, DEFAULT_HINTSET, DEFAULT_DOP) for q_n in workload}

    # Only queries whose default e2e time exceeds 1 take part in the split.
    workload = [q_n for q_n in workload if default_e2e[q_n] > 1]
    print(len(workload))

    # The median default e2e time separates the slow half from the fast half.
    time_threshold = np.quantile([default_e2e[q_n] for q_n in workload], .5)

    # slow -> fast: train on queries above the median, test on the rest.
    slow_train = [q_n for q_n in workload if default_e2e[q_n] > time_threshold]
    slow_test = [q_n for q_n in workload if default_e2e[q_n] <= time_threshold]
    assert len(slow_train) + len(slow_test) == len(workload)
    slow_nnmodel = NN(fit_settings=ALL_SS, inference_settings=EMPTY_SS, model=get_bt_regressor("dummy", DEVICE))
    slow_nnmodel.fit(slow_train, epochs)

    # fast -> slow: train on queries below the median, test on the rest.
    fast_train = [q_n for q_n in workload if default_e2e[q_n] < time_threshold]
    fast_test = [q_n for q_n in workload if default_e2e[q_n] >= time_threshold]
    assert len(fast_train) + len(fast_test) == len(workload)
    fast_nnmodel = NN(fit_settings=ALL_SS, inference_settings=EMPTY_SS, model=get_bt_regressor("dummy", DEVICE))
    fast_nnmodel.fit(fast_train, epochs)

    for ss, ss_descr in time_split_settings:
        slow_reports.append(get_report(slow_nnmodel, "NN", slow_train, f"{workload_name}[train]", ss, ss_descr, only_def_dop=False))
        slow_reports.append(get_report(slow_nnmodel, "NN", slow_test, f"{workload_name}[test]", ss, ss_descr, only_def_dop=False))
        slow_heromodel = Hero(ss)
        slow_heromodel.fit(slow_train)
        slow_reports.append(get_report(slow_heromodel, "Hero", slow_train, f"{workload_name}[train]", ss, ss_descr, only_def_dop=False))
        slow_reports.append(get_report(slow_heromodel, "Hero", slow_test, f"{workload_name}[test]", ss, ss_descr, only_def_dop=False))

        fast_reports.append(get_report(fast_nnmodel, "NN", fast_train, f"{workload_name}[train]", ss, ss_descr, only_def_dop=False))
        fast_reports.append(get_report(fast_nnmodel, "NN", fast_test, f"{workload_name}[test]", ss, ss_descr, only_def_dop=False))
        fast_heromodel = Hero(ss)
        fast_heromodel.fit(fast_train)
        fast_reports.append(get_report(fast_heromodel, "Hero", fast_train, f"{workload_name}[train]", ss, ss_descr, only_def_dop=False))
        fast_reports.append(get_report(fast_heromodel, "Hero", fast_test, f"{workload_name}[test]", ss, ss_descr, only_def_dop=False))

        # The artifacts are rewritten after every search setting, so the
        # reports gathered so far are on disk if the run is interrupted.
        with open(f"{ARTIFACTS_PATH}/{workload_name}_slow_reports.json", "w") as f:
            dump(slow_reports, f)
        with open(f"{ARTIFACTS_PATH}/{workload_name}_fast_reports.json", "w") as f:
            dump(fast_reports, f)

In [None]:
# Load the four time-split artifacts into extended report frames.
time_split_frames = {}
for artifact_stem in ("JOB_slow_reports", "JOB_fast_reports", "SQ_slow_reports", "SQ_fast_reports"):
    with open(f"{ARTIFACTS_PATH}/{artifact_stem}.json", "r") as report_file:
        time_split_frames[artifact_stem] = extend_df(pd.DataFrame(load(report_file)))

job_slow_df = time_split_frames["JOB_slow_reports"]
job_fast_df = time_split_frames["JOB_fast_reports"]
sq_slow_df = time_split_frames["SQ_slow_reports"]
sq_fast_df = time_split_frames["SQ_fast_reports"]

### slow $\rightarrow$ fast

In [None]:
job_slow_df.loc[job_slow_df["workload"].eq("JOB[train]")].sort_values("e2e boost (%)", ascending=False)

In [None]:
job_slow_df.loc[job_slow_df["workload"].eq("JOB[test]")].sort_values("e2e boost (%)", ascending=False)

In [None]:
sq_slow_df.loc[sq_slow_df["workload"].eq("SQ[train]")].sort_values("e2e boost (%)", ascending=False)

In [None]:
sq_slow_df.loc[sq_slow_df["workload"].eq("SQ[test]")].sort_values("e2e boost (%)", ascending=False)

We see that `Hero` is always better on `train` and safer on `test` (`SQ`), but it sometimes misses a possible boost (`JOB`).

### fast $\rightarrow$ slow

In [None]:
job_fast_df.loc[job_fast_df["workload"].eq("JOB[train]")].sort_values("e2e boost (%)", ascending=False)

In [None]:
job_fast_df.loc[job_fast_df["workload"].eq("JOB[test]")].sort_values("e2e boost (%)", ascending=False)

In [None]:
sq_fast_df.loc[sq_fast_df["workload"].eq("SQ[train]")].sort_values("e2e boost (%)", ascending=False)

In [None]:
sq_fast_df.loc[sq_fast_df["workload"].eq("SQ[test]")].sort_values("e2e boost (%)", ascending=False)

**Conclusions.**\
We can clearly see the signs of **overfitting** in `Hero` (which is what we wanted): almost perfect performance on the training set and safe, rare predictions on the test set. On new data it stops predicting, so we do not get degradations.

The NN approach, on the other hand, has the advantage of **being able to generalise knowledge** to new queries. Thus, we can see that generalisation from fast queries on `SQ` is quite effective - we can speed up their execution by 2 times (even taking into account that we get degradations in 17% of cases). However, prediction on new queries can also bring regression, which is observed from generalising over slow queries.

P.S. It is probably easier to generalise knowledge from short queries to long ones, because the parts of efficient long-query plans are themselves quite fast to execute and have most likely already been encountered in the plans of fast queries.

## Split by structure

In [None]:
def get_traintest_split(groups, ratio, seed=42, debug=False):
    """Split every group into a train and a test part and concatenate the parts.

    Each group is shuffled independently after re-seeding the global `random`
    generator with `seed`, then cut at `int(len(group) * ratio)`.

    Args:
        groups: iterable of sequences of items; the sequences are not modified.
        ratio: fraction of every group that goes to the train part.
        seed: seed applied to the global `random` generator before each shuffle.
        debug: when true, print how every shuffled group was cut.

    Returns:
        A `(train, test)` tuple of lists.
    """
    train, test = [], []
    for group in groups:
        # Shuffle a copy: shuffling the caller's list in place would make the
        # result for a given seed depend on every earlier call with that list.
        shuffled = list(group)
        random.seed(seed)
        random.shuffle(shuffled)
        pivot = int(len(shuffled) * ratio)
        train += shuffled[:pivot]
        test += shuffled[pivot:]
        if debug:
            print(f"{shuffled} -> {shuffled[:pivot]}, {shuffled[pivot:]}")
    return train, test

In [None]:
epochs = 300
structure_settings = [
    (GREEDY_SS, "GREEDY"),
    (PRUNED_GREEDY_SS, "PRUNED GREEDY"),
    (LOCAL_SS, "LOCAL"),
    (PRUNED_LOCAL_SS, "PRUNED LOCAL"),
    (ALL_SS, "EXHAUSTIVE"),
]

for workload, workload_name in [(job_workload, "JOB")]:

    # Group the queries by the logical tree of their default plan.
    logical_trees_to_queries = defaultdict(list)
    for q_n in workload:
        logical_trees_to_queries[_get_logical_tree(q_n, DEFAULT_HINTSET, DEFAULT_DOP)].append(q_n)

    # Only logical trees shared by more than one query take part in the split.
    repeated_groups = [v for v in logical_trees_to_queries.values() if len(v) > 1]

    structure_reports = []
    for seed in range(10):
        train, test = get_traintest_split(repeated_groups, ratio=0.5, seed=seed)
        # The model is built before the search settings are iterated, so it must
        # not read the loop variable `ss` (previously a value leaked from an
        # earlier cell). EMPTY_SS matches the time-split experiment.
        # NOTE(review): assumes get_report applies the `ss` it is given — confirm.
        nnmodel = NN(fit_settings=ALL_SS, inference_settings=EMPTY_SS, model=get_bt_regressor("dummy", DEVICE))
        nnmodel.fit(train, epochs)

        for ss, ss_descr in structure_settings:
            structure_reports.append(get_report(nnmodel, "NN", train, f"{workload_name}[train]", ss, ss_descr, only_def_dop=False))
            structure_reports.append(get_report(nnmodel, "NN", test, f"{workload_name}[test]", ss, ss_descr, only_def_dop=False))

            heromodel = Hero(ss)
            heromodel.fit(train)
            structure_reports.append(get_report(heromodel, "Hero", train, f"{workload_name}[train]", ss, ss_descr, only_def_dop=False))
            structure_reports.append(get_report(heromodel, "Hero", test, f"{workload_name}[test]", ss, ss_descr, only_def_dop=False))

    with open(f"{ARTIFACTS_PATH}/{workload_name}_structure_reports.json", "w") as f:
        dump(structure_reports, f)

In [None]:
def aggregate_results(reports):
    """Aggregate repeated experiment reports into per-configuration mean and std.

    Args:
        reports: list of report records. Each record must provide `model`,
            `workload`, `searching_settings`, `def_ex`, `custom_ex`, `opt_ex`,
            `def_e2e`, `custom_e2e`, `opt_e2e`, `custom_inference`,
            `n_timeouts` and `predictions` (an iterable of
            `(query_name, hintset, dop)` triples).

    Returns:
        A DataFrame with one row per `(model, workload, searching_settings)`
        combination, holding a `mean <metric>` and a `std <metric>` column for
        every metric in `value_cols`, rounded to one decimal place.
    """
    df = pd.DataFrame(reports)
    # Share of the default -> optimal gap that the custom predictions closed.
    df["ex boost (% of opt)"] = 100 * (df["def_ex"] - df["custom_ex"]) / (df["def_ex"] - df["opt_ex"])
    df["e2e boost (% of opt)"] = 100 * (df["def_e2e"] - df["custom_e2e"]) / (df["def_e2e"] - df["opt_e2e"])
    # Relative end-to-end gain over the default configuration.
    df["e2e boost (%)"] = 100 * (df["def_e2e"] - df["custom_e2e"]) / df["def_e2e"]

    def count_real_degradations(predictions):
        # A prediction counts as a degradation when its e2e time exceeds the
        # default configuration's e2e time by more than 10%.
        # NOTE(review): `extend_df` passes an extra `False` as fourth argument
        # to `_get_e2e_time`; confirm which of the two variants is intended.
        return sum(
            _get_e2e_time(q_n, hs, dop) > 1.1 * _get_e2e_time(q_n, DEFAULT_HINTSET, DEFAULT_DOP)
            for q_n, hs, dop in predictions
        )

    sizes = df["predictions"].apply(len)
    df["n_timeouts (%)"] = 100 * df["n_timeouts"].apply(int) / sizes
    df["n_real_degradations (%)"] = 100 * df["predictions"].apply(count_real_degradations) / sizes

    experiment_cols = ["model", "workload", "searching_settings"]
    value_cols = [
        "e2e boost (%)",
        "e2e boost (% of opt)",
        "ex boost (% of opt)",
        "n_timeouts (%)",
        "n_real_degradations (%)",
        "custom_e2e",
        "custom_ex",
        "custom_inference",
    ]

    grouped_df = (
        df[experiment_cols + value_cols]
        .groupby(experiment_cols)
        .agg({col: ["mean", "std"] for col in value_cols})
        .reset_index()
    )

    # Flatten the two-level header: ("metric", "mean") -> "metric mean"; the
    # grouping keys carry an empty second level and keep their plain name.
    grouped_df.columns = [' '.join(col).strip() for col in grouped_df.columns.values]

    # Order the columns as keys, all means, all stds, then rename
    # "<metric> mean" / "<metric> std" to "mean <metric>" / "std <metric>".
    columns_to_keep = experiment_cols + [f"{col} mean" for col in value_cols] + [f"{col} std" for col in value_cols]
    grouped_df = grouped_df[columns_to_keep]
    grouped_df.columns = experiment_cols + [f"mean {col}" for col in value_cols] + [f"std {col}" for col in value_cols]
    return grouped_df.round(1)

In [None]:
# Read the structure-split reports back and aggregate them over the seeds.
with open(f"{ARTIFACTS_PATH}/JOB_structure_reports.json", "r") as report_file:
    job_structure_records = load(report_file)
job_structure_df = aggregate_results(job_structure_records)

In [None]:
# Columns shown in the aggregated tables: identifying keys, mean metrics, then spread of the boosts.
_key_cols = ["model", "searching_settings"]
_mean_cols = [
    "mean e2e boost (%)",
    "mean e2e boost (% of opt)",
    "mean ex boost (% of opt)",
    "mean n_timeouts (%)",
    "mean n_real_degradations (%)",
]
_std_cols = [
    "std e2e boost (%)",
    "std e2e boost (% of opt)",
]
interesting_cols = [*_key_cols, *_mean_cols, *_std_cols]

In [None]:
job_structure_df.loc[job_structure_df["workload"].eq("JOB[train]")].sort_values("mean e2e boost (% of opt)", ascending=False).loc[:, interesting_cols]

In [None]:
job_structure_df.loc[job_structure_df["workload"].eq("JOB[test]")].sort_values("mean e2e boost (% of opt)", ascending=False).loc[:, interesting_cols]

We see that generalisation in the presence of repeated structure is efficient (a boost of up to 36% on `JOB` and 70% on `SQ`), but even though the structure of the logical plans was repeated, about 20% of the predictions either slowed the query down or led to a `T/O`.

## Random Split

In [None]:
epochs = 300
random_split_settings = [
    (GREEDY_SS, "GREEDY"),
    (PRUNED_GREEDY_SS, "PRUNED GREEDY"),
    (LOCAL_SS, "LOCAL"),
    (PRUNED_LOCAL_SS, "PRUNED LOCAL"),
    (ALL_SS, "EXHAUSTIVE"),
]

for workload, workload_name in [(job_workload, "JOB"), (sq_workload, "SQ")]:
    random_split_reports = []
    for seed in range(10):
        # The whole workload is treated as a single group and split in half.
        train, test = get_traintest_split([workload], ratio=0.5, seed=seed)
        # The model is built before the search settings are iterated, so it must
        # not read the loop variable `ss` (previously a value leaked from an
        # earlier cell). EMPTY_SS matches the time-split experiment.
        # NOTE(review): assumes get_report applies the `ss` it is given — confirm.
        nnmodel = NN(fit_settings=ALL_SS, inference_settings=EMPTY_SS, model=get_bt_regressor("dummy", DEVICE))
        nnmodel.fit(train, epochs)

        for ss, ss_descr in random_split_settings:
            random_split_reports.append(get_report(nnmodel, "NN", train, f"{workload_name}[train]", ss, ss_descr, only_def_dop=False))
            random_split_reports.append(get_report(nnmodel, "NN", test, f"{workload_name}[test]", ss, ss_descr, only_def_dop=False))

            heromodel = Hero(ss)
            heromodel.fit(train)
            random_split_reports.append(get_report(heromodel, "Hero", train, f"{workload_name}[train]", ss, ss_descr, only_def_dop=False))
            random_split_reports.append(get_report(heromodel, "Hero", test, f"{workload_name}[test]", ss, ss_descr, only_def_dop=False))

    with open(f"{ARTIFACTS_PATH}/{workload_name}_random_split_reports.json", "w") as f:
        dump(random_split_reports, f)

In [None]:
# Read the random-split reports of both benchmarks back and aggregate them over the seeds.
with open(f"{ARTIFACTS_PATH}/JOB_random_split_reports.json", "r") as report_file:
    job_random_split_records = load(report_file)
job_random_split_df = aggregate_results(job_random_split_records)

with open(f"{ARTIFACTS_PATH}/SQ_random_split_reports.json", "r") as report_file:
    sq_random_split_records = load(report_file)
sq_random_split_df = aggregate_results(sq_random_split_records)

In [None]:
sq_random_split_df.loc[sq_random_split_df["workload"].eq("SQ[test]")].sort_values("mean e2e boost (%)", ascending=False).loc[:, interesting_cols]

In [None]:
job_random_split_df.loc[job_random_split_df["workload"].eq("JOB[test]")].sort_values("mean e2e boost (%)", ascending=False).loc[:, interesting_cols]

**Conclusions.**\
We can clearly see that with random partitioning the "power" of generalisation drops significantly. We can also see that the more plans are evaluated by the NN, the greater the probability of observing degradations and regressions (up to 30% `T/O`). But even the versions with pruned search slow down queries in about 20% of cases.