In [1]:
import os
import sys

# Use the parent of the current working directory as the project root and put
# it first on the import path (presumably so the `src.*` imports below resolve
# when the notebook is started from a sub-directory of the repo -- confirm).
ROOT_PATH = os.path.split(os.getcwd())[0]
sys.path.insert(0, ROOT_PATH)

In [2]:
from json import load, dumps, dump
from src.datasets.oracle import Oracle, OracleRequest
from src.datasets.data_config import HINTSETS, DEFAULT_HINTSET

In [3]:
# One Oracle per benchmark, keyed by benchmark name; each is built from the
# directory of the same name under <ROOT_PATH>/data/processed.
cached_oracles = {
    benchmark: Oracle(f"{ROOT_PATH}/data/processed/{benchmark}")
    for benchmark in ("JOB", "tpch_10gb", "sample_queries")
}

In [27]:
def _extract_query_number(query_name):
    import re

    return int(re.search(r"-?\d+\.?\d*", query_name).group())


# Sanity checks: the first number in the name is used and leading zeros are dropped.
assert all(
    _extract_query_number(sample) == 42
    for sample in ("42abc666", "q42abc666", "q042abc666")
)

In [20]:
def get_worst_hintset(oracle, query_name, possible_hintsets, dop):
    """Return the hintset with the largest execution time for ``query_name``.

    Every candidate in ``possible_hintsets`` is looked up, in order, through
    ``oracle.get_execution_time`` at the given ``dop``. Ties keep the earliest
    candidate; if no time is greater than ``-inf`` the first candidate is
    returned.

    Args:
        oracle: Object exposing ``get_execution_time(request=...)``.
        query_name: Name of the query to look up.
        possible_hintsets: Non-empty ``list`` of candidate hintsets.
        dop: Value forwarded as ``dop`` to every ``OracleRequest``.

    Returns:
        The candidate whose reported execution time is the largest.
    """
    assert isinstance(possible_hintsets, list) and len(possible_hintsets)

    slowest_hs = possible_hintsets[0]
    slowest_time = float("-inf")
    for candidate in possible_hintsets:
        elapsed = oracle.get_execution_time(
            request=OracleRequest(query_name=query_name, hintset=candidate, dop=dop)
        )
        # Strict comparison: an equal time never displaces an earlier candidate.
        if not (slowest_time < elapsed):
            continue
        slowest_hs = candidate
        slowest_time = elapsed

    return slowest_hs


In [None]:
def get_best_hintset(oracle, query_name, possible_hintsets, dop):
    """Return the hintset with the smallest execution time for ``query_name``.

    Every candidate in ``possible_hintsets`` is looked up, in order, through
    ``oracle.get_execution_time`` at the given ``dop``. Ties keep the earliest
    candidate; if no time is smaller than ``inf`` the first candidate is
    returned.

    Args:
        oracle: Object exposing ``get_execution_time(request=...)``.
        query_name: Name of the query to look up.
        possible_hintsets: Non-empty ``list`` of candidate hintsets.
        dop: Value forwarded as ``dop`` to every ``OracleRequest``.

    Returns:
        The candidate whose reported execution time is the smallest.
    """
    assert isinstance(possible_hintsets, list) and len(possible_hintsets)

    # Running (time, hintset) pair for the fastest candidate seen so far.
    fastest = (float("inf"), possible_hintsets[0])
    for candidate in possible_hintsets:
        lookup = OracleRequest(query_name=query_name, hintset=candidate, dop=dop)
        elapsed = oracle.get_execution_time(request=lookup)
        if elapsed < fastest[0]:
            fastest = (elapsed, candidate)

    return fastest[1]

In [21]:
# Query under inspection and the `dop` value used for every oracle lookup.
dop, query_name = 64, "6d"

# Fastest and slowest hintsets for this query according to the JOB oracle.
best_hintset = get_best_hintset(
    oracle=cached_oracles["JOB"],
    query_name=query_name,
    possible_hintsets=HINTSETS,
    dop=dop,
)
worst_hintset = get_worst_hintset(
    oracle=cached_oracles["JOB"],
    query_name=query_name,
    possible_hintsets=HINTSETS,
    dop=dop,
)

In [24]:
# One request per hintset of interest, all for the same query and dop.
best_request, worst_request, default_request = (
    OracleRequest(query_name=query_name, hintset=hintset, dop=dop)
    for hintset in (best_hintset, worst_hintset, DEFAULT_HINTSET)
)

# Displayed as the cell result: execution times in the order (best, worst, default).
tuple(
    cached_oracles["JOB"].get_execution_time(request)
    for request in (best_request, worst_request, default_request)
)

(5018.736, 4398046511104.0, 14361.208)