In [34]:
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
sys.path.append("../")
from utils.load_brad_trace import load_trace, create_concurrency_dataset, load_trace_all_version
from models.concurrency.analytical_models import SimpleFitCurve, ComplexFitCurve
from models.concurrency.xgboost import XGBoostPredictor
from models.concurrency.linear_regression import SimpleLinearReg
np.set_printoptions(suppress=True)

In [40]:
# Load every trace version from the trace directory, derive one concurrency
# dataset per trace, and cache the median runtime of each query from the
# repeating_olap_batch_warmup.csv file.
folder_name = "mixed_aurora"
directory = f"/Users/ziniuw/Desktop/research/Data/AWS_trace/{folder_name}/"
all_raw_trace, all_trace = load_trace_all_version(directory, 8, concat=True)

# One concurrency frame per trace, stacked afterwards under a fresh index.
all_concurrency_df = [
    create_concurrency_dataset(single_trace, engine=None, pre_exec_interval=200)
    for single_trace in all_trace
]
concurrency_df = pd.concat(all_concurrency_df, ignore_index=True)

isolated_trace_df = pd.read_csv(f"/Users/ziniuw/Desktop/research/Data/AWS_trace/{folder_name}/repeating_olap_batch_warmup.csv")
#isolated_trace_df = pd.read_csv(f"/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_redshift/repeating_olap_batch_warmup.csv")
# Expose `run_time_s` under the `runtime` column name used by the rest of the notebook.
isolated_trace_df["runtime"] = isolated_trace_df["run_time_s"]

# query_idx -> median runtime over all rows of that query in isolated_trace_df.
isolated_rt_cache = {
    query_idx: np.median(query_rows["runtime"])
    for query_idx, query_rows in isolated_trace_df.groupby("query_idx")
}

In [36]:
# For each query print: query_idx, number of rows, cached isolated runtime,
# then mean / min / max / std of the runtimes found in concurrency_df.
for query_idx, query_rows in concurrency_df.groupby("query_idx"):
    observed_rt = query_rows["runtime"].values
    print(
        query_idx,
        len(query_rows),
        isolated_rt_cache[query_idx],
        np.mean(observed_rt),
        np.min(observed_rt),
        np.max(observed_rt),
        np.std(observed_rt),
    )

0 23 2.397554874420166 6.9897928134254785 1.7361910343170166 46.89248514175415 9.473017881796181
1 21 0.0336682796478271 0.042800471896216954 0.0306851863861084 0.064239501953125 0.008890692266292256
2 25 20.54295063018799 77.7485980606079 36.011961460113525 176.65631198883057 32.84361507978263
3 21 235.5201551914215 723.5043605055128 514.6219673156738 996.0332036018372 99.72439938805589
4 23 75.73165822029114 183.26921186239824 95.24777936935423 322.1603753566742 57.0662497887613
5 21 15.107906103134155 72.08878095944722 24.272745609283447 158.4005913734436 29.339563577499423
6 22 37.87974190711975 103.16899719021536 31.87341856956482 219.7085883617401 39.49215570636949
7 22 0.5295925140380859 0.9977116801521995 0.5253846645355225 3.6383426189422607 0.6118403643045283
8 23 0.8444762229919434 5.961048416469408 0.2201185226440429 27.26981830596924 6.273220019590701
9 22 0.678797721862793 0.1255583979866721 0.0557055473327636 0.851060152053833 0.1615357087075341
10 21 0.0771567821502685 

In [41]:
# Reproducible 80/20 random split of concurrency_df by row position.
np.random.seed(0)
num_rows = len(concurrency_df)
train_idx = np.random.choice(num_rows, size=int(0.8 * num_rows), replace=False)
# The held-out positions are the complement of train_idx. A set difference
# avoids the per-row linear scan that `i not in train_idx` performs on a numpy
# array; the result is the same ascending list of Python ints.
test_idx = np.setdiff1d(np.arange(num_rows), train_idx).tolist()
eval_trace_df = concurrency_df.iloc[test_idx]
# Evaluate only on rows that had at least one concurrent query.
eval_trace_df = eval_trace_df[eval_trace_df['num_concurrent_queries'] > 0]

In [4]:
import numpy as np
from xgboost import XGBRegressor
import torch
import torch.optim as optim
from torch.nn.functional import l1_loss
import scipy.optimize as optimization
from torch.utils.data import DataLoader
from models.concurrency.base_model import ConcurPredictor
from models.concurrency.utils import QueryFeatureDataset, SimpleNet
from models.concurrency.analytical_functions import simple_queueing_func, interaction_func_torch, interaction_func_scipy
from parser.utils import load_json, dfs_cardinality, estimate_scan_in_mb


class SimpleFitCurve(ConcurPredictor):
    """
    Simple fit curve model for runtime prediction with concurrency
    runtime = queue_time(num_concurrency) + alpha(num_concurrency) * isolated_runtime
            = (a1 * max(num_concurrency-b1, 0)) + (1 + a2*min(num_concurrency, b1)) * isolated_runtime
    optimize a1, a2, b1

    One (a1, a2, b1) triple is fitted per query_idx and one global triple is
    fitted on the samples of all fitted queries pooled together.
    """

    # Queries with fewer concurrent samples than this are not fitted.
    MIN_SAMPLES_PER_QUERY = 10
    # Starting point (a1, a2, b1) handed to scipy.optimize.curve_fit.
    INITIAL_GUESS = (5, 0.1, 20)

    def __init__(self):
        super().__init__()
        self.isolated_rt_cache = dict()
        self.use_train = True
        # Global parameters (fitted on all queries) and per-query parameters
        # keyed by query_idx.
        self.a1_global = 0
        self.a1 = dict()
        self.b1_global = 0
        self.b1 = dict()
        self.a2_global = 0
        self.a2 = dict()

    def _concurrency_column(self):
        """Name of the trace column used as the concurrency level."""
        if self.use_train:
            return "num_concurrent_queries_train"
        return "num_concurrent_queries"

    def train(self, trace_df, use_train=True, isolated_trace_df=None):
        """
        Fit the per-query and the global curve parameters.

        :param trace_df: trace with at least the columns "query_idx", "runtime",
            "num_concurrent_queries" and, when use_train is True,
            "num_concurrent_queries_train"
        :param use_train: read the concurrency level from
            "num_concurrent_queries_train" instead of "num_concurrent_queries";
            the choice is remembered and reused by predict
        :param isolated_trace_df: forwarded to get_isolated_runtime_cache
            (defined outside this class; expected to fill self.isolated_rt_cache)
        :raises ValueError: when no query has both a cached isolated runtime and
            at least MIN_SAMPLES_PER_QUERY concurrent samples
        """
        self.use_train = use_train
        self.get_isolated_runtime_cache(trace_df, isolated_trace_df)
        concurrent_df = trace_df[trace_df["num_concurrent_queries"] > 0]
        concurrency_col = self._concurrency_column()
        initial_guess = np.array(self.INITIAL_GUESS)

        global_y = []
        global_x = []
        global_ir = []
        for i, rows in concurrent_df.groupby("query_idx"):
            if i not in self.isolated_rt_cache:
                continue
            isolated_rt = self.isolated_rt_cache[i]
            concurrent_rt = rows["runtime"].values
            num_concurrency = rows[concurrency_col].values
            if len(num_concurrency) < self.MIN_SAMPLES_PER_QUERY:
                continue
            # Constant vector so the isolated runtime can be passed as a second
            # independent variable next to the concurrency level.
            isolated_rt_vec = np.ones(len(num_concurrency)) * isolated_rt
            global_y.append(concurrent_rt)
            global_x.append(num_concurrency)
            global_ir.append(isolated_rt_vec)
            fit, _ = optimization.curve_fit(
                simple_queueing_func,
                (num_concurrency, isolated_rt_vec),
                concurrent_rt,
                initial_guess,
            )
            self.a1[i] = fit[0]
            self.a2[i] = fit[1]
            self.b1[i] = fit[2]

        if not global_y:
            # np.concatenate([]) would fail with an unrelated-looking message;
            # report the actual cause instead (same exception type).
            raise ValueError(
                "SimpleFitCurve.train: no query has a cached isolated runtime and "
                f"at least {self.MIN_SAMPLES_PER_QUERY} concurrent samples"
            )
        global_y = np.concatenate(global_y)
        global_x = np.concatenate(global_x)
        global_ir = np.concatenate(global_ir)
        fit, _ = optimization.curve_fit(
            simple_queueing_func,
            (global_x, global_ir),
            global_y,
            initial_guess,
        )
        self.a1_global = fit[0]
        self.a2_global = fit[1]
        self.b1_global = fit[2]

    def predict(self, eval_trace_df, use_global=False):
        """
        Predict runtimes for every query that has fitted per-query parameters.

        :param eval_trace_df: trace with the same columns as used in train
        :param use_global: evaluate the curve with the global parameters instead
            of the per-query ones (queries without per-query parameters are
            skipped in both modes)
        :return: (predictions, labels), two dicts keyed by query_idx whose
            values are arrays aligned row by row; predictions are floored at 0.001
        """
        predictions = dict()
        labels = dict()
        concurrency_col = self._concurrency_column()
        for i, rows in eval_trace_df.groupby("query_idx"):
            if i not in self.isolated_rt_cache or i not in self.a1:
                continue
            isolated_rt = self.isolated_rt_cache[i]
            labels[i] = rows["runtime"].values
            num_concurrency = rows[concurrency_col].values
            x = (num_concurrency, np.ones(len(num_concurrency)) * isolated_rt)
            if use_global:
                pred = simple_queueing_func(
                    x, self.a1_global, self.a2_global, self.b1_global
                )
            else:
                pred = simple_queueing_func(x, self.a1[i], self.a2[i], self.b1[i])
            # Never predict a non-positive runtime.
            predictions[i] = np.maximum(pred, 0.001)
        return predictions, labels


def fit_curve_loss_torch(x, y, params, constrain, loss_func="soft_l1", penalties=None):
    """
    Loss of interaction_func_torch(x, *params) against y, plus a soft bound
    penalty on every parameter.

    :param x: features forwarded unchanged to interaction_func_torch
    :param y: target values
    :param params: sequence of parameters, unpacked into interaction_func_torch
    :param constrain: object exposing indexable `lb` and `ub` (one entry per parameter)
    :param loss_func: "mae", "mse" or "soft_l1"
    :param penalties: optional per-parameter penalty weights; 1 is used for
        every parameter when omitted
    :return: mean data loss plus, for each parameter p,
        exp(w * (p - ub)) + exp(-w * (p - lb))
    """
    pred = interaction_func_torch(x, *params)
    lower = constrain.lb
    upper = constrain.ub
    if loss_func == "soft_l1":
        per_sample = torch.sqrt(1 + (pred - y) ** 2) - 1
    elif loss_func == "mse":
        per_sample = (pred - y) ** 2
    elif loss_func == "mae":
        per_sample = torch.abs(pred - y)
    else:
        assert False, f"loss func {loss_func} not implemented"
    loss = torch.mean(per_sample)
    for k, param in enumerate(params):
        weight = 1 if penalties is None else penalties[k]
        # Grows exponentially once the parameter leaves [lb, ub] on either side.
        loss += torch.exp(weight * (param - upper[k])) + torch.exp(
            -1 * weight * (param - lower[k])
        )
    return loss


class ComplexFitCurve(ConcurPredictor):
    """
    Complex fit curve model for runtime prediction with concurrency
    See interaction_func_scipy for detailed analytical functions
    """

    def __init__(self, is_column_store=False, opt_method='scipy'):
        """
        Set up empty caches and the default optimisation settings.

        :param is_column_store: whether the DBMS is a column store; kept on the
            instance and passed to estimate_scan_in_mb by pre_process_queries
        :param opt_method: which branch train/predict take; they recognise
            "scipy", "torch", "nn" and "xgboost"
        """
        super().__init__()
        self.is_column_store = is_column_store
        self.opt_method = opt_method

        # Runtime caches and per-query plan information.
        self.isolated_rt_cache = dict()
        self.average_rt_cache = dict()
        self.query_info = dict()

        # Database statistics and the table sizes derived from them.
        self.db_stats = None
        self.table_sizes = dict()
        self.table_sizes_by_index = dict()
        self.table_nrows_by_index = dict()
        self.table_column_map = dict()

        self.use_pre_info = False
        self.use_post_info = False

        # Optimisation settings: initial parameters, hard bounds (passed to
        # curve_fit), soft bounds and penalty weights (passed to
        # fit_curve_loss_torch). All lists have one entry per parameter.
        self.batch_size = 1024
        self.loss_func = "soft_l1"
        self.analytic_params = [
            0.5, 20, 2, 0.2, 0.5, 0.5, 0.1, 0.2, 0.8, 0.2, 10, 200, 16000,
        ]
        self.bound = optimization.Bounds(
            [0.1, 10, 0.01, 0.001, 0.001, 0.001, 0.001, 0.001, 0.5, 0.05, 2, 20, 10000],
            [1, 200, 2, 1, 0.9, 0.9, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        self.constrain = optimization.Bounds(
            [0.1, 10, 0.1, 0.01, 0.01, 0.01, 0.01, 0.1, 0.5, 0.05, 2, 20, 10000],
            [1, 200, 2, 1, 0.9, 0.9, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        self.penalty = [
            100, 0.1, 100, 100, 100, 100, 100, 100, 100, 100, 1, 0.1, 0.01,
        ]

        # Filled in by train.
        self.model = None
        self.analytic_func = None

    def _compute_table_size(self):
        """
        Derive per-table column lists, row counts and sizes from self.db_stats.

        The row width of a table is the sum of the positive `avg_width` values
        of its columns. For every table that also appears in
        db_stats["table_stats"], the size in MB is `reltuples * width / 1024^2`
        and is stored both by table name (self.table_sizes) and by the table's
        position in db_stats["table_stats"] (self.table_sizes_by_index);
        self.table_nrows_by_index holds `reltuples` under the same position.
        Tables missing from table_stats keep their summed width in
        self.table_sizes.
        """
        # Start from a clean slate so that calling this again (e.g. a second
        # pre_process_queries) does not add widths on top of values already
        # converted to MB or append duplicate column names.
        self.table_sizes = dict()
        self.table_column_map = dict()
        self.table_sizes_by_index = dict()
        self.table_nrows_by_index = dict()

        for col in self.db_stats["column_stats"]:
            table = col["tablename"]
            if table not in self.table_column_map:
                self.table_sizes[table] = 0
                self.table_column_map[table] = []
            self.table_column_map[table].append(col["attname"])
            if col["avg_width"] is not None and col["avg_width"] > 0:
                self.table_sizes[table] += col["avg_width"]

        # relname -> first position in table_stats (same result as list.index).
        first_index = dict()
        for pos, table_stat in enumerate(self.db_stats["table_stats"]):
            first_index.setdefault(table_stat["relname"], pos)

        for table in self.table_sizes:
            idx = first_index.get(table)
            if idx is None:
                continue
            num_tuples = self.db_stats["table_stats"][idx]["reltuples"]
            self.table_nrows_by_index[idx] = num_tuples
            size_in_mb = (num_tuples * self.table_sizes[table]) / (1024 * 1024)
            self.table_sizes[table] = size_in_mb
            self.table_sizes_by_index[idx] = size_in_mb

    def pre_process_queries(self, parsed_queries_path, with_width=True, use_true_card=False):
        """
        Load parsed plans from disk and rebuild self.query_info.

        :param parsed_queries_path: path given to load_json; the loaded object
            is indexed with "database_stats", "sql_queries", "parsed_plans"
            and "parsed_queries"
        :param with_width: forwarded to dfs_cardinality
        :param use_true_card: forwarded to dfs_cardinality and estimate_scan_in_mb

        After the call, self.query_info[i] holds the keys "sql",
        "all_cardinality", "est_scan" and "est_scan_per_table" for the i-th query.
        """
        plans = load_json(parsed_queries_path, namespace=False)
        self.db_stats = plans["database_stats"]
        self._compute_table_size()
        self.query_info = dict()
        for i, sql in enumerate(plans["sql_queries"]):
            # dfs_cardinality fills the list it is given in place.
            all_cardinality = []
            dfs_cardinality(
                plans["parsed_plans"][i], all_cardinality, with_width, use_true_card
            )
            est_scan, est_scan_per_table = estimate_scan_in_mb(
                self.db_stats,
                plans["parsed_queries"][i],
                use_true_card,
                self.is_column_store,
            )
            self.query_info[i] = {
                "sql": sql,
                "all_cardinality": all_cardinality,
                "est_scan": est_scan,
                "est_scan_per_table": est_scan_per_table,
            }

    def estimate_data_share_percentage(self, idx, concur_info, pre_exec_info=None):
        """
        Estimate the fraction of query `idx`'s scan that overlaps with the scans
        of other queries.

        :param idx: key into self.query_info for the query of interest
        :param concur_info: sequence of entries whose first element is the
            query index of a concurrently running query
        :param pre_exec_info: optional sequence of the same shape; when given it
            is concatenated to concur_info
        :return: value in [0, 1]; 0.0 when the query's total estimated scan is
            not positive

        For every table scanned by `idx`, each other query contributes
        (its scan of the table / table size) * (idx's scan of the table); the
        per-table sum is capped at idx's own scan of that table.
        """
        # TODO: make it smarter by considering buffer pool behavior
        curr_scan = self.query_info[idx]["est_scan_per_table"]
        curr_total_scan = self.query_info[idx]["est_scan"]
        if curr_total_scan <= 0:
            # Nothing is scanned, so nothing can be shared; also avoids 0/0.
            return 0.0
        if pre_exec_info is not None:
            concur_info = concur_info + pre_exec_info
        all_shared_scan = 0
        for table in curr_scan:
            table_size = self.table_sizes_by_index[table]
            if table_size <= 0:
                # An empty table cannot contribute shared data; skip it rather
                # than divide by zero below.
                continue
            table_shared_scan = 0
            for c in concur_info:
                concur_scan = self.query_info[c[0]]["est_scan_per_table"]
                if table in concur_scan:
                    concur_scan_perc = concur_scan[table] / table_size
                    overlap_scan = concur_scan_perc * curr_scan[table]
                    table_shared_scan += overlap_scan
            table_shared_scan = min(table_shared_scan, curr_scan[table])
            all_shared_scan += table_shared_scan
        return min(all_shared_scan / curr_total_scan, 1.0)

    def featurize_data(self, concurrent_df):
        """
        Turn a concurrency trace into the feature tuple consumed by the
        analytic function / models.

        :param concurrent_df: trace grouped here by "query_idx"; the columns
            "runtime", "pre_exec_info" and, depending on self.use_pre_info,
            either "num_concurrent_queries_train"/"concur_info_train" or
            "num_concurrent_queries"/"concur_info" are read
        :return: (feature, global_y, global_query_idx) where
            feature is a tuple of 11 aligned 1-D arrays in the order
            isolated_runtime, avg_runtime, num_concurrency,
            sum_concurrent_runtime, est_scan, est_concurrent_scan,
            scan_sharing_percentage, max_est_card, avg_est_card,
            max_concurrent_card, avg_concurrent_card;
            global_y holds the observed runtimes;
            global_query_idx maps query_idx -> (start, end) slice of its rows.
            Arrays are torch tensors when opt_method is "torch" or "nn".
        """
        global_y = []
        global_isolated_runtime = []
        global_avg_runtime = []
        global_num_concurrency = []
        global_sum_concurrent_runtime = []
        global_est_scan = []
        global_est_concurrent_scan = []
        global_scan_sharing_percentage = []
        global_max_est_card = []
        global_avg_est_card = []
        global_max_concurrent_card = []
        global_avg_concurrent_card = []
        global_query_idx = dict()
        # Running offset of the current query's first row in the output arrays.
        start = 0
        for i, rows in concurrent_df.groupby("query_idx"):
            # Skip queries for which any of the required caches has no entry.
            if (
                i not in self.isolated_rt_cache
                or i not in self.query_info
                or i not in self.average_rt_cache
            ):
                continue
            concurrent_rt = rows["runtime"].values
            query_info = self.query_info[i]
            n_rows = len(rows)
            if self.use_pre_info:
                num_concurrency = rows["num_concurrent_queries_train"].values
                concur_info = rows["concur_info_train"].values
            else:
                num_concurrency = rows["num_concurrent_queries"].values
                concur_info = rows["concur_info"].values
            pre_exec_info = rows["pre_exec_info"].values

            global_query_idx[i] = (start, start + n_rows)
            start += n_rows
            # Per-query features: one array of length n_rows appended per query
            # (concatenated after the loop).
            global_y.append(concurrent_rt)
            global_isolated_runtime.append(np.ones(n_rows) * self.isolated_rt_cache[i])
            global_avg_runtime.append(np.ones(n_rows) * self.average_rt_cache[i])
            global_num_concurrency.append(num_concurrency)
            global_est_scan.append(np.ones(n_rows) * query_info["est_scan"])
            # Cardinalities are scaled down by 1024 * 1024.
            global_max_est_card.append(
                np.ones(n_rows) * np.max(query_info["all_cardinality"]) / (1024 * 1024)
            )
            global_avg_est_card.append(
                np.ones(n_rows)
                * np.average(query_info["all_cardinality"])
                / (1024 * 1024)
            )
            # Per-row features: one scalar appended per row, so these lists are
            # already flat and only need np.asarray after the loop.
            for j in range(n_rows):
                sum_concurrent_runtime = 0
                sum_concurrent_scan = 0
                concurrent_card = []
                # c[0] is used as the query index of a concurrent query.
                for c in concur_info[j]:
                    if c[0] in self.average_rt_cache:
                        sum_concurrent_runtime += self.average_rt_cache[c[0]]
                    else:
                        # Concurrent query without a cached average runtime: its
                        # index is printed and it contributes nothing.
                        print(c[0])
                    if c[0] in self.query_info:
                        sum_concurrent_scan += self.query_info[c[0]]["est_scan"]
                        concurrent_card.extend(self.query_info[c[0]]["all_cardinality"])
                    else:
                        # Concurrent query without plan info: printed and skipped.
                        print(c[0])

                global_sum_concurrent_runtime.append(sum_concurrent_runtime)
                global_est_concurrent_scan.append(sum_concurrent_scan)
                if len(concurrent_card) == 0:
                    global_max_concurrent_card.append(0)
                    global_avg_concurrent_card.append(0)
                else:
                    global_max_concurrent_card.append(
                        np.max(concurrent_card) / (1024 * 1024)
                    )
                    global_avg_concurrent_card.append(
                        np.average(concurrent_card) / (1024 * 1024)
                    )
                global_scan_sharing_percentage.append(
                    self.estimate_data_share_percentage(
                        i, concur_info[j], pre_exec_info[j]
                    )
                )

        # NOTE(review): np.concatenate raises ValueError on an empty list, i.e.
        # when no query passed the cache check above.
        global_y = np.concatenate(global_y)
        global_isolated_runtime = np.concatenate(global_isolated_runtime)
        global_avg_runtime = np.concatenate(global_avg_runtime)
        global_num_concurrency = np.concatenate(global_num_concurrency)
        global_est_scan = np.concatenate(global_est_scan)
        global_max_est_card = np.concatenate(global_max_est_card)
        global_avg_est_card = np.concatenate(global_avg_est_card)
        global_sum_concurrent_runtime = np.asarray(global_sum_concurrent_runtime)
        global_est_concurrent_scan = np.asarray(global_est_concurrent_scan)
        global_max_concurrent_card = np.asarray(global_max_concurrent_card)
        global_avg_concurrent_card = np.asarray(global_avg_concurrent_card)
        global_scan_sharing_percentage = np.asarray(global_scan_sharing_percentage)
        # The order of this tuple is the order in which the analytic function
        # unpacks its input.
        feature = (
            global_isolated_runtime,
            global_avg_runtime,
            global_num_concurrency,
            global_sum_concurrent_runtime,
            global_est_scan,
            global_est_concurrent_scan,
            global_scan_sharing_percentage,
            global_max_est_card,
            global_avg_est_card,
            global_max_concurrent_card,
            global_avg_concurrent_card,
        )
        # The torch-based optimisers need tensors instead of numpy arrays.
        if self.opt_method == "torch" or self.opt_method == "nn":
            feature = list(feature)
            for i in range(len(feature)):
                feature[i] = torch.from_numpy(feature[i])
            feature = tuple(feature)
            global_y = torch.from_numpy(global_y)
        return feature, global_y, global_query_idx

    def train(self, trace_df, use_train=True, isolated_trace_df=None, analytic_func=None):
        """
        Fit the model on the rows of trace_df that ran with concurrency.

        :param trace_df: trace; rows with "num_concurrent_queries" > 0 are
            featurized with featurize_data
        :param use_train: stored as self.use_pre_info, which selects the
            "*_train" concurrency columns during featurization
        :param isolated_trace_df: forwarded to get_isolated_runtime_cache
            (defined outside this class)
        :param analytic_func: function fitted by the "scipy" branch; defaults to
            interaction_func_scipy

        Depending on self.opt_method:
          * "scipy":   bounded curve_fit of analytic_func; the result replaces
                       self.analytic_params
          * "torch":   Adam on the parameters of interaction_func_torch with the
                       soft-bound loss of fit_curve_loss_torch
          * "nn":      trains a SimpleNet with L1 loss and stores it in self.model
          * "xgboost": trains an XGBRegressor on a random 80/20 train/validation
                       split and stores it in self.model
        Any other value fails an assertion.
        """
        if analytic_func is None:
            analytic_func = interaction_func_scipy
        self.analytic_func = analytic_func
        self.use_pre_info = use_train
        self.get_isolated_runtime_cache(
            trace_df, isolated_trace_df, get_avg_runtime=True
        )
        concurrent_df = trace_df[trace_df["num_concurrent_queries"] > 0]
        feature, label, _ = self.featurize_data(concurrent_df)

        initial_param_value = np.asarray(self.analytic_params)
        if self.opt_method == "scipy":
            fit, _ = optimization.curve_fit(
                self.analytic_func,
                feature,
                label,
                initial_param_value,
                bounds=self.bound,
                jac="3-point",
                method="trf",
                loss="soft_l1",
                verbose=1
            )
            self.analytic_params = list(fit)
        elif self.opt_method == "torch":
            torch_analytic_params = []
            torch_analytic_params_lr = []
            for p in self.analytic_params:
                # A parameter whose initial value is exactly 10 is kept fixed
                # (no gradient); all others are trainable.
                if p == 10:
                    t_p = torch.tensor(float(p), requires_grad=False)
                else:
                    t_p = torch.tensor(float(p), requires_grad=True)
                torch_analytic_params.append(t_p)
                # Per-parameter learning rate that grows with the initial value.
                torch_analytic_params_lr.append({'params': t_p, 'lr': 0.01 * p ** 0.3})
            optimizer = optim.Adam(torch_analytic_params_lr, weight_decay=2e-5)
            dataset = QueryFeatureDataset(feature, label)
            train_dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
            for epoch in range(200):
                for X, y in train_dataloader:
                    optimizer.zero_grad()
                    loss = fit_curve_loss_torch(X, y, torch_analytic_params,
                                                self.constrain, loss_func=self.loss_func, penalties=self.penalty)
                    loss.backward()
                    optimizer.step()
                if epoch % 10 == 0:
                    # Progress report: loss of the last batch of this epoch.
                    print(epoch, loss.item())
                    print(torch_analytic_params)
            for i in range(len(self.analytic_params)):
                self.analytic_params[i] = torch_analytic_params[i].detach()
        elif self.opt_method == "nn":
            dataset = QueryFeatureDataset(feature, label)
            train_dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
            self.model = SimpleNet(len(feature))
            optimizer = optim.Adam(self.model.parameters(), lr=0.01, weight_decay=2e-5)
            for epoch in range(200):
                for X, y in train_dataloader:
                    # Stack the per-feature batches and move the feature axis
                    # last: (num_features, batch) -> (batch, num_features).
                    X = torch.stack(X).float()
                    X = torch.transpose(X, 0, 1)
                    optimizer.zero_grad()
                    pred = self.model(X)
                    pred = pred.reshape(-1)
                    loss = l1_loss(pred, y)
                    loss.backward()
                    optimizer.step()
                if epoch % 10 == 0:
                    print(epoch, loss.item())
        elif self.opt_method == "xgboost":
            # (num_features, num_samples) -> (num_samples, num_features).
            feature = np.stack(feature).T
            model = XGBRegressor(
                n_estimators=1000,
                max_depth=8,
                eta=0.2,
                subsample=1.0,
                eval_metric="mae",
                early_stopping_rounds=100,
            )
            train_idx = np.random.choice(
                len(feature), size=int(0.8 * len(feature)), replace=False
            )
            # Validation rows are the complement of train_idx. A set difference
            # yields the same ascending positions without scanning train_idx
            # once per row.
            val_idx = np.setdiff1d(np.arange(len(feature)), train_idx)
            model.fit(
                feature[train_idx],
                label[train_idx],
                eval_set=[(feature[val_idx], label[val_idx])],
                verbose=False,
            )
            self.model = model
        else:
            assert False, f"unrecognized optimization method {self.opt_method}"

    def predict(self, eval_trace_df, use_global=False, return_per_query=True):
        """
        Predict runtimes for a trace with the model selected by self.opt_method.

        :param eval_trace_df: trace, featurized with featurize_data
        :param use_global: accepted but not used by this method
        :param return_per_query: when True return two dicts keyed by query_idx;
            otherwise return the flat prediction and label arrays
        :return: (predictions, labels)
        """
        if self.analytic_func is None:
            self.analytic_func = interaction_func_scipy
        feature, labels, query_idx = self.featurize_data(eval_trace_df)

        if self.opt_method == "scipy":
            preds = self.analytic_func(feature, *self.analytic_params)
        elif self.opt_method == "torch":
            preds = interaction_func_torch(feature, *self.analytic_params).numpy()
            labels = labels.numpy()
        elif self.opt_method == "nn":
            # (num_features, num_samples) -> (num_samples, num_features).
            batch = torch.transpose(torch.stack(feature).float(), 0, 1)
            preds = self.model(batch).reshape(-1).detach().numpy()
            labels = labels.numpy()
        elif self.opt_method == "xgboost":
            matrix = np.stack(feature).T
            # Floor the tree predictions at a small positive runtime.
            preds = np.maximum(self.model.predict(matrix), 0.001)
        else:
            assert False, f"unrecognized optimization method {self.opt_method}"

        if not return_per_query:
            return preds, labels
        # Cut the flat arrays back into one slice per query.
        preds_per_query = dict()
        labels_per_query = dict()
        for q, (lo, hi) in query_idx.items():
            preds_per_query[q] = preds[lo:hi]
            labels_per_query[q] = labels[lo:hi]
        return preds_per_query, labels_per_query


class ComplexFitCurveSeparation(ComplexFitCurve):
    """
    Complex fit curve model for runtime prediction with concurrency
    See interaction_func_scipy for detailed analytical functions
    """

    def __init__(self, is_column_store=False, opt_method='scipy'):
        """
        Same construction as the parent class, then replace the parameter
        vector, both sets of bounds and the penalty weights with 17-entry
        versions.

        :param is_column_store: forwarded to the parent constructor
        :param opt_method: forwarded to the parent constructor
        """
        super().__init__(is_column_store=is_column_store, opt_method=opt_method)
        self.analytic_params = [
            0.3, 0.5, 20, 2, 0.2, 0.1, 0.3, 0.3, 0.3, 0.3, 0.1, 0.2, 0.8, 0.2, 10, 200, 16000,
        ]
        # Hard bounds.
        self.bound = optimization.Bounds(
            [0.1, 0.1, 10, 0.01, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.5, 0.05, 2, 20, 10000],
            [1, 1, 200, 2, 1, 0.9, 0.9, 0.9, 0.5, 0.5, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        # Soft bounds; the lower limits differ from the hard ones.
        self.constrain = optimization.Bounds(
            [0.1, 0.1, 10, 0.1, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.5, 0.05, 2, 20, 10000],
            [1, 1, 200, 2, 1, 0.9, 0.9, 0.9, 0.5, 0.5, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        self.penalty = [
            100, 100, 0.1, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 1, 0.1, 0.01,
        ]

    def featurize_data(self, concurrent_df):
        """
        Featurize a concurrency trace, separating concurrent queries listed in
        "concur_info_train" ("pre") from the remaining entries of "concur_info"
        ("post").

        :param concurrent_df: trace grouped here by "query_idx"; the columns
            "runtime", "start_time", "end_time",
            "num_concurrent_queries", "num_concurrent_queries_train",
            "concur_info", "concur_info_train" and "pre_exec_info" are read
        :return: (feature, global_y, global_query_idx) where feature is a tuple
            of 18 aligned 1-D arrays in the order
            isolated_runtime, avg_runtime,
            num_concurrency_pre, num_concurrency_post,
            sum_concurrent_runtime_pre, sum_concurrent_runtime_post,
            avg_time_elapsed_pre, sum_time_overlap_post,
            est_scan, est_concurrent_scan_pre, est_concurrent_scan_post,
            scan_sharing_percentage, max_est_card, avg_est_card,
            max_concurrent_card_pre, max_concurrent_card_post,
            avg_concurrent_card_pre, avg_concurrent_card_post;
            global_y holds the observed runtimes and global_query_idx maps
            query_idx -> (start, end) slice of its rows.
            Arrays are torch tensors when opt_method is "torch" or "nn".

        When self.use_pre_info is True every "post" feature is set to zero.
        """
        global_y = []
        global_isolated_runtime = []
        global_avg_runtime = []
        global_num_concurrency_pre = []
        global_num_concurrency_post = []
        global_sum_concurrent_runtime_pre = []
        global_sum_concurrent_runtime_post = []
        global_avg_time_elapsed_pre = []
        global_sum_time_overlap_post = []
        global_est_scan = []
        global_est_concurrent_scan_pre = []
        global_est_concurrent_scan_post = []
        global_scan_sharing_percentage = []
        global_max_est_card = []
        global_avg_est_card = []
        global_max_concurrent_card_pre = []
        global_max_concurrent_card_post = []
        global_avg_concurrent_card_pre = []
        global_avg_concurrent_card_post = []
        global_query_idx = dict()
        # Running offset of the current query's first row in the output arrays.
        start = 0
        for i, rows in concurrent_df.groupby("query_idx"):
            # Skip queries for which any of the required caches has no entry.
            if (
                    i not in self.isolated_rt_cache
                    or i not in self.query_info
                    or i not in self.average_rt_cache
            ):
                continue
            concurrent_rt = rows["runtime"].values
            start_time = rows["start_time"].values
            end_time = rows["end_time"].values
            query_info = self.query_info[i]
            n_rows = len(rows)
            num_concurrency_pre = rows["num_concurrent_queries_train"].values
            global_num_concurrency_pre.append(num_concurrency_pre)
            concur_info_prev = rows["concur_info_train"].values
            full_concur_info = rows["concur_info"].values
            pre_exec_info = rows["pre_exec_info"].values

            global_query_idx[i] = (start, start + n_rows)
            start += n_rows
            # Per-query features: one array of length n_rows appended per query.
            global_y.append(concurrent_rt)
            global_isolated_runtime.append(np.ones(n_rows) * self.isolated_rt_cache[i])
            global_avg_runtime.append(np.ones(n_rows) * self.average_rt_cache[i])
            global_est_scan.append(np.ones(n_rows) * query_info["est_scan"])
            global_max_est_card.append(
                np.ones(n_rows) * np.max(query_info["all_cardinality"]) / (1024 * 1024)
            )
            global_avg_est_card.append(
                np.ones(n_rows)
                * np.average(query_info["all_cardinality"])
                / (1024 * 1024)
            )
            # Per-row features: one scalar appended per row.
            for j in range(n_rows):
                sum_concurrent_runtime_pre = 0
                sum_concurrent_runtime_post = 0
                sum_concurrent_scan_pre = 0
                sum_concurrent_scan_post = 0
                avg_time_elapsed_pre = 0
                sum_time_overlap_post = 0
                concurrent_card_pre = []
                concurrent_card_post = []
                # c[0] is used as a query index and c[1] as a timestamp
                # (presumably the concurrent query's start time; confirm
                # against create_concurrency_dataset).
                for c in full_concur_info[j]:
                    # An entry counts as "pre" when it is also listed in the
                    # train-time concurrency info; decide once per entry.
                    is_pre = c in concur_info_prev[j]
                    if c[0] in self.average_rt_cache:
                        if is_pre:
                            sum_concurrent_runtime_pre += self.average_rt_cache[c[0]]
                            avg_time_elapsed_pre += (start_time[j] - c[1])
                        else:
                            sum_concurrent_runtime_post += self.average_rt_cache[c[0]]
                            # TODO: this is not practical, make it an estimation
                            sum_time_overlap_post += (end_time[j] - c[1])
                    else:
                        # No cached average runtime: printed and skipped.
                        print(c[0])
                    if c[0] in self.query_info:
                        if is_pre:
                            sum_concurrent_scan_pre += self.query_info[c[0]]["est_scan"]
                            concurrent_card_pre.extend(self.query_info[c[0]]["all_cardinality"])
                        else:
                            sum_concurrent_scan_post += self.query_info[c[0]]["est_scan"]
                            concurrent_card_post.extend(self.query_info[c[0]]["all_cardinality"])
                    else:
                        # No plan info: printed and skipped.
                        print(c[0])

                global_sum_concurrent_runtime_pre.append(sum_concurrent_runtime_pre)
                # A row without any "pre" query has no elapsed time to average;
                # report 0 instead of dividing by zero.
                num_pre = len(concur_info_prev[j])
                if num_pre > 0:
                    global_avg_time_elapsed_pre.append(avg_time_elapsed_pre / num_pre)
                else:
                    global_avg_time_elapsed_pre.append(0.0)
                global_est_concurrent_scan_pre.append(sum_concurrent_scan_pre)
                if len(concurrent_card_pre) == 0:
                    global_max_concurrent_card_pre.append(0)
                    global_avg_concurrent_card_pre.append(0)
                else:
                    global_max_concurrent_card_pre.append(
                        np.max(concurrent_card_pre) / (1024 * 1024)
                    )
                    global_avg_concurrent_card_pre.append(
                        np.average(concurrent_card_pre) / (1024 * 1024)
                    )
                # TODO: may be able to change concur_info_prev to full_concur_info?
                global_scan_sharing_percentage.append(
                    self.estimate_data_share_percentage(
                        i, concur_info_prev[j], pre_exec_info[j]
                    )
                )
                if self.use_pre_info:
                    # Only pre-execution information may be used: blank out
                    # everything derived from "post" queries.
                    global_sum_concurrent_runtime_post.append(0)
                    global_est_concurrent_scan_post.append(0)
                    global_sum_time_overlap_post.append(0)
                    global_max_concurrent_card_post.append(0)
                    global_avg_concurrent_card_post.append(0)
                else:
                    global_sum_concurrent_runtime_post.append(sum_concurrent_runtime_post)
                    global_est_concurrent_scan_post.append(sum_concurrent_scan_post)
                    global_sum_time_overlap_post.append(sum_time_overlap_post)
                    if len(concurrent_card_post) == 0:
                        global_max_concurrent_card_post.append(0)
                        global_avg_concurrent_card_post.append(0)
                    else:
                        global_max_concurrent_card_post.append(
                            np.max(concurrent_card_post) / (1024 * 1024)
                        )
                        global_avg_concurrent_card_post.append(
                            np.average(concurrent_card_post) / (1024 * 1024)
                        )

            if self.use_pre_info:
                num_concurrency_post = np.zeros(n_rows)
            else:
                num_concurrency_post = rows["num_concurrent_queries"].values - rows["num_concurrent_queries_train"].values
            global_num_concurrency_post.append(num_concurrency_post)

        global_y = np.concatenate(global_y)
        global_isolated_runtime = np.concatenate(global_isolated_runtime)
        global_avg_runtime = np.concatenate(global_avg_runtime)
        global_num_concurrency_pre = np.concatenate(global_num_concurrency_pre)
        global_num_concurrency_post = np.concatenate(global_num_concurrency_post)

        global_est_scan = np.concatenate(global_est_scan)
        global_max_est_card = np.concatenate(global_max_est_card)
        global_avg_est_card = np.concatenate(global_avg_est_card)
        global_avg_time_elapsed_pre = np.asarray(global_avg_time_elapsed_pre)
        global_sum_time_overlap_post = np.asarray(global_sum_time_overlap_post)
        global_sum_concurrent_runtime_pre = np.asarray(global_sum_concurrent_runtime_pre)
        global_sum_concurrent_runtime_post = np.asarray(global_sum_concurrent_runtime_post)
        global_est_concurrent_scan_pre = np.asarray(global_est_concurrent_scan_pre)
        global_est_concurrent_scan_post = np.asarray(global_est_concurrent_scan_post)
        global_max_concurrent_card_pre = np.asarray(global_max_concurrent_card_pre)
        global_max_concurrent_card_post = np.asarray(global_max_concurrent_card_post)
        global_avg_concurrent_card_pre = np.asarray(global_avg_concurrent_card_pre)
        global_avg_concurrent_card_post = np.asarray(global_avg_concurrent_card_post)
        global_scan_sharing_percentage = np.asarray(global_scan_sharing_percentage)
        # The order of this tuple is the order in which the analytic function
        # unpacks its input.
        feature = (
            global_isolated_runtime,
            global_avg_runtime,
            global_num_concurrency_pre,
            global_num_concurrency_post,
            global_sum_concurrent_runtime_pre,
            global_sum_concurrent_runtime_post,
            global_avg_time_elapsed_pre,
            global_sum_time_overlap_post,
            global_est_scan,
            global_est_concurrent_scan_pre,
            global_est_concurrent_scan_post,
            global_scan_sharing_percentage,
            global_max_est_card,
            global_avg_est_card,
            global_max_concurrent_card_pre,
            global_max_concurrent_card_post,
            global_avg_concurrent_card_pre,
            global_avg_concurrent_card_post
        )
        # The torch-based optimisers need tensors instead of numpy arrays.
        if self.opt_method == "torch" or self.opt_method == "nn":
            feature = list(feature)
            for i in range(len(feature)):
                feature[i] = torch.from_numpy(feature[i])
            feature = tuple(feature)
            global_y = torch.from_numpy(global_y)
        return feature, global_y, global_query_idx


In [27]:
def interaction_separation_func_scipy(
    x,
    n1,
    q1,
    i1,
    i2,
    c1,
    c2,
    m1,
    m2,
    m3,
    m4,
    m5,
    cm1,
    r1,
    r2,
    max_concurrency,
    avg_io_speed,
    memory_size,
    debug=False
):
    """
    An analytical function that can consider 3 types of resource sharing/contention: IO, memory, CPU.
    The concurrent load is split into a "pre" and a "post" group of queries (as built by
    ComplexFitCurveSeparation.featurize_data: "pre" are the queries listed in `concur_info_train`,
    "post" are the remaining concurrent queries -- presumably "already running when this query
    started" vs. "arrived later"; confirm against the trace loader).

    x:: input tuple of 18 entries, unpacked in this order:
        isolated_runtime: the isolated runtime without concurrency of a query
        avg_runtime: average or median observed runtime of a query under any concurrency
        num_concurrency_pre / num_concurrency_post: number of concurrent queries in each group
        sum_concurrent_runtime_pre / _post: sum of the estimated runtime of the concurrent queries (CPU)
        avg_time_elapsed_pre: average time the "pre" queries had already been running
        sum_time_overlap_post: summed overlap time with the "post" queries
        est_scan: estimated MB of data that this query will need to scan (IO)
        est_concurrent_scan_pre / _post: estimated MB scanned by the concurrent queries
            (unpacked but not used by this function)
        scan_sharing_percentage: estimated percentage of data in cache (sharing) according to concurrent queries
        max_est_card: maximum estimated cardinality in the query plan of this query (reflect peak memory usage)
        avg_est_card: average estimated cardinality in the query plan of this query (reflect average memory usage)
        max_concurrent_card_pre / _post: maximum estimated cardinality over the concurrent queries
        avg_concurrent_card_pre / _post: average estimated cardinality over the concurrent queries
    n1 .. memory_size:: the fitted parameters, in the order of ComplexFitCurveSeparation.param_names.
        m5 and r2 are accepted for signature compatibility but are not used by this function.
    debug:: when True, print every intermediate term; when False nothing is printed, so the
        function can be evaluated repeatedly by an optimizer without flooding the output.

    Returns queueing_time + io_time + cpu_time, floored at 0.01.
    TODO: adding memory, vCPU, and bandwidth information
    """
    (
        isolated_runtime,
        avg_runtime,
        num_concurrency_pre,
        num_concurrency_post,
        sum_concurrent_runtime_pre,
        sum_concurrent_runtime_post,
        avg_time_elapsed_pre,
        sum_time_overlap_post,
        est_scan,
        est_concurrent_scan_pre,
        est_concurrent_scan_post,
        scan_sharing_percentage,
        max_est_card,
        avg_est_card,
        max_concurrent_card_pre,
        max_concurrent_card_post,
        avg_concurrent_card_pre,
        avg_concurrent_card_post
    ) = x
    # fraction of running queries (as opposed to queueing queries)
    running_frac = np.minimum(num_concurrency_pre + num_concurrency_post, max_concurrency) / np.maximum(
        num_concurrency_pre + num_concurrency_post, 1
    )
    # estimate queueing time of a query based on the sum of concurrent queries' run time
    queueing_time = (
        q1
        * (
            np.maximum(num_concurrency_pre + n1 * num_concurrency_post - max_concurrency, 0)
            / np.maximum(num_concurrency_pre + n1 * num_concurrency_post, 1)
        )
        *
        (sum_concurrent_runtime_pre + n1 * sum_concurrent_runtime_post - avg_time_elapsed_pre * num_concurrency_pre)
    )
    queueing_time = np.maximum(queueing_time, 0)
    if debug:
        print("queueing_time", queueing_time)
    # share of the "pre" queries' work that is still outstanding, floored at 0.1
    discount_pre = (sum_concurrent_runtime_pre - avg_time_elapsed_pre * num_concurrency_pre) * running_frac / np.maximum(sum_concurrent_runtime_pre, 0.1)
    discount_pre = np.maximum(discount_pre, 0.1)
    if debug:
        # previously printed unconditionally, i.e. on every optimizer evaluation
        print("discount_pre", discount_pre)
    discount_post = sum_time_overlap_post / np.maximum(sum_concurrent_runtime_post, 0.1)
    # estimate io_speed of a query assuming each query has a base io_speed of i1 + the io speed due to contention
    io_speed = i1 + avg_io_speed / np.minimum(
        np.maximum(num_concurrency_pre * discount_pre + n1 * num_concurrency_post * discount_post, 1), max_concurrency
    )
    if debug:
        print("io_speed", io_speed)
    # estimate time speed on IO as the (estimated scan - data in cache) / estimated io_speed
    # use i2 to adjust the estimation error in est_scan and scan_sharing_percentage
    io_time = i2 * est_scan * np.maximum((1 - scan_sharing_percentage * running_frac), 0) / io_speed
    if debug:
        print("io_time", io_time)
    # IO time without the cache-sharing discount; used to carve the CPU part out of the runtime
    io_time_old = i2 * est_scan / io_speed
    if debug:
        print("io_time_old", io_time_old)
    # estimate the amount of CPU work/time as the weighted average of isolated_runtime and avg_runtime - io_time
    cpu_time_isolated = np.maximum((r1 * isolated_runtime + (1 - r1) * avg_runtime) - io_time_old, 0.1)
    if debug:
        print("cpu_time_isolated", cpu_time_isolated)
    # estimate the amount of CPU work imposed by the concurrent queries (approximated by their estimate runtime)
    cpu_concurrent_pre = (sum_concurrent_runtime_pre * discount_pre) / np.maximum(avg_runtime, 2)
    cpu_concurrent_post = sum_time_overlap_post / np.maximum(avg_runtime, 2)
    #cpu_concurrent_pre = np.sqrt((sum_concurrent_runtime_pre * discount_pre) / np.maximum(avg_runtime, 2))
    #cpu_concurrent_post = np.sqrt(sum_time_overlap_post / np.maximum(avg_runtime, 2))

    # estimate the amount of memory load imposed by the concurrent queries
    # *_perc_*: this query's share of the combined cardinality with each group
    max_mem_usage_perc_pre = max_est_card / (max_concurrent_card_pre + max_est_card)
    avg_mem_usage_perc_pre = avg_est_card / (avg_concurrent_card_pre + avg_est_card)
    max_mem_usage_perc_post = max_est_card / (max_concurrent_card_post + max_est_card)
    avg_mem_usage_perc_post = avg_est_card / (avg_concurrent_card_post + avg_est_card)
    # only the part of the (scaled) combined cardinality that exceeds memory_size counts
    peak_mem_usage = (np.maximum(m1 * (max_concurrent_card_pre + max_est_card) - memory_size, 0) * max_mem_usage_perc_pre +
        np.maximum(m1 * (max_concurrent_card_post + max_est_card) - memory_size, 0)
        * max_mem_usage_perc_post
        ) / memory_size
    avg_mem_usage = (np.maximum(m2 * (avg_concurrent_card_pre + avg_est_card) - memory_size, 0)
        * avg_mem_usage_perc_pre + np.maximum(m2 * (avg_concurrent_card_post + avg_est_card) - memory_size, 0)
        * avg_mem_usage_perc_post
        ) / memory_size
    mem_usage = m3 * np.sqrt(peak_mem_usage) + m4 * np.sqrt(avg_mem_usage)
    if debug:
        print("mem_usage", mem_usage)
    # estimate the CPU time of a query by considering the contention of CPU and memory of other queries
    cpu_time_scale_factor = (
        c1 * (cpu_concurrent_pre + c2 * cpu_concurrent_post)
    ) * (1 + mem_usage + cm1 * np.sqrt((cpu_concurrent_pre + cpu_concurrent_post) * mem_usage))
    cpu_time = (1 + cpu_time_scale_factor) * cpu_time_isolated
    if debug:
        print("cpu_scale", c1 * (cpu_concurrent_pre + c2 * cpu_concurrent_post))
        print("mem_scale", 1 + mem_usage + cm1 * np.sqrt((cpu_concurrent_pre + cpu_concurrent_post) * mem_usage))
        print("cpu_time_isolated", cpu_time_isolated)
        print("cpu_time", cpu_time)
    # final runtime of a query is estimated to be the queueing time + io_time + cpu_time
    return np.maximum(queueing_time + io_time + cpu_time, 0.01)


def interaction_separation_func_scipy_archive(
    x, n1, q1, i1, i2, c1, c2, m1, m2, m3, m4, m5, cm1, r1, r2,
    max_concurrency, avg_io_speed, memory_size,
):
    """
    Archived variant of the pre/post analytical runtime model (IO, memory and CPU contention).

    x:: 18-entry input tuple, in the same order as for interaction_separation_func_scipy:
        isolated and average runtime, pre/post concurrency counts, pre/post summed concurrent
        runtime, average elapsed time of the "pre" queries, summed overlap with the "post"
        queries, estimated scan (own and concurrent pre/post), scan sharing percentage, and
        max/avg estimated cardinalities (own and concurrent pre/post).
    The remaining arguments are the fitted parameters. c2 is accepted but not used here.

    Unlike the current version, this one always prints min/mean/max diagnostics for its
    intermediate terms and combines memory pressure through a product of logarithms.
    Returns queueing_time + io_time + cpu_time, floored at 0.01.
    TODO: adding memory and CPU information
    """

    def show_range(label, values):
        # every diagnostic line has the form "<label> <min> <mean> <max>"
        print(label, np.min(values), np.mean(values), np.max(values))

    (
        isolated_runtime, avg_runtime,
        num_concurrency_pre, num_concurrency_post,
        sum_concurrent_runtime_pre, sum_concurrent_runtime_post,
        avg_time_elapsed_pre, sum_time_overlap_post,
        est_scan, est_concurrent_scan_pre, est_concurrent_scan_post,
        scan_sharing_percentage,
        max_est_card, avg_est_card,
        max_concurrent_card_pre, max_concurrent_card_post,
        avg_concurrent_card_pre, avg_concurrent_card_post,
    ) = x

    # share of the concurrent queries that actually run (the rest are assumed to queue)
    n_total = num_concurrency_pre + num_concurrency_post
    running_frac = np.minimum(n_total, max_concurrency) / np.maximum(n_total, 1)

    # queueing time: queued share of the weighted load times the outstanding concurrent runtime
    n_weighted = num_concurrency_pre + n1 * num_concurrency_post
    queued_share = np.maximum(n_weighted - max_concurrency, 0) / np.maximum(n_weighted, 1)
    pending_runtime = (
        sum_concurrent_runtime_pre
        + n1 * sum_concurrent_runtime_post
        - avg_time_elapsed_pre * num_concurrency_pre
    )
    queueing_time = np.maximum(q1 * queued_share * pending_runtime, 0)

    # fraction of each group's work that still overlaps with this query
    remaining_pre = sum_concurrent_runtime_pre - avg_time_elapsed_pre * num_concurrency_pre
    discount_pre = np.maximum(
        remaining_pre * running_frac / np.maximum(sum_concurrent_runtime_pre, 0.1), 0
    )
    discount_post = sum_time_overlap_post / np.maximum(sum_concurrent_runtime_post, 0.1)

    # per-query IO speed: base speed i1 plus the shared bandwidth split across the effective load
    effective_load = num_concurrency_pre * discount_pre + n1 * num_concurrency_post * discount_post
    io_speed = i1 + avg_io_speed / np.minimum(np.maximum(effective_load, 1), max_concurrency)

    # IO time with (io_time) and without (io_time_old) the cache-sharing discount; i2 rescales est_scan
    show_range("est_scan:", est_scan)
    scan_cost = i2 * est_scan
    io_time = scan_cost * (1 - scan_sharing_percentage * running_frac) / io_speed
    io_time_old = scan_cost / io_speed

    # CPU part of the runtime: weighted isolated/average runtime minus the undiscounted IO time
    cpu_time_isolated = np.maximum((r1 * isolated_runtime + r2 * avg_runtime) - io_time_old, 0.1)

    # CPU load imposed by the concurrent queries, relative to this query's own runtime
    runtime_floor = np.maximum(avg_runtime, 2)
    cpu_concurrent_pre = (sum_concurrent_runtime_pre * discount_pre) / runtime_floor
    cpu_concurrent_post = sum_time_overlap_post / runtime_floor
    show_range("cpu_concurrent_pre:", cpu_concurrent_pre)
    show_range("cpu_concurrent_post:", cpu_concurrent_post)

    # share of the combined cardinality that belongs to the concurrent queries
    max_total_pre = max_concurrent_card_pre + max_est_card
    avg_total_pre = avg_concurrent_card_pre + avg_est_card
    max_total_post = max_concurrent_card_post + max_est_card
    avg_total_post = avg_concurrent_card_post + avg_est_card
    max_mem_usage_perc_pre = max_concurrent_card_pre / max_total_pre
    avg_mem_usage_perc_pre = avg_concurrent_card_pre / avg_total_pre
    max_mem_usage_perc_post = max_concurrent_card_post / max_total_post
    avg_mem_usage_perc_post = avg_concurrent_card_post / avg_total_post
    show_range("max_est_card:", max_est_card)
    show_range("max_concurrent_card_pre:", max_concurrent_card_pre)
    show_range("max_concurrent_card_post:", max_concurrent_card_post)
    show_range("avg_est_card:", avg_est_card)
    show_range("avg_concurrent_card_pre:", avg_concurrent_card_pre)
    show_range("avg_concurrent_card_post:", avg_concurrent_card_post)

    # memory pressure: log of the weighted overflow beyond memory_size times log of the own footprint
    weighted_pressure = (
        m1 * np.maximum(max_total_pre - memory_size, 0.01) * max_mem_usage_perc_pre
        + m2 * np.maximum(avg_total_pre - memory_size, 0.01) * avg_mem_usage_perc_pre
        + m3 * np.maximum(max_total_post - memory_size, 0.01) * max_mem_usage_perc_post
        + m4 * np.maximum(avg_total_post - memory_size, 0.01) * avg_mem_usage_perc_post
        + 0.0001
    )
    own_footprint = (m1 + m3) * max_est_card + (m2 + m4) * avg_est_card + 0.0001
    memory_concurrent = np.maximum(np.log(weighted_pressure) * np.log(own_footprint), 0)
    show_range("memory_concurrent:", memory_concurrent)
    show_range("cpu_time_isolated", cpu_time_isolated)

    # scale the isolated CPU time by CPU contention, memory contention and their interaction
    contention_multiplier = (
        1
        + c1 * cpu_concurrent_pre
        + c1 * cpu_concurrent_post
        + m5 * memory_concurrent
        + cm1 * np.sqrt((cpu_concurrent_pre + cpu_concurrent_post) * memory_concurrent)
    )
    cpu_time = contention_multiplier * cpu_time_isolated

    # final runtime of a query is estimated to be the queueing time + io_time + cpu_time
    show_range("queueing time:", queueing_time)
    show_range("io time:", io_time)
    show_range("CPU time:", cpu_time)
    return np.maximum(queueing_time + io_time + cpu_time, 0.01)
    

class ComplexFitCurveSeparation(ComplexFitCurve):
    """
    Complex fit curve model for runtime prediction with concurrency that splits the
    concurrent queries of every execution into a "pre" group (listed in `concur_info_train`)
    and a "post" group (the remaining entries of `concur_info`).
    See interaction_separation_func_scipy for the detailed analytical function.
    """

    def __init__(self, is_column_store=False, opt_method='scipy'):
        super().__init__(is_column_store, opt_method)
        # All lists below are positional: entry k belongs to param_names[k].
        self.analytic_params = [0.3, 0.5, 20, 2, 0.2, 0.9, 0.3, 0.3, 0.3, 0.3, 0.1, 0.2, 0.5, 0.5, 10, 200, 16000]
        self.param_names = ['n1', 'q1', 'i1', 'i2', 'c1', 'c2', 'm1', 'm2', 'm3', 'm4', 'm5', 'cm1',
                            'r1', 'r2', 'max_concurrency', 'avg_io_speed', 'memory_size',]
        self.bound = optimization.Bounds(
            [0.01, 0.1, 10, 0.1, 0.0001, 0.0001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.4, 0.4, 10, 20, 10000],
            [1, 1, 200, 2, 1, 1, 1, 0.9, 0.5, 0.5, 0.5, 0.5, 0.8, 0.8, 20, 2000, 25000],
        )
        self.constrain = optimization.Bounds(
            [0.1, 0.1, 10, 0.1, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.5, 0.05, 2, 20, 10000],
            [1, 1, 200, 2, 1, 0.9, 0.9, 0.9, 0.5, 0.5, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        self.penalty = [100, 100, 0.1, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 1, 0.1, 0.01]

    def featurize_data(self, concurrent_df):
        """
        Build the 18-entry feature tuple consumed by interaction_separation_func_scipy.

        concurrent_df:: DataFrame with (at least) the columns query_idx, runtime, start_time,
            end_time, num_concurrent_queries, num_concurrent_queries_train, concur_info,
            concur_info_train and pre_exec_info. Each concur_info entry `c` is indexed as
            c[0] (query index) and c[1] (a timestamp, presumably the start time -- confirm).

        Query groups whose query_idx is missing from isolated_rt_cache, query_info or
        average_rt_cache are skipped. When self.use_pre_info is True all "post" features are
        set to zero.

        Returns (feature, global_y, global_query_idx):
            feature: tuple of 18 one-dimensional arrays, one value per kept execution
            global_y: observed runtimes, aligned with the feature arrays
            global_query_idx: dict query_idx -> (start, end) row range inside those arrays
        For opt_method "torch"/"nn" the arrays are converted to torch tensors.
        Raises ValueError when no query group survives the filtering.
        """
        mb = 1024 * 1024  # cardinalities are scaled by 1024 * 1024
        global_y = []
        global_isolated_runtime = []
        global_avg_runtime = []
        global_num_concurrency_pre = []
        global_num_concurrency_post = []
        global_sum_concurrent_runtime_pre = []
        global_sum_concurrent_runtime_post = []
        global_avg_time_elapsed_pre = []
        global_sum_time_overlap_post = []
        global_est_scan = []
        global_est_concurrent_scan_pre = []
        global_est_concurrent_scan_post = []
        global_scan_sharing_percentage = []
        global_max_est_card = []
        global_avg_est_card = []
        global_max_concurrent_card_pre = []
        global_max_concurrent_card_post = []
        global_avg_concurrent_card_pre = []
        global_avg_concurrent_card_post = []
        global_query_idx = dict()
        start = 0
        for i, rows in concurrent_df.groupby("query_idx"):
            if (
                    i not in self.isolated_rt_cache
                    or i not in self.query_info
                    or i not in self.average_rt_cache
            ):
                continue
            concurrent_rt = rows["runtime"].values
            start_time = rows["start_time"].values
            end_time = rows["end_time"].values
            query_info = self.query_info[i]
            n_rows = len(rows)
            global_num_concurrency_pre.append(rows["num_concurrent_queries_train"].values)
            concur_info_prev = rows["concur_info_train"].values
            full_concur_info = rows["concur_info"].values
            pre_exec_info = rows["pre_exec_info"].values

            global_query_idx[i] = (start, start + n_rows)
            start += n_rows
            global_y.append(concurrent_rt)
            # per-query constants are broadcast to one value per execution
            global_isolated_runtime.append(np.ones(n_rows) * self.isolated_rt_cache[i])
            global_avg_runtime.append(np.ones(n_rows) * self.average_rt_cache[i])
            global_est_scan.append(np.ones(n_rows) * query_info["est_scan"])
            global_max_est_card.append(
                np.ones(n_rows) * np.max(query_info["all_cardinality"]) / mb
            )
            global_avg_est_card.append(
                np.ones(n_rows) * np.average(query_info["all_cardinality"]) / mb
            )
            for j in range(n_rows):
                sum_concurrent_runtime_pre = 0
                sum_concurrent_runtime_post = 0
                sum_concurrent_scan_pre = 0
                sum_concurrent_scan_post = 0
                avg_time_elapsed_pre = 0
                sum_time_overlap_post = 0
                concurrent_card_pre = []
                concurrent_card_post = []
                for c in full_concur_info[j]:
                    # a concurrent query is "pre" when it is also listed in concur_info_train
                    is_pre = c in concur_info_prev[j]
                    if c[0] in self.average_rt_cache:
                        if is_pre:
                            sum_concurrent_runtime_pre += self.average_rt_cache[c[0]]
                            avg_time_elapsed_pre += (start_time[j] - c[1])
                        else:
                            sum_concurrent_runtime_post += self.average_rt_cache[c[0]]
                            # TODO: this is not practical, make it an estimation
                            sum_time_overlap_post += (end_time[j] - c[1])
                    else:
                        # concurrent query without a known average runtime: report its index
                        print(c[0])
                    if c[0] in self.query_info:
                        if is_pre:
                            sum_concurrent_scan_pre += self.query_info[c[0]]["est_scan"]
                            concurrent_card_pre.extend(self.query_info[c[0]]["all_cardinality"])
                        else:
                            sum_concurrent_scan_post += self.query_info[c[0]]["est_scan"]
                            concurrent_card_post.extend(self.query_info[c[0]]["all_cardinality"])
                    else:
                        # concurrent query without parsed plan information: report its index
                        print(c[0])

                global_sum_concurrent_runtime_pre.append(sum_concurrent_runtime_pre)
                # + 0.001 keeps the average defined when there is no "pre" query
                global_avg_time_elapsed_pre.append(avg_time_elapsed_pre / (len(concur_info_prev[j]) + 0.001))
                global_est_concurrent_scan_pre.append(sum_concurrent_scan_pre)
                if len(concurrent_card_pre) == 0:
                    global_max_concurrent_card_pre.append(0)
                    global_avg_concurrent_card_pre.append(0)
                else:
                    global_max_concurrent_card_pre.append(np.max(concurrent_card_pre) / mb)
                    global_avg_concurrent_card_pre.append(np.average(concurrent_card_pre) / mb)
                # TODO: may be able to change concur_info_prev to full_concur_info?
                global_scan_sharing_percentage.append(
                    self.estimate_data_share_percentage(
                        i, concur_info_prev[j], pre_exec_info[j]
                    )
                )
                if self.use_pre_info:
                    global_sum_concurrent_runtime_post.append(0)
                    global_est_concurrent_scan_post.append(0)
                    global_sum_time_overlap_post.append(0)
                    global_max_concurrent_card_post.append(0)
                    global_avg_concurrent_card_post.append(0)
                else:
                    global_sum_concurrent_runtime_post.append(sum_concurrent_runtime_post)
                    global_est_concurrent_scan_post.append(sum_concurrent_scan_post)
                    global_sum_time_overlap_post.append(sum_time_overlap_post)
                    if len(concurrent_card_post) == 0:
                        global_max_concurrent_card_post.append(0)
                        global_avg_concurrent_card_post.append(0)
                    else:
                        global_max_concurrent_card_post.append(np.max(concurrent_card_post) / mb)
                        global_avg_concurrent_card_post.append(np.average(concurrent_card_post) / mb)

            if self.use_pre_info:
                num_concurrency_post = np.zeros(n_rows)
            else:
                num_concurrency_post = rows["num_concurrent_queries"].values - rows["num_concurrent_queries_train"].values
            global_num_concurrency_post.append(num_concurrency_post)

        if not global_y:
            # np.concatenate would fail below with an unhelpful "need at least one array" message
            raise ValueError(
                "featurize_data: no query in concurrent_df has isolated runtime, "
                "average runtime and parsed query info available"
            )

        # per-group arrays are concatenated; per-row scalars are converted to arrays
        global_y = np.concatenate(global_y)
        global_isolated_runtime = np.concatenate(global_isolated_runtime)
        global_avg_runtime = np.concatenate(global_avg_runtime)
        global_num_concurrency_pre = np.concatenate(global_num_concurrency_pre)
        global_num_concurrency_post = np.concatenate(global_num_concurrency_post)

        global_est_scan = np.concatenate(global_est_scan)
        global_max_est_card = np.concatenate(global_max_est_card)
        global_avg_est_card = np.concatenate(global_avg_est_card)
        global_avg_time_elapsed_pre = np.asarray(global_avg_time_elapsed_pre)
        global_sum_time_overlap_post = np.asarray(global_sum_time_overlap_post)
        global_sum_concurrent_runtime_pre = np.asarray(global_sum_concurrent_runtime_pre)
        global_sum_concurrent_runtime_post = np.asarray(global_sum_concurrent_runtime_post)
        global_est_concurrent_scan_pre = np.asarray(global_est_concurrent_scan_pre)
        global_est_concurrent_scan_post = np.asarray(global_est_concurrent_scan_post)
        global_max_concurrent_card_pre = np.asarray(global_max_concurrent_card_pre)
        global_max_concurrent_card_post = np.asarray(global_max_concurrent_card_post)
        global_avg_concurrent_card_pre = np.asarray(global_avg_concurrent_card_pre)
        global_avg_concurrent_card_post = np.asarray(global_avg_concurrent_card_post)
        global_scan_sharing_percentage = np.asarray(global_scan_sharing_percentage)
        # order must match the tuple unpacking in interaction_separation_func_scipy
        feature = (
            global_isolated_runtime,
            global_avg_runtime,
            global_num_concurrency_pre,
            global_num_concurrency_post,
            global_sum_concurrent_runtime_pre,
            global_sum_concurrent_runtime_post,
            global_avg_time_elapsed_pre,
            global_sum_time_overlap_post,
            global_est_scan,
            global_est_concurrent_scan_pre,
            global_est_concurrent_scan_post,
            global_scan_sharing_percentage,
            global_max_est_card,
            global_avg_est_card,
            global_max_concurrent_card_pre,
            global_max_concurrent_card_post,
            global_avg_concurrent_card_pre,
            global_avg_concurrent_card_post
        )
        if self.opt_method == "torch" or self.opt_method == "nn":
            feature = tuple(torch.from_numpy(column) for column in feature)
            global_y = torch.from_numpy(global_y)
        return feature, global_y, global_query_idx



In [20]:
# Fit the pre/post separation variant of the complex fit-curve model with the scipy
# optimizer and score it on eval_trace_df (defined outside this cell).
parsed_queries_path = "/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_aurora/aurora_mixed_parsed_queries.json"
cfc = ComplexFitCurveSeparation(opt_method='scipy')
cfc.pre_process_queries(parsed_queries_path)
cfc.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df, analytic_func=interaction_separation_func_scipy)
predictions_cfc, labels = cfc.predict(eval_trace_df)
# banner previously read "simple linear regression model", which is not the model fitted here
print("===========Performance for complex fit curve separation model (all query)=============")
result_overall_cfc, result_per_query_cfc, result_by_interval_cfc = cfc.evaluate_performance(eval_trace_df, interval=[0, 10, 60])

`xtol` termination condition is satisfied.
Function evaluations 57, initial cost 3.1496e+06, final cost 1.0937e+06, first-order optimality 4.40e+04.
50% absolute error is 6.873689942427311, q-error is 2.149383549802416
90% absolute error is 73.00344264934647, q-error is 9.98984272489963
95% absolute error is 114.58397064382326, q-error is 18.04297299371214
For query in range 0s to 10s, there are 5846 executions
50% absolute error is 2.0037470354583697, q-error is 2.826971688793038
90% absolute error is 20.43418214532607, q-error is 14.52683551902678
95% absolute error is 38.617396136679595, q-error is 25.31544213101032
For query in range 10s to 60s, there are 3041 executions
50% absolute error is 16.01190956477727, q-error is 1.955986761454129
90% absolute error is 85.47447987795465, q-error is 6.250272760038555
95% absolute error is 120.33381263721336, q-error is 10.708050329496764
For query in range 60s to infs, there are 2020 executions
50% absolute error is 42.38566484175186, q-err

In [7]:
# Display the fitted parameters next to their names (both lists are positional).
list(zip(cfc.param_names, cfc.analytic_params))

[('n1', 0.012499458441714753),
 ('q1', 0.4912910173928935),
 ('i1', 10.666223544774548),
 ('i2', 0.10000000733581918),
 ('c1', 0.09015059272427761),
 ('c2', 0.9999999999999986),
 ('m1', 0.00124510895434049),
 ('m2', 0.7887514688970306),
 ('m3', 0.0010617133613253205),
 ('m4', 0.44040905712707007),
 ('m5', 0.10000000000001112),
 ('cm1', 0.1350892942224451),
 ('r1', 0.44551870235072577),
 ('r2', 0.49999999999999994),
 ('max_concurrency', 14.669684433414567),
 ('avg_io_speed', 810.6615017617146),
 ('memory_size', 15891.37518824329)]

In [8]:
# Re-score the fitted model with use_pre_info=True. For a ComplexFitCurveSeparation,
# featurize_data then sets every *_post feature to zero.
# NOTE(review): this mutates cfc in place and overwrites the result_* variables of the
# training cell; a later cell sets use_pre_info back to False.
cfc.use_pre_info = True
result_overall_cfc, result_per_query_cfc, result_by_interval_cfc = cfc.evaluate_performance(eval_trace_df, interval=[0, 10, 60])

50% absolute error is 7.213989113269395, q-error is 2.535096108619231
90% absolute error is 101.86023724664493, q-error is 11.608351268273935
95% absolute error is 169.66487127283168, q-error is 20.167396314492343
For query in range 0s to 10s, there are 5846 executions
50% absolute error is 2.0161857725518644, q-error is 2.922016291055555
90% absolute error is 21.101341952919626, q-error is 15.825346779218211
95% absolute error is 40.94259227729892, q-error is 27.701214268426714
For query in range 10s to 60s, there are 3041 executions
50% absolute error is 17.075821605857076, q-error is 2.2855775126280466
90% absolute error is 109.68263671144541, q-error is 9.481384742030585
95% absolute error is 156.89793774582043, q-error is 14.729372288284859
For query in range 60s to infs, there are 2020 executions
50% absolute error is 56.844251060391215, q-error is 2.161494635754143
90% absolute error is 262.154409631, q-error is 5.819698978656869
95% absolute error is 394.04743281914335, q-error

In [None]:
# Fit the baseline ComplexFitCurve with its default optimizer settings.
# NOTE(review): predictions are made on eval_trace_df but evaluate_performance is given the
# training rows, so the printed errors are training errors -- confirm this is intended.
parsed_queries_path = "/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_aurora/aurora_mixed_parsed_queries.json"
cfc = ComplexFitCurve()
cfc.pre_process_queries(parsed_queries_path)
cfc.train(concurrency_df.iloc[train_idx], isolated_trace_df=isolated_trace_df)
predictions_cfc, labels = cfc.predict(eval_trace_df, use_global=True)
# banner previously read "simple linear regression model", which is not the model fitted here
print("===========Performance for complex fit curve model (all query)=============")
result_overall_cfc, result_per_query_cfc, result_by_interval_cfc = cfc.evaluate_performance(concurrency_df.iloc[train_idx], interval=[0, 10, 60])

In [9]:
# Fit the baseline ComplexFitCurve with the scipy optimizer and score it on eval_trace_df
# (defined outside this cell).
parsed_queries_path = "/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_aurora/aurora_mixed_parsed_queries.json"
cfc = ComplexFitCurve(opt_method='scipy')
cfc.pre_process_queries(parsed_queries_path)
cfc.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df)
predictions_cfc, labels = cfc.predict(eval_trace_df)
# banner previously read "simple linear regression model", which is not the model fitted here
print("===========Performance for complex fit curve model (all query)=============")
result_overall_cfc, result_per_query_cfc, result_by_interval_cfc = cfc.evaluate_performance(eval_trace_df, interval=[0, 10, 60])

`ftol` termination condition is satisfied.
Function evaluations 32, initial cost 2.6643e+06, final cost 1.0648e+06, first-order optimality 6.10e+00.
50% absolute error is 4.036703018599082, q-error is 1.7931724607708919
90% absolute error is 75.84357167996453, q-error is 6.745197463893984
95% absolute error is 118.95869234293372, q-error is 11.446624135729719
For query in range 0s to 10s, there are 5846 executions
50% absolute error is 0.9379855044678616, q-error is 1.9348106224447796
90% absolute error is 15.975506831743893, q-error is 9.326160673146102
95% absolute error is 34.927829142785164, q-error is 17.24132031156027
For query in range 10s to 60s, there are 3041 executions
50% absolute error is 12.15656395727639, q-error is 1.6958280271748678
90% absolute error is 80.82107307544314, q-error is 5.47245173051059
95% absolute error is 114.05081573250816, q-error is 8.243076907290371
For query in range 60s to infs, there are 2020 executions
50% absolute error is 46.44120161091159, q

In [None]:
# Display the raw parameter vector of the most recently trained cfc model.
cfc.analytic_params

In [None]:
# Side-by-side per-query results of the three models, restricted to queries whose average
# runtime exceeds 5 (presumably seconds -- confirm). Entries [0] and [1] of each result are
# printed as-is; their meaning is defined by evaluate_performance.
# NOTE(review): result_per_query_sfc / result_per_query_xgb are produced by the SimpleFitCurve
# and XGBoost cells that appear further down in this notebook, so this cell fails on a
# fresh top-to-bottom run.
for i in result_per_query_cfc:
    if cfc.average_rt_cache[i] > 5:
        print(i, result_per_query_cfc[i][0], result_per_query_cfc[i][1], result_per_query_sfc[i][0], result_per_query_sfc[i][1],
              result_per_query_xgb[i][0], result_per_query_xgb[i][1])

In [21]:
# Rebuild the evaluation features with the "post" information enabled again.
cfc.use_pre_info = False
feature, global_y, global_query_idx = cfc.featurize_data(eval_trace_df)
# Stack the per-feature arrays and transpose so that each row of f_t is one execution and
# each column one feature (assumes featurize_data returned numpy arrays, i.e. opt_method
# is neither "torch" nor "nn").
f_t = np.stack(feature).transpose()

In [33]:
# Inspect the 10 smallest and 10 largest predictions for query i and replay the analytical
# function on each of them with debug output enabled.
i = 0
idx = np.argsort(predictions_cfc[i])
row_start, row_end = global_query_idx[i]
curr_label = global_y[row_start:row_end]
curr_features = f_t[row_start:row_end]
for j in list(idx[:10]) + list(idx[-10:]):
    print("==================================")
    print(predictions_cfc[i][j], curr_label[j])
    # use a dedicated name: rebinding `feature` would overwrite the tuple returned by
    # cfc.featurize_data in the previous cell
    row_feature = tuple(curr_features[j])
    print(row_feature)
    interaction_separation_func_scipy(row_feature, *cfc.analytic_params, debug=True)

80.01038942729303 8.943703889846802
(44.55030298233032, 109.1729221343994, 1.0, 0.0, 55.44309878349304, 0.0, 71.57834865134723, 0.0, 6077.552390098572, 3684.9148273468018, 0.0, 0.9954112765064694, 0.7232437133789062, 0.09085226058959961, 467.9256896972656, 0.0, 175.4721441268921, 0.0)
queueing_time 0.0
discount_pre 0.1
io_speed 821.3277253064891
io_time 0.00339550323484207
io_time_old 0.7399668425498369
cpu_time_isolated 79.6423698647146
mem_usage 0.0
cpu_scale 0.004578267321313804
mem_scale 1.0
cpu_time_isolated 79.6423698647146
cpu_time 80.0069939240582
80.45937365547326 4.748476505279541
(44.55030298233032, 109.1729221343994, 3.0, 0.0, 41.40066194534302, 0.0, 9.772476841053, 0.0, 6077.552390098572, 6258.341488838196, 0.0, 0.9698032634704855, 0.7232437133789062, 0.09085226058959961, 267.6192169189453, 0.0, 11.289948743932387, 0.0)
queueing_time 0.0
discount_pre 0.29186082672147234
io_speed 821.3277253064891
io_time 0.02234458378505416
io_time_old 0.7399668425498369
cpu_time_isolated 

In [32]:
# Compare the three models' predictions with the observed runtimes for query i, sorted by
# the cfc prediction. Columns: cfc, sfc, xgb, label.
# NOTE(review): .iloc[i] selects row i of isolated_trace_df by position, which is not
# necessarily an execution of query_idx i -- confirm this is the intended reference value.
# NOTE(review): predictions_sfc / predictions_xgb come from cells that appear further down
# in this notebook.
i = 0
print(isolated_trace_df["runtime"].iloc[i])
idx = np.argsort(predictions_cfc[i])
np.stack((predictions_cfc[i][idx], predictions_sfc[i][idx], predictions_xgb[i][idx], labels[i][idx]), axis=1)

29.34782457351685


array([[ 80.01038943,  36.6914102 ,  29.46135712,   8.94370389],
       [ 80.45937366,  33.95785477,  23.1088047 ,   4.74847651],
       [ 81.16240513,  33.95785477,  29.51144981,  11.41822219],
       [ 81.69466001,  47.15126465,  36.1072731 ,  24.43374467],
       [ 82.0147639 ,  33.95785477,  26.43943977,  13.10642648],
       [ 83.96224133,  47.15126465,   0.001     ,   8.07139182],
       [ 84.20839136,  86.73149429,  44.2942009 ,  39.24779963],
       [ 85.37212727,  47.15126465,  53.06978226,  24.57741523],
       [ 86.3075151 ,  33.95785477,  91.8082962 ,  54.5951848 ],
       [ 86.43981581,  33.95785477,  26.53706551,  17.60623455],
       [ 87.04921786,  99.92490417,  82.92233276,  85.27525115],
       [ 87.98704869,  47.15126465,  49.38063431,  36.34676528],
       [ 91.0339356 ,  33.95785477,  42.28642273,  23.84352231],
       [ 91.83915691,  60.34467453,  35.54413223,   7.25911045],
       [ 92.66448067, 139.50513381,  68.061203  ,  38.31848145],
       [ 92.87943291,  60

In [None]:
# NOTE(review): leftover scratch expression (evaluates to 0); candidate for removal.
(0)

In [None]:
# NOTE(review): scratch lookup with an empty key; `plan` is not defined in the visible part
# of this notebook -- candidate for removal.
plan[""]

In [None]:
# Peek at the last entry under "parsed_queries".
# NOTE(review): `plan` is not defined in the visible part of this notebook.
plan["parsed_queries"][-1]

In [None]:
# List the top-level keys of `plan`.
# NOTE(review): `plan` is not defined in the visible part of this notebook.
plan.keys()

In [None]:
# Sanity check: np.maximum broadcasts the scalar against the array (result is three ones).
np.maximum(np.zeros(3), 1)

In [38]:
# Baseline: fit SimpleFitCurve on the training rows and score it on eval_trace_df using
# the global (all-query) mode. The per-query variant is kept below, commented out.
sfc = SimpleFitCurve()
sfc.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df)
#predictions, labels = sfc.predict(eval_trace_df)
#print("===========Performance for simple curve fitting model (per query)=============")
#result_overall, result_per_query = sfc.evaluate_performance(eval_trace_df, use_train=True)
predictions_sfc, labels = sfc.predict(eval_trace_df, use_global=True)
print("===========Performance for simple curve fitting model (all query)=============")
result_overall_sfc, result_per_query_sfc, result_by_interval_sfc = sfc.evaluate_performance(eval_trace_df, use_global=True, interval=[0, 10, 60])

50% absolute error is 10.26358612688191, q-error is 4.330766611975611
90% absolute error is 88.78408571569167, q-error is 43.665750587953184
95% absolute error is 116.4081436502695, q-error is 109.10279384618602
For query in range 0s to 10s, there are 540 executions
50% absolute error is 1.9812749360839172, q-error is 3.0131166356278847
90% absolute error is 19.950304748371643, q-error is 18.271560707627014
95% absolute error is 24.9536131213566, q-error is 28.925998084923908
For query in range 10s to 60s, there are 256 executions
50% absolute error is 17.597095441949197, q-error is 7.511464942587086
90% absolute error is 46.609297356320226, q-error is 53.48318296037619
95% absolute error is 59.5536873006706, q-error is 92.06065881195197
For query in range 60s to infs, there are 253 executions
50% absolute error is 75.17097942926829, q-error is 5.70061428823845
90% absolute error is 153.61593521237623, q-error is 170.6253688588455
95% absolute error is 240.02729516233825, q-error is 35



In [39]:
# XGBoost baseline: fit on the training split of concurrency_df (with
# use_pre_exec_info=True), then predict and score on eval_trace_df. Errors are
# additionally broken down by the interval boundaries [0, 10, 60].
# An earlier variant that predicted/evaluated on the training split itself used
# to live here as commented-out code.
# NOTE(review): k=240 is an unexplained constant — document what it controls.
xgb = XGBoostPredictor(k=240)
xgb.train(
    concurrency_df.iloc[train_idx],
    use_train=False,
    isolated_trace_df=isolated_trace_df,
    use_pre_exec_info=True,
)

# NOTE(review): `labels` is also assigned by the SimpleFitCurve cell's predict()
# call; running this cell replaces it — confirm downstream cells expect this.
predictions_xgb, labels = xgb.predict(eval_trace_df)

print("===========Performance for XGBoost model (train on full)=============")
(
    result_overall_xgb,
    result_per_query_xgb,
    result_by_interval_xgb,
) = xgb.evaluate_performance(
    eval_trace_df,
    interval=[0, 10, 60],
)

50% absolute error is 6.82193911075592, q-error is 1.8462710247892797
90% absolute error is 33.401027417182924, q-error is 15.358922600463911
95% absolute error is 47.82344834804532, q-error is 136.7646268972089
For query in range 0s to 10s, there are 535 executions
50% absolute error is 3.1627433300018315, q-error is 3.2662633237851946
90% absolute error is 13.892200040817267, q-error is 125.3731191213906
95% absolute error is 17.533620405197137, q-error is 886.7988403637504
For query in range 10s to 60s, there are 254 executions
50% absolute error is 10.94163000583649, q-error is 1.6870888337045005
90% absolute error is 30.393349456787114, q-error is 3.8482550475213686
95% absolute error is 38.70606662034988, q-error is 5.407276915872149
For query in range 60s to infs, there are 253 executions
50% absolute error is 23.480167865753174, q-error is 1.2679038067436363
90% absolute error is 62.416790246963565, q-error is 1.911761592391889
95% absolute error is 78.46281461715694, q-error i