In [7]:
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
sys.path.append("../")
from utils.load_brad_trace import load_trace, create_concurrency_dataset, load_trace_all_version
from models.concurrency.analytical_models import SimpleFitCurve, ComplexFitCurve
from models.concurrency.xgboost import XGBoostPredictor
from models.concurrency.linear_regression import SimpleLinearReg
np.set_printoptions(suppress=True)

In [8]:
# --- Load the concurrent-execution traces and the isolated-runtime baseline ---
# Root folder holding the AWS traces. It defaults to the original author's local
# path but can be redirected with the AWS_TRACE_ROOT environment variable, so the
# notebook runs on other machines without editing this cell.
DATA_ROOT = os.environ.get("AWS_TRACE_ROOT", "/Users/ziniuw/Desktop/research/Data/AWS_trace")
folder_name = "mixed_aurora"
directory = f"{DATA_ROOT}/{folder_name}/"

# Load every trace version found in `directory` (8 is passed through unchanged to
# load_trace_all_version; its meaning is defined by that helper).
all_raw_trace, all_trace = load_trace_all_version(directory, 8, concat=True)

# Build one concurrency dataset per trace, then stack them into a single frame.
all_concurrency_df = []
for trace in all_trace:
    trace_concurrency_df = create_concurrency_dataset(trace, engine=None, pre_exec_interval=200)
    all_concurrency_df.append(trace_concurrency_df)
concurrency_df = pd.concat(all_concurrency_df, ignore_index=True)

# Isolated (warm-up batch) runs live in the same trace folder.
# An alternative baseline used previously: {DATA_ROOT}/mixed_redshift/repeating_olap_batch_warmup.csv
isolated_trace_df = pd.read_csv(os.path.join(directory, "repeating_olap_batch_warmup.csv"))
isolated_trace_df["runtime"] = isolated_trace_df["run_time_s"]

# Median isolated runtime per query template, keyed by query_idx.
isolated_rt_cache = dict()
for i, rows in isolated_trace_df.groupby("query_idx"):
    isolated_rt_cache[i] = np.median(rows["runtime"])

In [9]:
# Per query template, print: query_idx, number of concurrent observations,
# isolated (median) runtime, then mean / min / max / std of the concurrent runtime.
for i, rows in concurrency_df.groupby("query_idx"):
    runtime = rows["runtime"].values
    runtime_stats = (
        np.mean(runtime),
        np.min(runtime),
        np.max(runtime),
        np.std(runtime),
    )
    print(i, len(rows), isolated_rt_cache[i], *runtime_stats)

0 229 44.55030298233032 149.43657204573853 3.882469892501831 928.3928072452544 162.89973109444279
1 230 0.2543864250183105 0.6743606847265492 0.0791635513305664 7.855563879013061 1.0985220381229965
2 226 23.157404899597168 152.3746449219442 6.902190923690796 949.1584985256196 156.9762234685832
3 231 0.283794641494751 0.40228485235404143 0.0791168212890625 10.761748790740969 0.8148074521967292
4 228 4.330385208129883 21.139591959484836 0.0939805507659912 173.76991820335388 28.61461356127559
5 233 2.5214909315109253 17.549908976698127 2.4990737438201904 248.4405851364136 33.45214806905375
6 229 2.5594241619110107 3.7198088002517236 2.478666305541992 18.826606512069706 2.124091232234117
7 230 2.2192370891571045 10.955201000752657 1.7622826099395752 173.82364749908447 23.275037854071385
8 232 0.3999779224395752 1.2264011348115986 0.1878495216369629 19.547641038894653 2.632632301241479
9 232 0.7344051599502563 0.10832449588282352 0.0426087379455566 8.005128622055054 0.5284501040445094
10 22

In [10]:
# Reproducible 80/20 row split of the concurrency dataset.
np.random.seed(0)
num_rows = len(concurrency_df)
train_idx = np.random.choice(num_rows, size=int(0.8 * num_rows), replace=False)
# The held-out rows are the complement of train_idx. np.setdiff1d returns them
# sorted ascending in one vectorised pass, avoiding a per-row membership scan of
# the train_idx array (which is quadratic in the number of rows).
test_idx = np.setdiff1d(np.arange(num_rows), train_idx).tolist()
eval_trace_df = concurrency_df.iloc[test_idx]
# Only rows that actually ran alongside other queries are evaluated.
eval_trace_df = eval_trace_df[eval_trace_df['num_concurrent_queries'] > 0]

In [11]:
import numpy as np
from xgboost import XGBRegressor
import torch
import torch.optim as optim
from torch.nn.functional import l1_loss, mse_loss
import scipy.optimize as optimization
from torch.utils.data import DataLoader
from models.concurrency.base_model import ConcurPredictor
from models.concurrency.utils import QueryFeatureDataset, SimpleNet
from models.concurrency.analytical_functions import (
    simple_queueing_func,
    interaction_func_torch,
    interaction_func_scipy,
)
from parser.utils import load_json, dfs_cardinality, estimate_scan_in_mb


class SimpleFitCurve(ConcurPredictor):
    """
    Simple fit curve model for runtime prediction with concurrency
    runtime = queue_time(num_concurrency) + alpha(num_concurrency) * isolated_runtime
            = (a1 * max(num_concurrency-b1, 0)) + (1 + a2*min(num_concurrency, b1)) * isolated_runtime
    optimize a1, a2, b1

    The curve itself is evaluated by ``simple_queueing_func``. One (a1, a2, b1)
    triple is fitted per query template (keyed by ``query_idx``) and one extra
    "global" triple is fitted on the pooled observations of all fitted templates.
    """

    # Starting point handed to scipy's curve_fit, in (a1, a2, b1) order.
    _INITIAL_GUESS = (5, 0.1, 20)
    # Templates with fewer concurrent observations than this are not fitted.
    _MIN_SAMPLES = 10

    def __init__(self):
        super().__init__()
        self.isolated_rt_cache = dict()
        # Which concurrency column train/predict read (see train()).
        self.use_train = True
        # Fitted parameters: *_global for the pooled fit, dicts keyed by query_idx
        # for the per-template fits.
        self.a1_global = 0
        self.a1 = dict()
        self.b1_global = 0
        self.b1 = dict()
        self.a2_global = 0
        self.a2 = dict()

    def train(self, trace_df, use_train=True, isolated_trace_df=None):
        """
        Fit the per-template and the global curve parameters.

        :param trace_df: trace with at least the columns ``query_idx``, ``runtime``,
            ``num_concurrent_queries`` and, when ``use_train`` is set,
            ``num_concurrent_queries_train``
        :param use_train: read the concurrency level from
            ``num_concurrent_queries_train`` instead of ``num_concurrent_queries``;
            the choice is remembered and reused by predict()
        :param isolated_trace_df: forwarded to ``get_isolated_runtime_cache``
            (presumably the source of isolated runtimes -- defined in the base class)
        :raises ValueError: if no template has enough concurrent observations to fit
        """
        self.use_train = use_train
        self.get_isolated_runtime_cache(trace_df, isolated_trace_df)
        concurrent_df = trace_df[trace_df["num_concurrent_queries"] > 0]
        concurrency_col = (
            "num_concurrent_queries_train" if use_train else "num_concurrent_queries"
        )

        global_y = []
        global_x = []
        global_ir = []
        for i, rows in concurrent_df.groupby("query_idx"):
            if i not in self.isolated_rt_cache:
                continue
            concurrent_rt = rows["runtime"].values
            num_concurrency = rows[concurrency_col].values
            if len(num_concurrency) < self._MIN_SAMPLES:
                continue
            # The isolated runtime is constant per template; broadcast it so it can
            # be passed to the curve alongside the concurrency levels.
            isolated_rt = np.ones(len(num_concurrency)) * self.isolated_rt_cache[i]
            global_y.append(concurrent_rt)
            global_x.append(num_concurrency)
            global_ir.append(isolated_rt)
            fit, _ = optimization.curve_fit(
                simple_queueing_func,
                (num_concurrency, isolated_rt),
                concurrent_rt,
                np.array(self._INITIAL_GUESS),
            )
            self.a1[i] = fit[0]
            self.a2[i] = fit[1]
            self.b1[i] = fit[2]

        if not global_y:
            # np.concatenate([]) would raise an uninformative ValueError here.
            raise ValueError(
                "SimpleFitCurve.train: no query has an isolated runtime and at least "
                f"{self._MIN_SAMPLES} concurrent observations; nothing to fit"
            )

        global_y = np.concatenate(global_y)
        global_x = np.concatenate(global_x)
        global_ir = np.concatenate(global_ir)
        fit, _ = optimization.curve_fit(
            simple_queueing_func,
            (global_x, global_ir),
            global_y,
            np.array(self._INITIAL_GUESS),
        )
        self.a1_global = fit[0]
        self.a2_global = fit[1]
        self.b1_global = fit[2]

    def predict(self, eval_trace_df, use_global=False):
        """
        Predict runtimes for every template that was fitted during train().

        :param eval_trace_df: trace with the same columns train() reads
        :param use_global: evaluate the pooled parameters instead of the
            per-template ones
        :return: ``(predictions, labels)``, two dicts keyed by ``query_idx`` holding
            the predicted and the observed runtime arrays
        """
        concurrency_col = (
            "num_concurrent_queries_train"
            if self.use_train
            else "num_concurrent_queries"
        )
        predictions = dict()
        labels = dict()
        for i, rows in eval_trace_df.groupby("query_idx"):
            # Templates without a per-template fit are skipped even when
            # use_global is set.
            if i not in self.isolated_rt_cache or i not in self.a1:
                continue
            isolated_rt = self.isolated_rt_cache[i]
            labels[i] = rows["runtime"].values
            num_concurrency = rows[concurrency_col].values
            x = (num_concurrency, np.ones(len(num_concurrency)) * isolated_rt)
            if use_global:
                pred = simple_queueing_func(
                    x, self.a1_global, self.a2_global, self.b1_global
                )
            else:
                pred = simple_queueing_func(x, self.a1[i], self.a2[i], self.b1[i])
            # Clamp to a small positive floor so predictions are never <= 0.
            predictions[i] = np.maximum(pred, 0.001)
        return predictions, labels


def fit_curve_loss_torch(x, y, params, constrain, loss_func="soft_l1", penalties=None):
    """
    Training loss for the torch-based curve fit: data term plus bound penalties.

    :param x: feature batch handed unchanged to ``interaction_func_torch``
    :param y: target runtimes for the batch
    :param params: sequence of parameter tensors, unpacked into
        ``interaction_func_torch`` after ``x``
    :param constrain: object exposing ``lb`` / ``ub`` sequences indexed like ``params``
    :param loss_func: ``"mae"`` (mean absolute error), ``"mse"`` (sum of squared
        errors) or ``"soft_l1"`` (mean smooth-L1)
    :param penalties: optional per-parameter penalty sharpness; 1 is used for every
        parameter when omitted
    :return: scalar tensor = data loss + sum of exponential bound penalties
    """
    prediction = interaction_func_torch(x, *params)
    lower_bounds = constrain.lb
    upper_bounds = constrain.ub

    if loss_func == "mae":
        data_term = torch.abs(prediction - y)
    elif loss_func == "mse":
        data_term = torch.nn.functional.mse_loss(prediction, y, reduction='sum')
    elif loss_func == "soft_l1":
        data_term = torch.nn.functional.smooth_l1_loss(prediction, y)
    else:
        assert False, f"loss func {loss_func} not implemented"

    # For "mse"/"soft_l1" the data term is already a scalar, so the mean is a no-op.
    total = torch.mean(data_term)

    # Soft barrier: each term grows exponentially once a parameter moves above its
    # upper bound or below its lower bound (it is small but non-zero inside them).
    for idx, param in enumerate(params):
        sharpness = 1 if penalties is None else penalties[idx]
        total += torch.exp(sharpness * (param - upper_bounds[idx])) + torch.exp(
            -1 * sharpness * (param - lower_bounds[idx])
        )
    return total


class ComplexFitCurve(ConcurPredictor):
    """
    Complex fit curve model for runtime prediction with concurrency
    See interaction_func_scipy for detailed analytical functions

    Typical use: ``pre_process_queries`` (load plans and table statistics), then
    ``train`` on a concurrency trace, then ``predict``. The fitting backend is
    selected by ``opt_method``: "scipy" and "torch" fit the parameters of an
    analytic function, "nn" and "xgboost" train a learned regressor on the same
    features.
    """

    def __init__(self, is_column_store=False, opt_method="scipy"):
        """
        :param is_column_store: whether the DBMS is a column store; forwarded to
            ``estimate_scan_in_mb`` when queries are pre-processed
        :param opt_method: fitting backend used by train()/predict(); one of
            "scipy", "torch", "nn" or "xgboost"
        """
        super().__init__()
        # Per-query runtime caches (filled via get_isolated_runtime_cache in train()).
        self.isolated_rt_cache = dict()
        self.average_rt_cache = dict()
        # Per-query plan information, filled by pre_process_queries().
        self.query_info = dict()
        self.db_stats = None
        # Table sizes keyed by table name and by position in db_stats["table_stats"].
        self.table_sizes = dict()
        self.table_sizes_by_index = dict()
        self.table_nrows_by_index = dict()
        self.table_column_map = dict()
        self.use_pre_info = False
        self.use_post_info = False
        self.is_column_store = is_column_store
        self.opt_method = opt_method
        self.batch_size = 1024
        self.buffer_pool_size = 128
        # Initial values of the 13 analytic-function parameters; replaced by the
        # fitted values after train() with opt_method "scipy" or "torch".
        self.analytic_params = [
            0.5,
            20,
            2,
            0.2,
            0.5,
            0.5,
            0.1,
            0.2,
            0.8,
            0.2,
            10,
            200,
            16000,
        ]
        # Hard box bounds used by the scipy optimizer.
        self.bound = optimization.Bounds(
            [0.1, 10, 0.01, 0.001, 0.001, 0.001, 0.001, 0.001, 0.5, 0.05, 2, 20, 10000],
            [1, 200, 2, 1, 0.9, 0.9, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        # Soft bounds and per-parameter penalty sharpness used by the torch loss.
        self.constrain = optimization.Bounds(
            [0.1, 10, 0.1, 0.01, 0.01, 0.01, 0.01, 0.1, 0.5, 0.05, 2, 20, 10000],
            [1, 200, 2, 1, 0.9, 0.9, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        self.penalty = [100, 0.1, 100, 100, 100, 100, 100, 100, 100, 100, 1, 0.1, 0.01]
        self.loss_func = "mse"
        # Learned regressor for opt_method "nn" / "xgboost".
        self.model = None
        self.analytic_func = None

    def _compute_table_size(self):
        """
        Derive per-table sizes from ``self.db_stats``.

        Sums the positive ``avg_width`` of every column per table, then, for tables
        listed in ``db_stats["table_stats"]``, converts that row width into a size
        in MB (``reltuples * width / 1024**2``). Tables missing from ``table_stats``
        keep the raw summed width in ``self.table_sizes``.
        """
        for col in self.db_stats["column_stats"]:
            table = col["tablename"]
            if table not in self.table_column_map:
                self.table_sizes[table] = 0
                self.table_column_map[table] = []
            self.table_column_map[table].append(col["attname"])
            if col["avg_width"] is not None and col["avg_width"] > 0:
                self.table_sizes[table] += col["avg_width"]
        all_table_names = [t["relname"] for t in self.db_stats["table_stats"]]
        for table in self.table_sizes:
            if table in all_table_names:
                idx = all_table_names.index(table)
                num_tuples = self.db_stats["table_stats"][idx]["reltuples"]
                self.table_nrows_by_index[idx] = num_tuples
                size_in_mb = (num_tuples * self.table_sizes[table]) / (1024 * 1024)
                self.table_sizes[table] = size_in_mb
                self.table_sizes_by_index[idx] = size_in_mb

    def pre_process_queries(
        self, parsed_queries_path, with_width=True, use_true_card=False
    ):
        """
        Load parsed plans from JSON and cache per-query plan statistics.

        For query ``i`` this stores in ``self.query_info[i]`` the SQL text, the list
        of cardinalities collected by ``dfs_cardinality`` and the scan estimates
        (total and per table) returned by ``estimate_scan_in_mb``.

        :param parsed_queries_path: path of the JSON file read by ``load_json``
        :param with_width: forwarded to ``dfs_cardinality``
        :param use_true_card: forwarded to ``dfs_cardinality`` and
            ``estimate_scan_in_mb``
        """
        plans = load_json(parsed_queries_path, namespace=False)
        self.db_stats = plans["database_stats"]
        self._compute_table_size()
        self.query_info = dict()
        for i in range(len(plans["sql_queries"])):
            curr_query_info = dict()
            curr_query_info["sql"] = plans["sql_queries"][i]
            all_cardinality = []
            dfs_cardinality(
                plans["parsed_plans"][i], all_cardinality, with_width, use_true_card
            )
            curr_query_info["all_cardinality"] = all_cardinality
            est_scan, est_scan_per_table = estimate_scan_in_mb(
                self.db_stats,
                plans["parsed_queries"][i],
                use_true_card,
                self.is_column_store,
            )
            curr_query_info["est_scan"] = est_scan
            curr_query_info["est_scan_per_table"] = est_scan_per_table
            self.query_info[i] = curr_query_info

    def estimate_data_share_percentage(self, idx, concur_info, pre_exec_info=None):
        """
        Estimate the fraction of query ``idx``'s scan that overlaps other queries.

        :param idx: query index into ``self.query_info``
        :param concur_info: sequence of entries whose first element is the index of
            a concurrently running query
        :param pre_exec_info: optional sequence of the same shape, concatenated
            onto ``concur_info``
        :return: shared-scan fraction, capped at 1.0
        """
        # TODO: make it smarter by considering buffer pool behavior
        # NOTE(review): divides by the table size and by the query's total scan;
        # a zero in either would fail here -- confirm upstream guarantees.
        curr_scan = self.query_info[idx]["est_scan_per_table"]
        curr_total_scan = self.query_info[idx]["est_scan"]
        if pre_exec_info is not None:
            concur_info = concur_info + pre_exec_info
        all_shared_scan = 0
        for table in curr_scan:
            table_size = self.table_sizes_by_index[table]
            table_shared_scan = 0
            for c in concur_info:
                concur_scan = self.query_info[c[0]]["est_scan_per_table"]
                if table in concur_scan:
                    # Share of the table the other query touches, applied to the
                    # amount this query scans from the same table.
                    concur_scan_perc = concur_scan[table] / table_size
                    overlap_scan = concur_scan_perc * curr_scan[table]
                    table_shared_scan += overlap_scan
            # A table cannot contribute more sharing than this query scans from it.
            table_shared_scan = min(table_shared_scan, curr_scan[table])
            all_shared_scan += table_shared_scan
        return min(all_shared_scan / curr_total_scan, 1.0)

    def featurize_data(self, concurrent_df):
        """
        Turn a concurrency trace into the feature tuple used by every backend.

        Rows are grouped by ``query_idx``; queries missing from the runtime caches
        or from ``self.query_info`` are skipped.

        :param concurrent_df: trace with columns ``query_idx``, ``runtime``,
            ``pre_exec_info`` and the concurrency columns (the ``*_train`` variants
            when ``self.use_pre_info`` is set)
        :return: ``(feature, labels, query_ranges)`` where ``feature`` is a tuple of
            11 equally long 1-D arrays (isolated runtime, average runtime, number
            of concurrent queries, summed concurrent runtime, estimated scan,
            summed concurrent scan, scan sharing fraction, max / average own
            cardinality, max / average concurrent cardinality), ``labels`` are the
            observed runtimes and ``query_ranges`` maps ``query_idx`` to its
            ``(start, end)`` row range. For opt_method "torch"/"nn" the arrays are
            returned as torch tensors.
        """
        global_y = []
        global_isolated_runtime = []
        global_avg_runtime = []
        global_num_concurrency = []
        global_sum_concurrent_runtime = []
        global_est_scan = []
        global_est_concurrent_scan = []
        global_scan_sharing_percentage = []
        global_max_est_card = []
        global_avg_est_card = []
        global_max_concurrent_card = []
        global_avg_concurrent_card = []
        global_query_idx = dict()
        start = 0
        for i, rows in concurrent_df.groupby("query_idx"):
            if (
                i not in self.isolated_rt_cache
                or i not in self.query_info
                or i not in self.average_rt_cache
            ):
                continue
            concurrent_rt = rows["runtime"].values
            query_info = self.query_info[i]
            n_rows = len(rows)
            if self.use_pre_info:
                num_concurrency = rows["num_concurrent_queries_train"].values
                concur_info = rows["concur_info_train"].values
            else:
                num_concurrency = rows["num_concurrent_queries"].values
                concur_info = rows["concur_info"].values
            pre_exec_info = rows["pre_exec_info"].values

            global_query_idx[i] = (start, start + n_rows)
            start += n_rows
            global_y.append(concurrent_rt)
            # Per-query constants are broadcast to one value per row.
            global_isolated_runtime.append(np.ones(n_rows) * self.isolated_rt_cache[i])
            global_avg_runtime.append(np.ones(n_rows) * self.average_rt_cache[i])
            global_num_concurrency.append(num_concurrency)
            global_est_scan.append(np.ones(n_rows) * query_info["est_scan"])
            global_max_est_card.append(
                np.ones(n_rows) * np.max(query_info["all_cardinality"]) / (1024 * 1024)
            )
            global_avg_est_card.append(
                np.ones(n_rows)
                * np.average(query_info["all_cardinality"])
                / (1024 * 1024)
            )
            for j in range(n_rows):
                sum_concurrent_runtime = 0
                sum_concurrent_scan = 0
                concurrent_card = []
                for c in concur_info[j]:
                    # Concurrent queries unknown to a cache are reported on stdout
                    # and contribute nothing to the corresponding aggregate.
                    if c[0] in self.average_rt_cache:
                        sum_concurrent_runtime += self.average_rt_cache[c[0]]
                    else:
                        print(c[0])
                    if c[0] in self.query_info:
                        sum_concurrent_scan += self.query_info[c[0]]["est_scan"]
                        concurrent_card.extend(self.query_info[c[0]]["all_cardinality"])
                    else:
                        print(c[0])

                global_sum_concurrent_runtime.append(sum_concurrent_runtime)
                global_est_concurrent_scan.append(sum_concurrent_scan)
                if len(concurrent_card) == 0:
                    global_max_concurrent_card.append(0)
                    global_avg_concurrent_card.append(0)
                else:
                    global_max_concurrent_card.append(
                        np.max(concurrent_card) / (1024 * 1024)
                    )
                    global_avg_concurrent_card.append(
                        np.average(concurrent_card) / (1024 * 1024)
                    )
                global_scan_sharing_percentage.append(
                    self.estimate_data_share_percentage(
                        i, concur_info[j], pre_exec_info[j]
                    )
                )

        global_y = np.concatenate(global_y)
        global_isolated_runtime = np.concatenate(global_isolated_runtime)
        global_avg_runtime = np.concatenate(global_avg_runtime)
        global_num_concurrency = np.concatenate(global_num_concurrency)
        global_est_scan = np.concatenate(global_est_scan)
        global_max_est_card = np.concatenate(global_max_est_card)
        global_avg_est_card = np.concatenate(global_avg_est_card)
        global_sum_concurrent_runtime = np.asarray(global_sum_concurrent_runtime)
        global_est_concurrent_scan = np.asarray(global_est_concurrent_scan)
        global_max_concurrent_card = np.asarray(global_max_concurrent_card)
        global_avg_concurrent_card = np.asarray(global_avg_concurrent_card)
        global_scan_sharing_percentage = np.asarray(global_scan_sharing_percentage)
        feature = (
            global_isolated_runtime,
            global_avg_runtime,
            global_num_concurrency,
            global_sum_concurrent_runtime,
            global_est_scan,
            global_est_concurrent_scan,
            global_scan_sharing_percentage,
            global_max_est_card,
            global_avg_est_card,
            global_max_concurrent_card,
            global_avg_concurrent_card,
        )
        if self.opt_method == "torch" or self.opt_method == "nn":
            feature = tuple(torch.from_numpy(column) for column in feature)
            global_y = torch.from_numpy(global_y)
        return feature, global_y, global_query_idx

    def train(
        self, trace_df, use_train=True, isolated_trace_df=None, analytic_func=None
    ):
        """
        Fit the model on the rows of ``trace_df`` that ran with concurrency.

        :param trace_df: concurrency trace (see featurize_data for the columns)
        :param use_train: use the ``*_train`` concurrency columns when featurizing
        :param isolated_trace_df: forwarded to ``get_isolated_runtime_cache``
        :param analytic_func: analytic function fitted by the "scipy" backend;
            defaults to ``interaction_func_scipy``
        """
        if analytic_func is None:
            analytic_func = interaction_func_scipy
        self.analytic_func = analytic_func
        self.use_pre_info = use_train
        self.get_isolated_runtime_cache(
            trace_df, isolated_trace_df, get_avg_runtime=True
        )
        concurrent_df = trace_df[trace_df["num_concurrent_queries"] > 0]
        feature, label, _ = self.featurize_data(concurrent_df)

        initial_param_value = np.asarray(self.analytic_params)
        if self.opt_method == "scipy":
            # Bounded robust least squares on the analytic function.
            fit, _ = optimization.curve_fit(
                self.analytic_func,
                feature,
                label,
                initial_param_value,
                bounds=self.bound,
                jac="3-point",
                method="trf",
                loss="soft_l1",
                verbose=1,
            )
            self.analytic_params = list(fit)
        elif self.opt_method == "torch":
            torch_analytic_params = []
            torch_analytic_params_lr = []
            for p in self.analytic_params:
                # Parameters whose initial value is exactly 10 are kept frozen.
                # NOTE: this selects by value, not by position in the list.
                t_p = torch.tensor(float(p), requires_grad=(p != 10))
                torch_analytic_params_lr.append({'params': t_p, 'lr': 1e-4})
                torch_analytic_params.append(t_p)
            print(len(torch_analytic_params))
            optimizer = optim.AdamW(torch_analytic_params_lr, weight_decay=2e-5)
            dataset = QueryFeatureDataset(feature, label)
            train_dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
            for epoch in range(800):
                for X, y in train_dataloader:
                    optimizer.zero_grad()
                    loss = fit_curve_loss_torch(
                        X,
                        y,
                        torch_analytic_params,
                        self.constrain,
                        loss_func=self.loss_func,
                        penalties=self.penalty,
                    )
                    loss.backward()
                    optimizer.step()
                if epoch % 50 == 0:
                    # Loss of the last batch of this epoch.
                    print(epoch, loss.item())
            for i in range(len(self.analytic_params)):
                self.analytic_params[i] = torch_analytic_params[i].detach()
        elif self.opt_method == "nn":
            dataset = QueryFeatureDataset(feature, label)
            train_dataloader = DataLoader(
                dataset, batch_size=self.batch_size, shuffle=True
            )
            self.model = SimpleNet(len(feature), hidden_dim=32, layers=3)
            optimizer = optim.Adam(self.model.parameters(), lr=1e-4, weight_decay=0)
            for epoch in range(40):
                batch_loss = 0
                for X, y in train_dataloader:
                    optimizer.zero_grad()
                    pred = self.model(X)
                    pred = pred.reshape(-1)
                    loss = mse_loss(pred, y)
                    loss.backward()
                    optimizer.step()
                    batch_loss += loss.item()
                if epoch % 1 == 0:
                    # Sum of the batch losses over the epoch.
                    print(epoch, batch_loss)
        elif self.opt_method == "xgboost":
            feature = np.stack(feature).T
            model = XGBRegressor(
                n_estimators=1000,
                max_depth=7,
                eta=0.2,
                subsample=1.0,
                eval_metric="mae",
                early_stopping_rounds=100,
            )
            # Random 80/20 split; the held-out 20% drives early stopping.
            n_samples = len(feature)
            train_idx = np.random.choice(
                n_samples, size=int(0.8 * n_samples), replace=False
            )
            # Sorted complement of train_idx, computed in one vectorised pass
            # instead of a per-row membership scan of the index array.
            val_idx = np.setdiff1d(np.arange(n_samples), train_idx)
            model.fit(
                feature[train_idx],
                label[train_idx],
                eval_set=[(feature[val_idx], label[val_idx])],
                verbose=False,
            )
            self.model = model
        else:
            assert False, f"unrecognized optimization method {self.opt_method}"

    def predict(self, eval_trace_df, use_global=False, return_per_query=True):
        """
        Predict runtimes for ``eval_trace_df`` with the configured backend.

        :param eval_trace_df: trace to evaluate (featurized like the training data)
        :param use_global: accepted for interface compatibility; not used here
        :param return_per_query: return dicts keyed by ``query_idx`` instead of
            flat arrays
        :return: ``(predictions, labels)``, either two dicts of arrays or two arrays
        """
        if self.analytic_func is None:
            self.analytic_func = interaction_func_scipy
        feature, labels, query_idx = self.featurize_data(eval_trace_df)
        if self.opt_method == "scipy":
            preds = self.analytic_func(feature, *self.analytic_params)
        elif self.opt_method == "torch":
            # Stack the feature columns into a (rows, features) float matrix.
            feature = torch.stack(feature).float()
            feature = torch.transpose(feature, 0, 1)
            preds = interaction_func_torch(
                feature,
                *self.analytic_params
            )
            preds = preds.numpy()
            labels = labels.numpy()
        elif self.opt_method == "nn":
            feature = torch.stack(feature).float()
            feature = torch.transpose(feature, 0, 1)
            preds = self.model(feature)
            preds = preds.reshape(-1)
            preds = preds.detach().numpy()
            labels = labels.numpy()
        elif self.opt_method == "xgboost":
            feature = np.stack(feature).T
            preds = self.model.predict(feature)
            # Tree ensembles can output non-positive runtimes; clamp them.
            preds = np.maximum(preds, 0.001)
        else:
            assert False, f"unrecognized optimization method {self.opt_method}"
        if return_per_query:
            preds_per_query = dict()
            labels_per_query = dict()
            for i in query_idx:
                start, end = query_idx[i]
                preds_per_query[i] = preds[start:end]
                labels_per_query[i] = labels[start:end]
            return preds_per_query, labels_per_query
        else:
            return preds, labels


In [12]:
def interaction_separation_func_scipy(
    x,
    n1,
    q1,
    i1,
    i2,
    c1,
    c2,
    m1,
    m2,
    m3,
    m4,
    m5,
    cm1,
    r1,
    r2,
    max_concurrency,
    avg_io_speed,
    memory_size,
    debug=False
):
    """
    An analytical function that can consider 3 types of resource sharing/contention: IO, memory, CPU

    The predicted runtime is ``queueing_time + io_time + cpu_time``, floored at 0.01.

    x:: input tuple of 18 equally shaped arrays (or scalars), in this order.
    Features suffixed ``_pre`` / ``_post`` are kept separately for two groups of
    concurrent queries (presumably those that started before / after this query
    -- confirm against the featurization code).
        isolated_runtime: the isolated runtime without concurrency of a query
        avg_runtime: average or median observed runtime of a query under any concurrency
        num_concurrency_pre, num_concurrency_post: number of concurrent queries in each group
        sum_concurrent_runtime_pre, sum_concurrent_runtime_post: sum of the estimated
            runtime of the concurrent queries in each group (CPU)
        avg_time_elapsed_pre: average time the "pre" queries have already been running
        sum_time_overlap_post: summed time the "post" queries overlap with this query
        est_scan: estimated MB of data that this query will need to scan (IO)
        est_concurrent_scan_pre, est_concurrent_scan_post: estimated MB scanned by the
            concurrent queries (unpacked but not used by the current formula)
        scan_sharing_percentage: estimated percentage of data in cache (sharing)
        max_est_card: maximum estimated cardinality in the query plan of this query (reflect peak memory usage)
        avg_est_card: average estimated cardinality in the query plan of this query (reflect average memory usage)
        max_concurrent_card_pre, max_concurrent_card_post: maximum estimated cardinality of the concurrent queries
        avg_concurrent_card_pre, avg_concurrent_card_post: average estimated cardinality of the concurrent queries

    Fitted parameters:
        n1: weight of the "post" group in the queueing and IO terms
        q1: scale of the queueing time
        i1: base IO speed; i2: correction factor on the estimated scan
        c1: scale of the CPU contention; c2: weight of the "post" group in it
        m1, m2: scale of the peak / average cardinality before comparing to memory_size
        m3, m4: weights of the peak / average memory pressure
        cm1: weight of the CPU x memory interaction term
        r1: weight of isolated_runtime versus avg_runtime in the CPU work estimate
        m5, r2: accepted so the parameter vector keeps its length; not used
        max_concurrency: number of queries that can run before queueing starts
        avg_io_speed: IO speed shared among running queries
        memory_size: memory budget the cardinalities are compared against
    debug: print the intermediate terms

    TODO: adding memory, vCPU, and bandwidth information
    """
    (
        isolated_runtime,
        avg_runtime,
        num_concurrency_pre,
        num_concurrency_post,
        sum_concurrent_runtime_pre,
        sum_concurrent_runtime_post,
        avg_time_elapsed_pre,
        sum_time_overlap_post,
        est_scan,
        est_concurrent_scan_pre,
        est_concurrent_scan_post,
        scan_sharing_percentage,
        max_est_card,
        avg_est_card,
        max_concurrent_card_pre,
        max_concurrent_card_post,
        avg_concurrent_card_pre,
        avg_concurrent_card_post
    ) = x
    # fraction of running queries (as opposed to queueing queries)
    running_frac = np.minimum(num_concurrency_pre + num_concurrency_post, max_concurrency) / np.maximum(
        num_concurrency_pre + num_concurrency_post, 1
    )
    # estimate queueing time of a query based on the sum of concurrent queries' run time
    queueing_time = (
        q1
        * (
            np.maximum(num_concurrency_pre + n1 * num_concurrency_post - max_concurrency, 0)
            / np.maximum(num_concurrency_pre + n1 * num_concurrency_post, 1)
        )
        *
        (sum_concurrent_runtime_pre + n1 * sum_concurrent_runtime_post - avg_time_elapsed_pre * num_concurrency_pre)
    )
    queueing_time = np.maximum(queueing_time, 0)
    if debug:
        print("queueing_time", queueing_time)
    # share of the "pre" queries' work that is still outstanding and actually running
    discount_pre = (sum_concurrent_runtime_pre - avg_time_elapsed_pre * num_concurrency_pre) * running_frac / np.maximum(sum_concurrent_runtime_pre, 0.1)
    discount_pre = np.maximum(discount_pre, 0.1)
    if debug:
        print("discount_pre", discount_pre)
    # share of the "post" queries' work that overlaps with this query
    discount_post = sum_time_overlap_post / np.maximum(sum_concurrent_runtime_post, 0.1)
    # estimate io_speed of a query assuming each query has a base io_speed of i1 + the io speed due to contention
    io_speed = i1 + avg_io_speed / np.minimum(
        np.maximum(num_concurrency_pre * discount_pre + n1 * num_concurrency_post * discount_post, 1), max_concurrency
    )
    if debug:
        print("io_speed", io_speed)
    # estimate time speed on IO as the (estimated scan - data in cache) / estimated io_speed
    # use i2 to adjust the estimation error in est_scan and scan_sharing_percentage
    io_time = i2 * est_scan * np.maximum((1 - scan_sharing_percentage * running_frac), 0) / io_speed
    if debug:
        print("io_time", io_time)
    # IO time without any cache sharing; used to split the observed runtime into IO and CPU
    io_time_old = i2 * est_scan / io_speed
    if debug:
        print("io_time_old", io_time_old)
    # estimate the amount of CPU work/time as the weighted average of isolated_runtime and avg_runtime - io_time
    cpu_time_isolated = np.maximum((r1 * isolated_runtime + (1 - r1) * avg_runtime) - io_time_old, 0.1)
    if debug:
        print("cpu_time_isolated", cpu_time_isolated)
    # estimate the amount of CPU work imposed by the concurrent queries (approximated by their estimate runtime)
    cpu_concurrent_pre = (sum_concurrent_runtime_pre * discount_pre) / np.maximum(avg_runtime, 2)
    cpu_concurrent_post = sum_time_overlap_post / np.maximum(avg_runtime, 2)

    # estimate the amount of memory load imposed by the concurrent queries
    # The denominators are floored at a tiny positive value so that a query with
    # zero estimated cardinality and no concurrent cardinality yields a share of 0
    # instead of 0/0 = NaN (which would poison the whole prediction).
    eps = 1e-12
    max_mem_usage_perc_pre = max_est_card / np.maximum(max_concurrent_card_pre + max_est_card, eps)
    avg_mem_usage_perc_pre = avg_est_card / np.maximum(avg_concurrent_card_pre + avg_est_card, eps)
    max_mem_usage_perc_post = max_est_card / np.maximum(max_concurrent_card_post + max_est_card, eps)
    avg_mem_usage_perc_post = avg_est_card / np.maximum(avg_concurrent_card_post + avg_est_card, eps)
    # memory demand above memory_size, attributed to this query by its share
    peak_mem_usage = (np.maximum(m1 * (max_concurrent_card_pre + max_est_card) - memory_size, 0) * max_mem_usage_perc_pre +
        np.maximum(m1 * (max_concurrent_card_post + max_est_card) - memory_size, 0)
        * max_mem_usage_perc_post
        ) / memory_size
    avg_mem_usage = (np.maximum(m2 * (avg_concurrent_card_pre + avg_est_card) - memory_size, 0)
        * avg_mem_usage_perc_pre + np.maximum(m2 * (avg_concurrent_card_post + avg_est_card) - memory_size, 0)
        * avg_mem_usage_perc_post
        ) / memory_size
    mem_usage = m3 * np.sqrt(peak_mem_usage) + m4 * np.sqrt(avg_mem_usage)
    if debug:
        print("mem_usage", mem_usage)
    # estimate the CPU time of a query by considering the contention of CPU and memory of other queries
    cpu_scale = c1 * (cpu_concurrent_pre + c2 * cpu_concurrent_post)
    mem_scale = 1 + mem_usage + cm1 * np.sqrt((cpu_concurrent_pre + cpu_concurrent_post) * mem_usage)
    cpu_time_scale_factor = cpu_scale * mem_scale
    cpu_time = (1 + cpu_time_scale_factor) * cpu_time_isolated
    if debug:
        print("cpu_scale", cpu_scale)
        print("mem_scale", mem_scale)
        print("cpu_time", cpu_time)
    # final runtime of a query is estimated to be the queueing time + io_time + cpu_time
    return np.maximum(queueing_time + io_time + cpu_time, 0.01)


def _concurrent_card_share(concurrent_card, own_card):
    """
    Element-wise concurrent_card / (concurrent_card + own_card).

    Entries whose denominator is exactly 0 yield 0 instead of NaN (arrays) or a
    ZeroDivisionError (plain Python scalars); every other entry is the plain ratio.
    """
    total = np.asarray(concurrent_card + own_card, dtype=float)
    nonzero = total != 0
    # The inner np.where swaps zero denominators for 1 so the division itself never warns.
    return np.where(nonzero, concurrent_card / np.where(nonzero, total, 1.0), 0.0)


def interaction_separation_func_scipy_archive(
    x,
    n1,
    q1,
    i1,
    i2,
    c1,
    c2,
    m1,
    m2,
    m3,
    m4,
    m5,
    cm1,
    r1,
    r2,
    max_concurrency,
    avg_io_speed,
    memory_size,
    debug=False,
):
    """
    An analytical function that can consider 3 types of resource sharing/contention: IO, memory, CPU.
    The estimated runtime is queueing_time + io_time + cpu_time, floored at 0.01.

    x:: input tuple containing, in this order:
        isolated_runtime: the isolated runtime without concurrency of a query
        avg_runtime: average or median observed runtime of a query under any concurrency
        num_concurrency_pre / num_concurrency_post: number of concurrent queries in the
            "pre" and "post" groups
        sum_concurrent_runtime_pre / sum_concurrent_runtime_post: sum of the estimated
            runtime of the concurrent queries of each group (CPU)
        avg_time_elapsed_pre: average elapsed time of the "pre" concurrent queries
        sum_time_overlap_post: summed overlap time with the "post" concurrent queries
        est_scan: estimated MB of data that this query will need to scan (IO)
        est_concurrent_scan_pre / est_concurrent_scan_post: estimated MB scanned by the
            concurrent queries (unpacked but not used by this function)
        scan_sharing_percentage: estimated percentage of data in cache (sharing)
        max_est_card / avg_est_card: maximum / average estimated cardinality in the plan
            of this query (reflect peak / average memory usage)
        max_concurrent_card_pre / max_concurrent_card_post: maximum estimated cardinality
            over the concurrent queries of each group
        avg_concurrent_card_pre / avg_concurrent_card_post: average estimated cardinality
            over the concurrent queries of each group
    n1 .. memory_size:: fitted parameters. `c2` is accepted for signature compatibility
        but is not used by this function.
    debug:: when True, print min/mean/max of the intermediate quantities. Off by default
        because an optimizer evaluates this function many times.
    TODO: adding memory and CPU information
    """
    (
        isolated_runtime,
        avg_runtime,
        num_concurrency_pre,
        num_concurrency_post,
        sum_concurrent_runtime_pre,
        sum_concurrent_runtime_post,
        avg_time_elapsed_pre,
        sum_time_overlap_post,
        est_scan,
        est_concurrent_scan_pre,
        est_concurrent_scan_post,
        scan_sharing_percentage,
        max_est_card,
        avg_est_card,
        max_concurrent_card_pre,
        max_concurrent_card_post,
        avg_concurrent_card_pre,
        avg_concurrent_card_post
    ) = x
    # fraction of running queries (as opposed to queueing queries)
    running_frac = np.minimum(num_concurrency_pre + num_concurrency_post, max_concurrency) / np.maximum(
        num_concurrency_pre + num_concurrency_post, 1
    )
    # estimate queueing time of a query based on the sum of concurrent queries' run time
    queueing_time = (
        q1
        * (
            np.maximum(num_concurrency_pre + n1 * num_concurrency_post - max_concurrency, 0)
            / np.maximum(num_concurrency_pre + n1 * num_concurrency_post, 1)
        )
        *
        (sum_concurrent_runtime_pre + n1 * sum_concurrent_runtime_post - avg_time_elapsed_pre * num_concurrency_pre)
    )
    queueing_time = np.maximum(queueing_time, 0)
    # discount_*: share of the concurrent queries' estimated runtime that is counted as contention
    discount_pre = (sum_concurrent_runtime_pre - avg_time_elapsed_pre * num_concurrency_pre) * running_frac / np.maximum(sum_concurrent_runtime_pre, 0.1)
    discount_pre = np.maximum(discount_pre, 0)
    discount_post = sum_time_overlap_post / np.maximum(sum_concurrent_runtime_post, 0.1)
    # estimate io_speed of a query assuming each query has a base io_speed of i1 + the io speed due to contention
    io_speed = i1 + avg_io_speed / np.minimum(
        np.maximum(num_concurrency_pre * discount_pre + n1 * num_concurrency_post * discount_post, 1), max_concurrency
    )
    # estimate time speed on IO as the (estimated scan - data in cache) / estimated io_speed
    # use i2 to adjust the estimation error in est_scan and scan_sharing_percentage
    if debug:
        print("est_scan:", np.min(est_scan), np.mean(est_scan), np.max(est_scan))
    io_time = i2 * est_scan * (1 - scan_sharing_percentage * running_frac) / io_speed
    # io time without the cache-sharing discount; only used to derive the isolated CPU time
    io_time_old = i2 * est_scan / io_speed
    # estimate the amount of CPU work/time as the weighted average of isolated_runtime and avg_runtime - io_time
    cpu_time_isolated = np.maximum((r1 * isolated_runtime + r2 * avg_runtime) - io_time_old, 0.1)
    # estimate the amount of CPU work imposed by the concurrent queries (approximated by their estimate runtime)
    cpu_concurrent_pre = (sum_concurrent_runtime_pre * discount_pre) / np.maximum(avg_runtime, 2)
    cpu_concurrent_post = sum_time_overlap_post / np.maximum(avg_runtime, 2)
    if debug:
        print("cpu_concurrent_pre:", np.min(cpu_concurrent_pre), np.mean(cpu_concurrent_pre), np.max(cpu_concurrent_pre))
        print("cpu_concurrent_post:", np.min(cpu_concurrent_post), np.mean(cpu_concurrent_post), np.max(cpu_concurrent_post))
    # estimate the amount of memory load imposed by the concurrent queries
    # (a zero denominator -- no cardinality on either side -- counts as zero share, not NaN)
    max_mem_usage_perc_pre = _concurrent_card_share(max_concurrent_card_pre, max_est_card)
    avg_mem_usage_perc_pre = _concurrent_card_share(avg_concurrent_card_pre, avg_est_card)
    max_mem_usage_perc_post = _concurrent_card_share(max_concurrent_card_post, max_est_card)
    avg_mem_usage_perc_post = _concurrent_card_share(avg_concurrent_card_post, avg_est_card)
    if debug:
        print("max_est_card:", np.min(max_est_card), np.mean(max_est_card), np.max(max_est_card))
        print("max_concurrent_card_pre:", np.min(max_concurrent_card_pre), np.mean(max_concurrent_card_pre), np.max(max_concurrent_card_pre))
        print("max_concurrent_card_post:", np.min(max_concurrent_card_post), np.mean(max_concurrent_card_post), np.max(max_concurrent_card_post))
        print("avg_est_card:", np.min(avg_est_card), np.mean(avg_est_card), np.max(avg_est_card))
        print("avg_concurrent_card_pre:", np.min(avg_concurrent_card_pre), np.mean(avg_concurrent_card_pre), np.max(avg_concurrent_card_pre))
        print("avg_concurrent_card_post:", np.min(avg_concurrent_card_post), np.mean(avg_concurrent_card_post), np.max(avg_concurrent_card_post))
    memory_concurrent = np.log(
        m1
        * np.maximum(max_concurrent_card_pre + max_est_card - memory_size, 0.01)
        * max_mem_usage_perc_pre
        + m2
        * np.maximum(avg_concurrent_card_pre + avg_est_card - memory_size, 0.01)
        * avg_mem_usage_perc_pre
        + m3
        * np.maximum(max_concurrent_card_post + max_est_card - memory_size, 0.01)
        * max_mem_usage_perc_post
        + m4
        * np.maximum(avg_concurrent_card_post + avg_est_card - memory_size, 0.01)
        * avg_mem_usage_perc_post
        + 0.0001
    ) * np.log((m1 + m3) * max_est_card + (m2 + m4) * avg_est_card + 0.0001)
    # the product of the two logs can be negative; clamp it so it never reduces the CPU time
    memory_concurrent = np.maximum(memory_concurrent, 0)
    if debug:
        print("memory_concurrent:", np.min(memory_concurrent), np.mean(memory_concurrent), np.max(memory_concurrent))
        print("cpu_time_isolated", np.min(cpu_time_isolated), np.mean(cpu_time_isolated),  np.max(cpu_time_isolated))
    # estimate the CPU time of a query by considering the contention of CPU and memory of other queries
    cpu_time = (
        1
        + c1 * cpu_concurrent_pre
        + c1 * cpu_concurrent_post
        + m5 * memory_concurrent
        + cm1 * np.sqrt((cpu_concurrent_pre + cpu_concurrent_post) * memory_concurrent)
    ) * cpu_time_isolated
    # final runtime of a query is estimated to be the queueing time + io_time + cpu_time
    if debug:
        print("queueing time:", np.min(queueing_time), np.mean(queueing_time), np.max(queueing_time))
        print("io time:", np.min(io_time), np.mean(io_time), np.max(io_time))
        print("CPU time:", np.min(cpu_time), np.mean(cpu_time), np.max(cpu_time))
    return np.maximum(queueing_time + io_time + cpu_time, 0.01)
    

class ComplexFitCurveSeparation(ComplexFitCurve):
    """
    Complex fit curve model for runtime prediction with concurrency.

    Compared with the parent featurization, the concurrent queries of every execution
    are separated into a "pre" group (the entries of `concur_info_train`) and a "post"
    group (entries of `concur_info` that are not in `concur_info_train`), and each
    group gets its own features.
    See interaction_separation_func_scipy for the analytical function these features
    are laid out for.
    """

    def __init__(self, is_column_store=False, opt_method='scipy'):
        """
        Set the initial values, names, bounds and penalties of the 17 analytical parameters.

        is_column_store, opt_method:: forwarded unchanged to ComplexFitCurve.__init__.
        """
        super().__init__(is_column_store, opt_method)
        # All lists below have 17 entries, presumably one per parameter in `param_names` order.
        self.analytic_params = [0.3, 0.5, 20, 2, 0.2, 0.9, 0.3, 0.3, 0.3, 0.3, 0.1, 0.2, 0.5, 0.5, 10, 200, 16000]
        self.param_names = ['n1', 'q1', 'i1', 'i2', 'c1', 'c2', 'm1', 'm2', 'm3', 'm4', 'm5', 'cm1', 
                            'r1', 'r2', 'max_concurrency', 'avg_io_speed', 'memory_size',]
        self.bound = optimization.Bounds(
            [0.01, 0.1, 10, 0.1, 0.0001, 0.0001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.4, 0.4, 10, 20, 10000],
            [1, 1, 200, 2, 1, 1, 1, 0.9, 0.5, 0.5, 0.5, 0.5, 0.8, 0.8, 20, 2000, 25000],
        )
        # NOTE(review): how `constrain` and `penalty` are consumed is decided by the parent
        # class / optimizer, which is not visible here -- confirm before changing them.
        self.constrain = optimization.Bounds(
            [0.1, 0.1, 10, 0.1, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.5, 0.05, 2, 20, 10000],
            [1, 1, 200, 2, 1, 0.9, 0.9, 0.9, 0.5, 0.5, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        self.penalty = [100, 100, 0.1, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 1, 0.1, 0.01]

    def featurize_data(self, concurrent_df):
        """
        Build the feature tuple and labels for the separated (pre/post) analytical function.

        concurrent_df:: dataframe with one row per query execution. The columns read are
            query_idx, runtime, start_time, end_time, num_concurrent_queries,
            num_concurrent_queries_train, concur_info, concur_info_train, pre_exec_info.

        Query groups whose query_idx is missing from `self.isolated_rt_cache`,
        `self.query_info` or `self.average_rt_cache` are skipped.
        When `self.use_pre_info` is true, every "post" feature is filled with zeros.

        Returns (feature, global_y, global_query_idx):
            feature: tuple of 18 one-dimensional arrays with one entry per kept row
            global_y: the observed runtimes of the kept rows
            global_query_idx: dict query_idx -> (start, end) slice of that query's rows
        With opt_method "torch" or "nn" the arrays are converted with torch.from_numpy.
        """
        global_y = []
        global_isolated_runtime = []
        global_avg_runtime = []
        global_num_concurrency_pre = []
        global_num_concurrency_post = []
        global_sum_concurrent_runtime_pre = []
        global_sum_concurrent_runtime_post = []
        global_avg_time_elapsed_pre = []
        global_sum_time_elapsed_post = []
        global_sum_time_overlap_post = []
        global_est_scan = []
        global_est_concurrent_scan_pre = []
        global_est_concurrent_scan_post = []
        global_scan_sharing_percentage = []
        global_max_est_card = []
        global_avg_est_card = []
        global_max_concurrent_card_pre = []
        global_max_concurrent_card_post = []
        global_avg_concurrent_card_pre = []
        global_avg_concurrent_card_post = []
        global_query_idx = dict()
        start = 0
        for i, rows in concurrent_df.groupby("query_idx"):
            if (
                    i not in self.isolated_rt_cache
                    or i not in self.query_info
                    or i not in self.average_rt_cache
            ):
                continue
            concurrent_rt = rows["runtime"].values
            start_time = rows["start_time"].values
            end_time = rows["end_time"].values
            query_info = self.query_info[i]
            n_rows = len(rows)
            num_concurrency_pre = rows["num_concurrent_queries_train"].values
            global_num_concurrency_pre.append(num_concurrency_pre)
            # "pre" entries; everything in full_concur_info that is not in here is "post"
            concur_info_prev = rows["concur_info_train"].values
            full_concur_info = rows["concur_info"].values
            pre_exec_info = rows["pre_exec_info"].values

            global_query_idx[i] = (start, start + n_rows)
            start += n_rows
            global_y.append(concurrent_rt)
            global_isolated_runtime.append(np.ones(n_rows) * self.isolated_rt_cache[i])
            global_avg_runtime.append(np.ones(n_rows) * self.average_rt_cache[i])
            global_est_scan.append(np.ones(n_rows) * query_info["est_scan"])
            # cardinalities are scaled down by 1024 * 1024
            global_max_est_card.append(
                np.ones(n_rows) * np.max(query_info["all_cardinality"]) / (1024 * 1024)
            )
            global_avg_est_card.append(
                np.ones(n_rows)
                * np.average(query_info["all_cardinality"])
                / (1024 * 1024)
            )
            for j in range(n_rows):
                sum_concurrent_runtime_pre = 0
                sum_concurrent_runtime_post = 0
                sum_concurrent_scan_pre = 0
                sum_concurrent_scan_post = 0
                avg_time_elapsed_pre = 0
                sum_time_elapsed_post = 0
                sum_time_overlap_post = 0
                concurrent_card_pre = []
                concurrent_card_post = []
                # each entry c is indexed as c[0] (a query index) and c[1] (a timestamp
                # compared against this row's start/end time)
                for c in full_concur_info[j]:
                    if c[0] in self.average_rt_cache:
                        if c in concur_info_prev[j]:
                            sum_concurrent_runtime_pre += self.average_rt_cache[c[0]]
                            avg_time_elapsed_pre += (start_time[j] - c[1])
                        else:
                            sum_concurrent_runtime_post += self.average_rt_cache[c[0]]
                            # TODO: this is not practical, make it an estimation
                            # (it reads end_time, i.e. the observed outcome of this row)
                            sum_time_overlap_post += (end_time[j] - c[1])
                            sum_time_elapsed_post += (c[1] - start_time[j])
                    else:
                        print(c[0])
                    if c[0] in self.query_info:
                        if c in concur_info_prev[j]:
                            sum_concurrent_scan_pre += self.query_info[c[0]]["est_scan"]
                            concurrent_card_pre.extend(self.query_info[c[0]]["all_cardinality"])
                        else:
                            sum_concurrent_scan_post += self.query_info[c[0]]["est_scan"]
                            concurrent_card_post.extend(self.query_info[c[0]]["all_cardinality"])
                    else:
                        print(c[0])

                global_sum_concurrent_runtime_pre.append(sum_concurrent_runtime_pre)
                # + 0.001 keeps the average defined when there is no "pre" query
                global_avg_time_elapsed_pre.append(avg_time_elapsed_pre / (len(concur_info_prev[j]) + 0.001))
                global_est_concurrent_scan_pre.append(sum_concurrent_scan_pre)
                if len(concurrent_card_pre) == 0:
                    global_max_concurrent_card_pre.append(0)
                    global_avg_concurrent_card_pre.append(0)
                else:
                    global_max_concurrent_card_pre.append(
                        np.max(concurrent_card_pre) / (1024 * 1024)
                    )
                    global_avg_concurrent_card_pre.append(
                        np.average(concurrent_card_pre) / (1024 * 1024)
                    )
                # TODO: may be able to change concur_info_prev to full_concur_info?
                global_scan_sharing_percentage.append(
                    self.estimate_data_share_percentage(
                        i, concur_info_prev[j], pre_exec_info[j]
                    )
                )
                if self.use_pre_info:
                    global_sum_concurrent_runtime_post.append(0)
                    global_est_concurrent_scan_post.append(0)
                    global_sum_time_overlap_post.append(0)
                    global_sum_time_elapsed_post.append(0)
                    global_max_concurrent_card_post.append(0)
                    global_avg_concurrent_card_post.append(0)
                else:
                    global_sum_concurrent_runtime_post.append(sum_concurrent_runtime_post)
                    global_est_concurrent_scan_post.append(sum_concurrent_scan_post)
                    global_sum_time_overlap_post.append(sum_time_overlap_post)
                    global_sum_time_elapsed_post.append(sum_time_elapsed_post)
                    if len(concurrent_card_post) == 0:
                        global_max_concurrent_card_post.append(0)
                        global_avg_concurrent_card_post.append(0)
                    else:
                        global_max_concurrent_card_post.append(
                            np.max(concurrent_card_post) / (1024 * 1024)
                        )
                        global_avg_concurrent_card_post.append(
                            np.average(concurrent_card_post) / (1024 * 1024)
                        )

            if self.use_pre_info:
                num_concurrency_post = np.zeros(n_rows)
            else:
                num_concurrency_post = rows["num_concurrent_queries"].values - rows["num_concurrent_queries_train"].values
            global_num_concurrency_post.append(num_concurrency_post)

        # per-query arrays are concatenated; per-row scalars are converted directly
        global_y = np.concatenate(global_y)
        global_isolated_runtime = np.concatenate(global_isolated_runtime)
        global_avg_runtime = np.concatenate(global_avg_runtime)
        global_num_concurrency_pre = np.concatenate(global_num_concurrency_pre)
        global_num_concurrency_post = np.concatenate(global_num_concurrency_post)

        global_est_scan = np.concatenate(global_est_scan)
        global_max_est_card = np.concatenate(global_max_est_card)
        global_avg_est_card = np.concatenate(global_avg_est_card)
        global_avg_time_elapsed_pre = np.asarray(global_avg_time_elapsed_pre)
        global_sum_time_elapsed_post = np.asarray(global_sum_time_elapsed_post)
        global_sum_time_overlap_post = np.asarray(global_sum_time_overlap_post)
        global_sum_concurrent_runtime_pre = np.asarray(global_sum_concurrent_runtime_pre)
        global_sum_concurrent_runtime_post = np.asarray(global_sum_concurrent_runtime_post)
        global_est_concurrent_scan_pre = np.asarray(global_est_concurrent_scan_pre)
        global_est_concurrent_scan_post = np.asarray(global_est_concurrent_scan_post)
        global_max_concurrent_card_pre = np.asarray(global_max_concurrent_card_pre)
        global_max_concurrent_card_post = np.asarray(global_max_concurrent_card_post)
        global_avg_concurrent_card_pre = np.asarray(global_avg_concurrent_card_pre)
        global_avg_concurrent_card_post = np.asarray(global_avg_concurrent_card_post)
        global_scan_sharing_percentage = np.asarray(global_scan_sharing_percentage)
        # NOTE(review): the measured overlap is discarded here and is not part of `feature`;
        # presumably deliberate because it is derived from end_time -- confirm.
        global_sum_time_overlap_post = np.zeros(len(global_scan_sharing_percentage))
        # NOTE(review): the 8th entry is the summed elapsed time of the "post" queries, while
        # interaction_separation_func_scipy_archive unpacks that position as
        # `sum_time_overlap_post` -- confirm the substitution is intended.
        feature = (
            global_isolated_runtime,
            global_avg_runtime,
            global_num_concurrency_pre,
            global_num_concurrency_post,
            global_sum_concurrent_runtime_pre,
            global_sum_concurrent_runtime_post,
            global_avg_time_elapsed_pre,
            global_sum_time_elapsed_post,
            global_est_scan,
            global_est_concurrent_scan_pre,
            global_est_concurrent_scan_post,
            global_scan_sharing_percentage,
            global_max_est_card,
            global_avg_est_card,
            global_max_concurrent_card_pre,
            global_max_concurrent_card_post,
            global_avg_concurrent_card_pre,
            global_avg_concurrent_card_post
        )
        if self.opt_method == "torch" or self.opt_method == "nn":
            feature = list(feature)
            for i in range(len(feature)):
                feature[i] = torch.from_numpy(feature[i])
            feature = tuple(feature)
            global_y = torch.from_numpy(global_y)
        return feature, global_y, global_query_idx



In [13]:
# Train the pre/post-separated fit-curve model (opt_method='xgboost') on the training
# split and evaluate it on eval_trace_df, bucketed by the interval bounds 0 / 10 / 60.
parsed_queries_path = "/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_aurora/aurora_mixed_parsed_queries.json"
cfc = ComplexFitCurveSeparation(opt_method='xgboost')
#cfc.use_pre_info = True
cfc.pre_process_queries(parsed_queries_path)
cfc.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df, analytic_func=interaction_separation_func_scipy)
predictions_cfc, labels = cfc.predict(eval_trace_df)
# Banner names the model actually evaluated (it used to say "simple linear regression").
print("===========Performance for ComplexFitCurveSeparation model, opt_method=xgboost (all query)=============")
result_overall_cfc, result_per_query_cfc, result_by_interval_cfc = cfc.evaluate_performance(eval_trace_df, interval=[0, 10, 60])

[list([(229, 28572.003217, 28642.006212729448), (187, 28593.383758, 28701.44439523564), (94, 28604.706343, 28672.097786252565), (200, 28608.170393, 28713.354933987015), (190, 28636.812329, 28673.055997317795), (89, 28637.122941, 28639.329242927568), (207, 28646.594743999998, 28672.577629599135), (13, 28677.245227, 28678.397708794357), (183, 28685.207252, 28690.338478301193), (202, 28692.749753, 28696.14145326779), (185, 28696.071502, 28697.350820809508), (175, 28696.163834, 28697.32046666182), (128, 28702.107628, 28703.228533399324)])
 list([(17, 102778.581014, 102907.30542477232), (75, 102781.41064100001, 102907.0186980221), (208, 102863.86665499999, 102946.9488750775), (222, 102912.936455, 103016.14540265854), (66, 102913.693956, 102913.86200438181), (25, 102919.356316, 102920.33243074991), (78, 102925.569873, 102927.17525019727), (108, 102928.99709, 102931.58312000641), (37, 102935.10258, 102935.74335806473), (117, 102938.453013, 102959.80132919263), (193, 102949.964932, 102997.2200

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



50% absolute error is 4.688291072845459, q-error is 1.6249610669893497
90% absolute error is 37.716668653488135, q-error is 5.8046553265619085
95% absolute error is 58.79311559200286, q-error is 10.205036336192777
For query in range 0s to 10s, there are 5846 executions
50% absolute error is 2.046770140528679, q-error is 2.2157182990223614
90% absolute error is 9.79569137096405, q-error is 8.912581971777353
95% absolute error is 18.660486459732056, q-error is 19.47008022023875
For query in range 10s to 60s, there are 3041 executions
50% absolute error is 9.075685501098633, q-error is 1.4081897415542592
90% absolute error is 38.387831926345825, q-error is 3.326711196877038
95% absolute error is 57.206103563308716, q-error is 4.861732705983105
For query in range 60s to infs, there are 2020 executions
50% absolute error is 24.17027676105498, q-error is 1.2601118321415752
90% absolute error is 83.9517733573914, q-error is 2.1754625882846508
95% absolute error is 121.20186755657195, q-error 

In [7]:
# Inspect the predictions stored under key 0 (presumably query_idx 0 -- confirm).
predictions_cfc[0]

array([0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
       0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
       0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
       0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
       0.01, 0.01, 0.01, 0.01, 0.01, 0.01], dtype=float32)

In [None]:
# Pair every parameter name with its current value for a readable overview.
list(zip(cfc.param_names, cfc.analytic_params))

In [17]:
# Re-run prediction/evaluation with use_pre_info switched on (for
# ComplexFitCurveSeparation this zeroes every "post" feature in featurize_data).
# NOTE(review): this mutates `cfc` in place and overwrites result_overall_cfc,
# result_per_query_cfc and result_by_interval_cfc from the previous evaluation.
cfc.use_pre_info = True
predictions_cfc_pre, labels = cfc.predict(eval_trace_df)
result_overall_cfc, result_per_query_cfc, result_by_interval_cfc = cfc.evaluate_performance(eval_trace_df, interval=[0, 10, 60])

50% absolute error is 5.21431303024292, q-error is 2.483853555916147
90% absolute error is 113.47500362396238, q-error is 9.993823368823575
95% absolute error is 189.08765301704392, q-error is 16.72587748905861
For query in range 0s to 10s, there are 5846 executions
50% absolute error is 2.0319700241088867, q-error is 2.5036861491805134
90% absolute error is 14.113977670669556, q-error is 10.906418791027175
95% absolute error is 37.37344044446945, q-error is 22.72993683901945
For query in range 10s to 60s, there are 3041 executions
50% absolute error is 12.643070459365845, q-error is 2.16511512529392
90% absolute error is 114.8088264465332, q-error is 7.980515617794491
95% absolute error is 159.46299624443054, q-error is 11.44426972025325
For query in range 60s to infs, there are 2020 executions
50% absolute error is 67.83300697803497, q-error is 3.0269872331890055
90% absolute error is 310.0426117420201, q-error is 10.902219678018195
95% absolute error is 470.04718644618936, q-error i

In [None]:
# Same experiment as the opt_method='xgboost' variant, but fitted with opt_method='scipy'.
# NOTE(review): rebinds `cfc`, `predictions_cfc` and the result_*_cfc names.
parsed_queries_path = "/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_aurora/aurora_mixed_parsed_queries.json"
cfc = ComplexFitCurveSeparation(opt_method='scipy')
#cfc.use_pre_info = True
cfc.pre_process_queries(parsed_queries_path)
cfc.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df, analytic_func=interaction_separation_func_scipy)
predictions_cfc, labels = cfc.predict(eval_trace_df)
# Banner names the model actually evaluated (it used to say "simple linear regression").
print("===========Performance for ComplexFitCurveSeparation model, opt_method=scipy (all query)=============")
result_overall_cfc, result_per_query_cfc, result_by_interval_cfc = cfc.evaluate_performance(eval_trace_df, interval=[0, 10, 60])

In [None]:
# Baseline: the non-separated ComplexFitCurve fitted with opt_method='scipy'.
# NOTE(review): rebinds `cfc`, `predictions_cfc` and the result_*_cfc names.
parsed_queries_path = "/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_aurora/aurora_mixed_parsed_queries.json"
cfc = ComplexFitCurve(opt_method='scipy')
cfc.pre_process_queries(parsed_queries_path)
cfc.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df)
predictions_cfc, labels = cfc.predict(eval_trace_df)
# Banner names the model actually evaluated (it used to say "simple linear regression").
print("===========Performance for ComplexFitCurve model, opt_method=scipy (all query)=============")
result_overall_cfc, result_per_query_cfc, result_by_interval_cfc = cfc.evaluate_performance(eval_trace_df, interval=[0, 10, 60])

In [None]:
# Show the current parameter values of whichever model `cfc` is bound to.
cfc.analytic_params

In [None]:
# Side-by-side per-query results of the three models, restricted to queries whose
# cached average runtime exceeds 5.
# NOTE(review): result_per_query_sfc / result_per_query_xgb are assigned by the
# SimpleFitCurve and XGBoost cells located further down this notebook, so this cell
# fails on a fresh top-to-bottom run.
for i in result_per_query_cfc:
    if cfc.average_rt_cache[i] > 5:
        print(i, result_per_query_cfc[i][0], result_per_query_cfc[i][1], result_per_query_sfc[i][0], result_per_query_sfc[i][1],
              result_per_query_xgb[i][0], result_per_query_xgb[i][1])

In [None]:
# Featurize the evaluation trace with use_pre_info switched off and stack the feature
# arrays into one matrix, transposed so that each row holds the features of one execution.
cfc.use_pre_info = False
feature, global_y, global_query_idx = cfc.featurize_data(eval_trace_df)
f_t = np.stack(feature).transpose()

In [None]:
# For query 217, replay the analytical function in debug mode on the executions with
# the 10 smallest and the 10 largest predictions.
i = 217
idx = np.argsort(predictions_cfc[i])
# labels of this query: its (start, end) slice of the concatenated label array
curr_label = global_y[global_query_idx[i][0]: global_query_idx[i][1]]
for j in list(idx[:10]) + list(idx[-10:]):
    print("==================================")
    print(predictions_cfc[i][j], curr_label[j])
    # NOTE(review): rebinds `feature`, which previously held the full feature tuple
    # returned by featurize_data.
    feature = tuple(f_t[global_query_idx[i][0]: global_query_idx[i][1]][j])
    print(feature)
    #pred = interaction_separation_func_scipy_debug(feature, *cfc.analytic_params)
    interaction_separation_func_scipy(feature, *cfc.analytic_params, debug=True)

In [21]:
# Compare, for key 0, the predictions of the different models against the labels.
i = 0
# NOTE(review): .iloc[i] selects the i-th ROW of isolated_trace_df, which is not
# necessarily an execution of query i -- confirm this is the intended reference value.
print(isolated_trace_df["runtime"].iloc[i])
idx = np.argsort(predictions_cfc[i])
# columns: cfc prediction, cfc prediction with use_pre_info, xgboost prediction, label
# (rows sorted by the cfc prediction)
np.stack((predictions_cfc[i][idx], predictions_cfc_pre[i][idx], predictions_xgb[i][idx], labels[i][idx]), axis=1)

29.34782457351685


array([[ 11.50751686,  20.18006325,  15.47596836,  33.02771139],
       [ 15.30519485,  17.20504951,  21.4738102 ,   7.25911045],
       [ 17.27965927,  17.27965927,  37.05118561,  17.60623455],
       [ 19.50398827,  19.50398827,  33.86337662,   8.07139182],
       [ 20.28778458,  20.28778458,  17.9494648 ,   4.74847651],
       [ 21.38740158,  24.8349781 ,  52.14645004,  22.04174924],
       [ 21.44119072,  20.07876396,  40.47449493,  24.57741523],
       [ 21.5802002 ,  21.5802002 ,  21.77821541,  13.10642648],
       [ 24.95308113,  24.95308113,  47.67791748,  23.84352231],
       [ 35.83611679,  29.04348183,  29.52569771,  11.41822219],
       [ 37.44343567,  19.50398827,  28.5782547 ,  40.59318066],
       [ 40.91133881,  17.28112221,  30.013134  ,  36.34676528],
       [ 42.57060623,  20.18683434,  10.27868557,  32.94049072],
       [ 56.2075882 ,  51.98566055, 116.1361084 ,  38.31848145],
       [ 56.59609604,  26.41164017, 126.97180176,  38.17366219],
       [ 57.68268967,  48

In [None]:
# NOTE(review): scratch expression that evaluates to 0 and has no effect -- candidate for removal.
(0)

In [None]:
# NOTE(review): scratch lookup with an empty key; `plan` is not assigned in the visible
# cells -- confirm it is defined earlier or remove this cell.
plan[""]

In [None]:
# Look at the last entry under "parsed_queries".
# NOTE(review): `plan` is not assigned in the visible cells -- confirm it is defined earlier.
plan["parsed_queries"][-1]

In [None]:
# List the top-level keys of `plan`.
# NOTE(review): `plan` is not assigned in the visible cells -- confirm it is defined earlier.
plan.keys()

In [None]:
# Sanity check: np.maximum broadcasts the scalar 1 against the array (yields three ones).
np.maximum(np.zeros(3), 1)

In [18]:
# Baseline: SimpleFitCurve trained on the training split and evaluated on eval_trace_df
# with use_global=True (presumably one curve shared by all queries -- confirm).
# Produces predictions_sfc / result_*_sfc, which the per-query comparison cell reads.
sfc = SimpleFitCurve()
sfc.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df)
#predictions, labels = sfc.predict(eval_trace_df)
#print("===========Performance for simple curve fitting model (per query)=============")
#result_overall, result_per_query = sfc.evaluate_performance(eval_trace_df, use_train=True)
predictions_sfc, labels = sfc.predict(eval_trace_df, use_global=True)
print("===========Performance for simple curve fitting model (all query)=============")
result_overall_sfc, result_per_query_sfc, result_by_interval_sfc = sfc.evaluate_performance(eval_trace_df, use_global=True, interval=[0, 10, 60])

50% absolute error is 9.788035813636878, q-error is 2.3922804101475075
90% absolute error is 73.57756001053527, q-error is 16.778080334168294
95% absolute error is 119.07045884474977, q-error is 42.23310157942017
For query in range 0s to 10s, there are 5846 executions
50% absolute error is 7.287450485685377, q-error is 3.5338528953982036
90% absolute error is 33.85230660144654, q-error is 37.872899387937515
95% absolute error is 45.84358039286986, q-error is 72.01824370751181
For query in range 10s to 60s, there are 3041 executions
50% absolute error is 18.52192090984741, q-error is 1.8923039862444642
90% absolute error is 72.33158228428192, q-error is 4.521788056828932
95% absolute error is 102.60641380191056, q-error is 6.501112290017443
For query in range 60s to infs, there are 2020 executions
50% absolute error is 51.2329371930626, q-error is 1.7319539479876767
90% absolute error is 182.86019808863702, q-error is 3.8771752711362857
95% absolute error is 242.72915127129139, q-error 



In [19]:
# Baseline: XGBoostPredictor trained on the training split (with use_pre_exec_info=True)
# and evaluated on eval_trace_df.
# Produces predictions_xgb / result_*_xgb, which the comparison cells read.
xgb = XGBoostPredictor(k=240)
xgb.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df, use_pre_exec_info=True)
predictions_xgb, labels = xgb.predict(eval_trace_df)
#predictions_xgb, labels = xgb.predict(concurrency_df.iloc[train_idx], use_train=False)
print("===========Performance for XGBoost model (train on full)=============")
result_overall_xgb, result_per_query_xgb, result_by_interval_xgb = xgb.evaluate_performance(eval_trace_df, interval=[0, 10, 60])
#result_overall_xgb, result_per_query_xgb = xgb.evaluate_performance(concurrency_df.iloc[train_idx], use_train=False)

50% absolute error is 9.366996049880981, q-error is 2.5168265427249255
90% absolute error is 64.30512781143187, q-error is 460.95078186272565
95% absolute error is 93.46872384548182, q-error is 1670.290962993314
For query in range 0s to 10s, there are 5846 executions
50% absolute error is 3.8632505769492127, q-error is 6.502462162996213
90% absolute error is 23.45195484161377, q-error is 1268.166601027831
95% absolute error is 40.25191509723663, q-error is 2764.1679523768225
For query in range 10s to 60s, there are 3041 executions
50% absolute error is 16.598748922348026, q-error is 1.7726704624543774
90% absolute error is 66.73179960250854, q-error is 5.630641441485818
95% absolute error is 88.88508939743042, q-error is 9.710423976518914
For query in range 60s to infs, there are 2020 executions
50% absolute error is 39.404107451438904, q-error is 1.4401986823913386
90% absolute error is 130.76991715431214, q-error is 3.14451403190475
95% absolute error is 187.91084424257278, q-error i

In [None]:
# NOTE(review): stray scratch name; `a` is not assigned in the visible cells -- candidate for removal.
a