In [1]:
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
sys.path.append("../")
from utils.load_brad_trace import load_trace, create_concurrency_dataset, load_trace_all_version
from models.concurrency.analytical_models import SimpleFitCurve, ComplexFitCurve
from models.concurrency.xgboost import XGBoostPredictor
from models.concurrency.linear_regression import SimpleLinearReg
np.set_printoptions(suppress=True)

In [2]:
# Location of the raw traces for the workload under study.
folder_name = "mixed_aurora"
directory = f"/Users/ziniuw/Desktop/research/Data/AWS_trace/{folder_name}/"

# Load every trace version found under `directory`.
# NOTE(review): the literal 8 is presumably the number of clients to look for
# (the cell output reports clients 4-7 as missing) — confirm against load_trace_all_version.
all_raw_trace, all_trace = load_trace_all_version(directory, 8, concat=True)

# Build one concurrency dataset per trace and merge them into a single frame.
all_concurrency_df = [
    create_concurrency_dataset(trace, engine=None, pre_exec_interval=60)
    for trace in all_trace
]
concurrency_df = pd.concat(all_concurrency_df, ignore_index=True)

# Trace used to derive the per-query "isolated" runtime.
# (A mixed_redshift variant of the same CSV was used as an alternative input at some point.)
isolated_trace_df = pd.read_csv(f"/Users/ziniuw/Desktop/research/Data/AWS_trace/{folder_name}/repeating_olap_batch_warmup.csv")
isolated_trace_df["runtime"] = isolated_trace_df["run_time_s"]

# query_idx -> median runtime over all rows of that query in the isolated trace.
isolated_rt_cache = {
    query_idx: np.median(group["runtime"])
    for query_idx, group in isolated_trace_df.groupby("query_idx")
}

client 4 trace not found
client 5 trace not found
client 6 trace not found
client 7 trace not found
client 4 trace not found
client 5 trace not found
client 6 trace not found
client 7 trace not found
client 4 trace not found
client 5 trace not found
client 6 trace not found
client 7 trace not found
client 4 trace not found
client 5 trace not found
client 6 trace not found
client 7 trace not found


In [None]:
# One line per query: query index, number of samples, cached isolated runtime,
# then mean / min / max / std of the runtimes observed in the concurrency dataset.
for query_idx, group in concurrency_df.groupby("query_idx"):
    observed = group["runtime"].values
    print(
        query_idx,
        len(group),
        isolated_rt_cache[query_idx],
        np.mean(observed),
        np.min(observed),
        np.max(observed),
        np.std(observed),
    )

In [3]:
# Reproducible 80/20 split over the row positions of concurrency_df.
np.random.seed(4)
num_rows = len(concurrency_df)
train_idx = np.random.choice(num_rows, size=int(0.8 * num_rows), replace=False)
# Test positions are the complement of train_idx. np.setdiff1d returns them sorted,
# exactly like the former `i not in train_idx` scan, but without the quadratic
# membership test against a numpy array.
test_idx = np.setdiff1d(np.arange(num_rows), train_idx)
eval_trace_df = concurrency_df.iloc[test_idx]
# Only rows that actually ran concurrently with other queries are evaluated.
eval_trace_df = eval_trace_df[eval_trace_df['num_concurrent_queries'] > 0]

In [23]:
import numpy as np
from xgboost import XGBRegressor
import torch
import torch.optim as optim
from torch.nn.functional import l1_loss
import scipy.optimize as optimization
from torch.utils.data import DataLoader
from models.concurrency.base_model import ConcurPredictor
from models.concurrency.utils import QueryFeatureDataset, SimpleNet
from parser.utils import load_json, dfs_cardinality, estimate_scan_in_mb


def simple_queueing_func(x, a1, a2, b1):
    """
    Analytical runtime model: queueing time plus a concurrency-scaled isolated runtime.

    runtime = a1 * max(n - b1, 0) + (1 + a2 * min(n, b1)) * isolated_runtime

    x:: tuple (num_concurrency, isolated_runtime); scalars or equally shaped arrays
    a1 represents the average exec-time of a random query under concurrency
    a2 represents the average impact on a query's runtime when executed concurrently with other queries
    b1 represents the max level of concurrency in a system
    """
    concurrency, base_runtime = x
    # queries beyond the concurrency limit b1 are treated as waiting in a queue
    queued = np.maximum(concurrency - b1, 0)
    # queries up to the limit slow the current query down proportionally
    running = np.minimum(concurrency, b1)
    queue_time = a1 * queued
    slowdown = 1 + a2 * running
    return queue_time + slowdown * base_runtime


class SimpleFitCurve(ConcurPredictor):
    """
    Simple fit curve model for runtime prediction with concurrency
    runtime = queue_time(num_concurrency) + alpha(num_concurrency) * isolated_runtime
            = (a1 * max(num_concurrency-b1, 0)) + (1 + a2*min(num_concurrency, b1)) * isolated_runtime
    The optimized parameters are a1, a2 and b1. One set is fitted per query
    (self.a1 / self.a2 / self.b1, keyed by query_idx) and one set on all queries
    together (self.a1_global / self.a2_global / self.b1_global).
    """

    def __init__(self):
        super().__init__()
        self.isolated_rt_cache = dict()
        # which concurrency column to read; set by train() and reused by predict()
        self.use_train = True
        self.a1_global = 0
        self.a1 = dict()
        self.b1_global = 0
        self.b1 = dict()
        self.a2_global = 0
        self.a2 = dict()

    def _concurrency_values(self, rows):
        """Return the concurrency-level column selected by self.use_train as a numpy array."""
        column = (
            "num_concurrent_queries_train"
            if self.use_train
            else "num_concurrent_queries"
        )
        return rows[column].values

    def train(self, trace_df, use_train=True, isolated_trace_df=None):
        """
        Fit the per-query and the global (a1, a2, b1) parameters.

        :param trace_df: trace with at least the columns "query_idx", "runtime",
            "num_concurrent_queries" and, when use_train is True,
            "num_concurrent_queries_train"
        :param use_train: read concurrency from "num_concurrent_queries_train"
            instead of "num_concurrent_queries"
        :param isolated_trace_df: forwarded to the inherited get_isolated_runtime_cache
            (defined outside this block; presumably fills self.isolated_rt_cache)
        :raises ValueError: when no query has an isolated runtime and at least 10
            concurrent samples, i.e. there is nothing to fit
        """
        self.use_train = use_train
        self.get_isolated_runtime_cache(trace_df, isolated_trace_df)
        concurrent_df = trace_df[trace_df["num_concurrent_queries"] > 0]

        global_y = []
        global_x = []
        global_ir = []
        for i, rows in concurrent_df.groupby("query_idx"):
            if i not in self.isolated_rt_cache:
                continue
            num_concurrency = self._concurrency_values(rows)
            # too few samples for a meaningful 3-parameter fit
            if len(num_concurrency) < 10:
                continue
            concurrent_rt = rows["runtime"].values
            # isolated runtime broadcast to one value per sample
            isolated_rt = np.ones(len(num_concurrency)) * self.isolated_rt_cache[i]
            global_y.append(concurrent_rt)
            global_x.append(num_concurrency)
            global_ir.append(isolated_rt)
            fit, _ = optimization.curve_fit(
                simple_queueing_func,
                (num_concurrency, isolated_rt),
                concurrent_rt,
                np.array([5, 0.1, 20]),
            )
            self.a1[i] = fit[0]
            self.a2[i] = fit[1]
            self.b1[i] = fit[2]

        if not global_y:
            # np.concatenate([]) would raise a ValueError with an unhelpful message
            raise ValueError(
                "no query with an isolated runtime and at least 10 concurrent "
                "samples was found; cannot fit SimpleFitCurve"
            )
        global_y = np.concatenate(global_y)
        global_x = np.concatenate(global_x)
        global_ir = np.concatenate(global_ir)
        fit, _ = optimization.curve_fit(
            simple_queueing_func,
            (global_x, global_ir),
            global_y,
            np.array([5, 0.1, 20]),
        )
        self.a1_global = fit[0]
        self.a2_global = fit[1]
        self.b1_global = fit[2]

    def predict(self, eval_trace_df, use_global=False):
        """
        Predict runtimes for every query of eval_trace_df that was fitted during train().

        :param eval_trace_df: trace with the same columns as the training trace
        :param use_global: use the globally fitted parameters instead of the per-query ones
        :return: (predictions, labels), two dicts keyed by query_idx holding numpy arrays
        """
        predictions = dict()
        labels = dict()
        for i, rows in eval_trace_df.groupby("query_idx"):
            # NOTE(review): queries without a per-query fit are skipped even when
            # use_global=True; kept as-is so both modes cover the same query set.
            if i not in self.isolated_rt_cache or i not in self.a1:
                continue
            labels[i] = rows["runtime"].values
            num_concurrency = self._concurrency_values(rows)
            x = (
                num_concurrency,
                np.ones(len(num_concurrency)) * self.isolated_rt_cache[i],
            )
            if use_global:
                pred = simple_queueing_func(
                    x, self.a1_global, self.a2_global, self.b1_global
                )
            else:
                pred = simple_queueing_func(x, self.a1[i], self.a2[i], self.b1[i])
            # never predict a non-positive runtime
            predictions[i] = np.maximum(pred, 0.001)
        return predictions, labels


def interaction_func_scipy(
    x,
    q1,
    i1,
    i2,
    c1,
    m1,
    m2,
    m3,
    cm1,
    r1,
    r2,
    max_concurrency,
    avg_io_speed,
    memory_size,
):
    """
    Analytical runtime model covering three kinds of resource contention: IO, memory and CPU.
    The prediction is queueing_time + io_time + cpu_time, floored at 0.01.

    x:: input tuple containing (in this order):
        isolated_runtime: the isolated runtime without concurrency of a query
        avg_runtime: average or median observed runtime of a query under any concurrency
        num_concurrency: number of concurrent queries running with this query
        sum_concurrent_runtime: sum of the estimated runtime of all queries concurrently running with this query (CPU)
        est_scan: estimated MB of data that this query will need to scan (IO)
        est_concurrent_scan: estimated MB of data that the concurrently running queries will need to scan (IO);
            unpacked but not used by the current formula
        scan_sharing_percentage: estimated percentage of data in cache (sharing) according to concurrent queries
        max_est_card: maximum estimated cardinality in the query plan of this query (reflect peak memory usage)
        avg_est_card: average estimated cardinality in the query plan of this query (reflect average memory usage)
        max_concurrent_card: maximum estimated cardinality for all concurrent queries
        avg_concurrent_card: average estimated cardinality for all concurrent queries
    Fitted parameters:
        q1: scale of the queueing time
        i1: base io speed of a query; i2: correction factor on the estimated io time
        c1: weight of the CPU contention term
        m1, m2: weights of the max / avg cardinality inside the memory terms
        m3: weight of the memory contention term
        cm1: weight of the combined CPU-memory term
        r1, r2: weights of isolated_runtime and avg_runtime in the base CPU time
        max_concurrency: concurrency level above which queries count as queued
        avg_io_speed: io speed divided among the running queries
        memory_size: amount subtracted from the combined cardinality in the memory terms
    TODO: adding memory and CPU information
    """
    (
        isolated_runtime,
        avg_runtime,
        num_concurrency,
        sum_concurrent_runtime,
        est_scan,
        est_concurrent_scan,
        scan_sharing_percentage,
        max_est_card,
        avg_est_card,
        max_concurrent_card,
        avg_concurrent_card,
    ) = x

    # concurrency clamped to at least 1 so it can be used as a divisor
    at_least_one = np.maximum(num_concurrency, 1)

    # share of concurrent queries that run vs. the share that waits in the queue
    running_frac = np.minimum(num_concurrency, max_concurrency) / at_least_one
    queued_frac = np.maximum(num_concurrency - max_concurrency, 0) / at_least_one

    # queueing time is driven by the total runtime of the concurrent queries
    queueing_time = q1 * queued_frac * sum_concurrent_runtime

    # IO: base speed i1 plus the shared speed split over the running queries
    io_speed = i1 + avg_io_speed / np.minimum(at_least_one, max_concurrency)
    not_cached = 1 - scan_sharing_percentage
    # i2 absorbs estimation errors of est_scan and scan_sharing_percentage
    io_time = i2 * est_scan * not_cached / io_speed

    # CPU work of the query itself: weighted isolated/average runtime minus the IO part
    cpu_time_isolated = (r1 * isolated_runtime + r2 * avg_runtime) - io_time
    # CPU load imposed by the concurrently running queries
    cpu_concurrent = (running_frac * sum_concurrent_runtime) / avg_runtime

    # memory load imposed by the concurrent queries
    max_total_card = max_concurrent_card + max_est_card
    avg_total_card = avg_concurrent_card + avg_est_card
    max_mem_usage_perc = max_concurrent_card / max_total_card
    avg_mem_usage_perc = avg_concurrent_card / avg_total_card
    max_overflow = np.maximum(max_total_card - memory_size, 0.01)
    avg_overflow = np.maximum(avg_total_card - memory_size, 0.01)
    pressure = (
        m1 * max_overflow * max_mem_usage_perc
        + m2 * avg_overflow * avg_mem_usage_perc
        + 0.0001
    )
    own_footprint = m1 * max_est_card + m2 * avg_est_card + 0.0001
    memory_concurrent = np.maximum(np.log(pressure) * np.log(own_footprint), 0)

    # CPU time inflated by CPU contention, memory contention and their interaction
    slowdown = (
        1
        + c1 * cpu_concurrent
        + m3 * memory_concurrent
        + cm1 * np.sqrt(cpu_concurrent * memory_concurrent)
    )
    cpu_time = slowdown * cpu_time_isolated

    # final runtime of a query is estimated to be the queueing time + io_time + cpu_time
    return np.maximum(queueing_time + io_time + cpu_time, 0.01)


def interaction_func_torch(
    x,
    q1,
    i1,
    i2,
    c1,
    m1,
    m2,
    m3,
    cm1,
    r1,
    r2,
    max_concurrency,
    avg_io_speed,
    memory_size,
):
    """
    Torch counterpart of interaction_func_scipy: the same formula written with
    torch ops so the parameters can be optimized by gradient descent.

    x:: tuple of 11 tensors (isolated_runtime, avg_runtime, num_concurrency,
        sum_concurrent_runtime, est_scan, est_concurrent_scan,
        scan_sharing_percentage, max_est_card, avg_est_card,
        max_concurrent_card, avg_concurrent_card); est_concurrent_scan is
        unpacked but not used by the formula
    q1 .. memory_size:: model parameters; max_concurrency must be a tensor
        because it is passed to torch.minimum
    Returns a tensor of predicted runtimes, floored at 0.01.
    """
    (
        isolated_runtime,
        avg_runtime,
        num_concurrency,
        sum_concurrent_runtime,
        est_scan,
        est_concurrent_scan,
        scan_sharing_percentage,
        max_est_card,
        avg_est_card,
        max_concurrent_card,
        avg_concurrent_card,
    ) = x
    # constant tensors are built once instead of at every use
    zero = torch.tensor(0)
    one = torch.tensor(1)
    floor = torch.tensor(0) + 0.01

    # concurrency clamped to at least 1 so it can be used as a divisor
    at_least_one = torch.maximum(num_concurrency, one)
    # fraction of running queries (as opposed to queueing queries)
    running_frac = torch.minimum(num_concurrency, max_concurrency) / at_least_one
    # estimate queueing time of a query based on the sum of concurrent queries' run time
    queueing_time = (
        q1
        * (torch.maximum(num_concurrency - max_concurrency, zero) / at_least_one)
        * sum_concurrent_runtime
    )
    # estimate io_speed of a query assuming each query has a base io_speed of i1 + the io speed due to contention
    io_speed = i1 + avg_io_speed / torch.minimum(at_least_one, max_concurrency)
    # estimate time spent on IO as the (estimated scan - data in cache) / estimated io_speed
    # use i2 to adjust the estimation error in est_scan and scan_sharing_percentage
    io_time = i2 * est_scan * (1 - scan_sharing_percentage) / io_speed
    # estimate the amount of CPU work/time as the weighted average of isolated_runtime and avg_runtime - io_time
    cpu_time_isolated = (r1 * isolated_runtime + r2 * avg_runtime) - io_time
    # estimate the amount of CPU work imposed by the concurrent queries (approximated by their estimate runtime)
    cpu_concurrent = (running_frac * sum_concurrent_runtime) / avg_runtime
    # estimate the amount of memory load imposed by the concurrent queries
    max_total_card = max_concurrent_card + max_est_card
    avg_total_card = avg_concurrent_card + avg_est_card
    max_mem_usage_perc = max_concurrent_card / max_total_card
    avg_mem_usage_perc = avg_concurrent_card / avg_total_card
    memory_concurrent = torch.log(
        m1 * torch.maximum(max_total_card - memory_size, floor) * max_mem_usage_perc
        + m2 * torch.maximum(avg_total_card - memory_size, floor) * avg_mem_usage_perc
        + 0.0001
    ) * torch.log(m1 * max_est_card + m2 * avg_est_card + 0.0001)
    memory_concurrent = torch.maximum(memory_concurrent, zero)
    # estimate the CPU time of a query by considering the contention of CPU and memory of other queries
    cpu_time = (
        1
        + c1 * cpu_concurrent
        + m3 * memory_concurrent
        + cm1 * torch.sqrt(cpu_concurrent * memory_concurrent)
    ) * cpu_time_isolated
    # final runtime of a query is estimated to be the queueing time + io_time + cpu_time
    return torch.maximum(queueing_time + io_time + cpu_time, floor)


def fit_curve_loss_torch(x, y, params, constrain, loss_func="soft_l1", penalties=None):
    """
    Training loss for the torch analytical model.

    The loss is the mean per-sample error of interaction_func_torch(x, *params)
    against y, plus one exponential penalty per parameter that grows as the
    parameter moves past its upper bound or below its lower bound.

    x:: feature tuple accepted by interaction_func_torch
    y:: target runtimes
    params:: sequence of parameter tensors, in interaction_func_torch order
    constrain:: object exposing per-parameter bounds as `.lb` and `.ub`
    loss_func:: "mae", "mse" or "soft_l1"
    penalties:: optional per-parameter penalty steepness; 1 is used when omitted
    """
    pred = interaction_func_torch(x, *params)
    assert loss_func in ("mae", "mse", "soft_l1"), f"loss func {loss_func} not implemented"

    residual = pred - y
    if loss_func == "mae":
        per_sample = torch.abs(residual)
    elif loss_func == "mse":
        per_sample = residual ** 2
    else:
        # soft_l1: quadratic near zero, linear for large residuals
        per_sample = torch.sqrt(1 + residual ** 2) - 1
    loss = torch.mean(per_sample)

    lower, upper = constrain.lb, constrain.ub
    for pos, param in enumerate(params):
        steepness = 1 if penalties is None else penalties[pos]
        above = torch.exp(steepness * (param - upper[pos]))
        below = torch.exp(-1 * steepness * (param - lower[pos]))
        loss += above + below
    return loss


class ComplexFitCurve(ConcurPredictor):
    """
    Complex fit curve model for runtime prediction with concurrency
    See interaction_func_scipy for detailed analytical functions

    Workflow: pre_process_queries() loads plan/statistics information,
    train() fits the model selected by `opt_method`, predict() applies it.
    """

    def __init__(self, is_column_store=False, opt_method='scipy'):
        """
        :param is_column_store: whether the DBMS is a column store; forwarded to
            estimate_scan_in_mb when estimating how much data a query scans
        :param opt_method: how the model is fitted, one of
            "scipy"   - scipy curve_fit on interaction_func_scipy
            "torch"   - Adam on interaction_func_torch with bound penalties
            "nn"      - SimpleNet trained on the raw features
            "xgboost" - XGBRegressor trained on the raw features
        """
        super().__init__()
        self.isolated_rt_cache = dict()
        self.average_rt_cache = dict()
        # query index -> {"sql", "all_cardinality", "est_scan", "est_scan_per_table"}
        self.query_info = dict()
        self.db_stats = None
        # table name -> size (sum of column widths, later converted to MB)
        self.table_sizes = dict()
        # position of the table in db_stats["table_stats"] -> size in MB / row count
        self.table_sizes_by_index = dict()
        self.table_nrows_by_index = dict()
        self.table_column_map = dict()
        self.use_train = True
        self.is_column_store = is_column_store
        self.opt_method = opt_method
        self.batch_size = 1024
        # initial values, in the parameter order of interaction_func_scipy:
        # q1, i1, i2, c1, m1, m2, m3, cm1, r1, r2, max_concurrency, avg_io_speed, memory_size
        self.analytic_params = [0.5, 20, 2, 0.2, 0.5, 0.5, 0.1, 0.2, 0.8, 0.2, 10, 200, 16000]
        # hard bounds used by the scipy optimizer
        self.bound = optimization.Bounds(
            [0.1, 10, 0.01, 0.001, 0.001, 0.001, 0.001, 0.001, 0.5, 0.05, 2, 20, 10000],
            [1, 200, 2, 1, 0.9, 0.9, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        # soft bounds enforced through penalties by the torch optimizer
        self.constrain = optimization.Bounds(
            [0.1, 10, 0.1, 0.01, 0.01, 0.01, 0.01, 0.1, 0.5, 0.05, 2, 20, 10000],
            [1, 200, 2, 1, 0.9, 0.9, 0.5, 0.5, 0.95, 0.4, 20, 2000, 50000],
        )
        # per-parameter steepness of the soft-bound penalty
        self.penalty = [100, 0.1, 100, 100, 100, 100, 100, 100, 100, 100, 1, 0.1, 0.01]
        self.loss_func = "soft_l1"
        self.model = None

    def _compute_table_size(self):
        """Derive per-table sizes (in MB) and row counts from self.db_stats."""
        # first pass: sum the average column widths per table
        for col in self.db_stats["column_stats"]:
            table = col["tablename"]
            if table not in self.table_column_map:
                self.table_sizes[table] = 0
                self.table_column_map[table] = []
            self.table_column_map[table].append(col["attname"])
            if col["avg_width"] is not None and col["avg_width"] > 0:
                self.table_sizes[table] += col["avg_width"]
        # second pass: multiply by the row count of tables that have table statistics
        all_table_names = [t["relname"] for t in self.db_stats["table_stats"]]
        for table in self.table_sizes:
            if table in all_table_names:
                idx = all_table_names.index(table)
                num_tuples = self.db_stats["table_stats"][idx]["reltuples"]
                self.table_nrows_by_index[idx] = num_tuples
                size_in_mb = (num_tuples * self.table_sizes[table]) / (1024 * 1024)
                self.table_sizes[table] = size_in_mb
                self.table_sizes_by_index[idx] = size_in_mb

    def pre_process_queries(
        self, parsed_queries_path, with_width=True, use_true_card=False
    ):
        """
        Load the parsed-queries JSON and cache per-query plan information in self.query_info.

        :param parsed_queries_path: path of a JSON file with the keys "database_stats",
            "sql_queries", "parsed_plans" and "parsed_queries"
        :param with_width: forwarded to dfs_cardinality
        :param use_true_card: forwarded to dfs_cardinality and estimate_scan_in_mb
        """
        plans = load_json(parsed_queries_path, namespace=False)
        self.db_stats = plans["database_stats"]
        self._compute_table_size()
        self.query_info = dict()
        for i in range(len(plans["sql_queries"])):
            curr_query_info = dict()
            curr_query_info["sql"] = plans["sql_queries"][i]
            # dfs_cardinality fills the list in place
            all_cardinality = []
            dfs_cardinality(
                plans["parsed_plans"][i], all_cardinality, with_width, use_true_card
            )
            curr_query_info["all_cardinality"] = all_cardinality
            est_scan, est_scan_per_table = estimate_scan_in_mb(
                self.db_stats,
                plans["parsed_queries"][i],
                use_true_card,
                self.is_column_store,
            )
            curr_query_info["est_scan"] = est_scan
            curr_query_info["est_scan_per_table"] = est_scan_per_table
            self.query_info[i] = curr_query_info

    def estimate_data_share_percentage(self, idx, concur_info, pre_exec_info=None):
        """
        Estimate which fraction (0..1) of the data scanned by query `idx` is also
        scanned by the concurrent (and, if given, previously executed) queries.

        :param idx: index of the query in self.query_info
        :param concur_info: sequence whose items carry a query index at position 0
        :param pre_exec_info: optional sequence of the same shape, concatenated to concur_info
        :return: shared fraction of the query's estimated scan; 0.0 when the query
            is estimated to scan nothing
        """
        # TODO: make it smarter by considering buffer pool behavior
        curr_scan = self.query_info[idx]["est_scan_per_table"]
        curr_total_scan = self.query_info[idx]["est_scan"]
        if not curr_total_scan:
            # nothing to scan, hence nothing to share (also avoids dividing by zero)
            return 0.0
        if pre_exec_info is not None:
            concur_info = concur_info + pre_exec_info
        all_shared_scan = 0
        for table in curr_scan:
            table_size = self.table_sizes_by_index.get(table)
            if not table_size:
                # unknown or empty table: no overlap can be estimated
                continue
            table_shared_scan = 0
            for c in concur_info:
                other_info = self.query_info.get(c[0])
                if other_info is None:
                    # queries without plan information are ignored, as in featurize_data
                    continue
                concur_scan = other_info["est_scan_per_table"]
                if table in concur_scan:
                    concur_scan_perc = concur_scan[table] / table_size
                    overlap_scan = concur_scan_perc * curr_scan[table]
                    table_shared_scan += overlap_scan
            # a query cannot share more than it scans from this table
            table_shared_scan = min(table_shared_scan, curr_scan[table])
            all_shared_scan += table_shared_scan
        return min(all_shared_scan / curr_total_scan, 1.0)

    def featurize_data(self, concurrent_df):
        """
        Turn a trace into the 11-feature tuple expected by interaction_func_scipy.

        Rows are grouped by "query_idx"; queries missing from the isolated-runtime
        cache, the average-runtime cache or self.query_info are skipped.

        :param concurrent_df: trace with the columns "query_idx", "runtime",
            "pre_exec_info" and the concurrency columns selected by self.use_train
        :return: (feature, label, query_ranges) where feature is a tuple of 11 arrays,
            label holds the observed runtimes and query_ranges maps each query index
            to its (start, end) row range inside those arrays. The arrays are torch
            tensors when opt_method is "torch" or "nn", numpy arrays otherwise.
        :raises ValueError: when no query of the trace can be featurized
        """
        global_y = []
        global_isolated_runtime = []
        global_avg_runtime = []
        global_num_concurrency = []
        global_sum_concurrent_runtime = []
        global_est_scan = []
        global_est_concurrent_scan = []
        global_scan_sharing_percentage = []
        global_max_est_card = []
        global_avg_est_card = []
        global_max_concurrent_card = []
        global_avg_concurrent_card = []
        global_query_idx = dict()
        start = 0
        for i, rows in concurrent_df.groupby("query_idx"):
            if (
                i not in self.isolated_rt_cache
                or i not in self.query_info
                or i not in self.average_rt_cache
            ):
                continue
            concurrent_rt = rows["runtime"].values
            query_info = self.query_info[i]
            n_rows = len(rows)
            if self.use_train:
                num_concurrency = rows["num_concurrent_queries_train"].values
                concur_info = rows["concur_info_train"].values
            else:
                num_concurrency = rows["num_concurrent_queries"].values
                concur_info = rows["concur_info"].values
            pre_exec_info = rows["pre_exec_info"].values

            global_query_idx[i] = (start, start + n_rows)
            start += n_rows
            # per-query constants are repeated once per row
            global_y.append(concurrent_rt)
            global_isolated_runtime.append(np.ones(n_rows) * self.isolated_rt_cache[i])
            global_avg_runtime.append(np.ones(n_rows) * self.average_rt_cache[i])
            global_num_concurrency.append(num_concurrency)
            global_est_scan.append(np.ones(n_rows) * query_info["est_scan"])
            global_max_est_card.append(
                np.ones(n_rows) * np.max(query_info["all_cardinality"]) / (1024 * 1024)
            )
            global_avg_est_card.append(
                np.ones(n_rows)
                * np.average(query_info["all_cardinality"])
                / (1024 * 1024)
            )
            # per-row features depend on which queries ran concurrently with that row
            for j in range(n_rows):
                sum_concurrent_runtime = 0
                sum_concurrent_scan = 0
                concurrent_card = []
                for c in concur_info[j]:
                    # unknown concurrent queries contribute nothing; their index is printed
                    if c[0] in self.average_rt_cache:
                        sum_concurrent_runtime += self.average_rt_cache[c[0]]
                    else:
                        print(c[0])
                    if c[0] in self.query_info:
                        sum_concurrent_scan += self.query_info[c[0]]["est_scan"]
                        concurrent_card.extend(self.query_info[c[0]]["all_cardinality"])
                    else:
                        print(c[0])

                global_sum_concurrent_runtime.append(sum_concurrent_runtime)
                global_est_concurrent_scan.append(sum_concurrent_scan)
                if len(concurrent_card) == 0:
                    global_max_concurrent_card.append(0)
                    global_avg_concurrent_card.append(0)
                else:
                    global_max_concurrent_card.append(
                        np.max(concurrent_card) / (1024 * 1024)
                    )
                    global_avg_concurrent_card.append(
                        np.average(concurrent_card) / (1024 * 1024)
                    )
                global_scan_sharing_percentage.append(
                    self.estimate_data_share_percentage(
                        i, concur_info[j], pre_exec_info[j]
                    )
                )

        if not global_y:
            # np.concatenate([]) would raise a ValueError with an unhelpful message
            raise ValueError(
                "no query of the trace has an isolated runtime, an average runtime "
                "and parsed plan information; nothing to featurize"
            )
        global_y = np.concatenate(global_y)
        global_isolated_runtime = np.concatenate(global_isolated_runtime)
        global_avg_runtime = np.concatenate(global_avg_runtime)
        global_num_concurrency = np.concatenate(global_num_concurrency)
        global_est_scan = np.concatenate(global_est_scan)
        global_max_est_card = np.concatenate(global_max_est_card)
        global_avg_est_card = np.concatenate(global_avg_est_card)
        global_sum_concurrent_runtime = np.asarray(global_sum_concurrent_runtime)
        global_est_concurrent_scan = np.asarray(global_est_concurrent_scan)
        global_max_concurrent_card = np.asarray(global_max_concurrent_card)
        global_avg_concurrent_card = np.asarray(global_avg_concurrent_card)
        global_scan_sharing_percentage = np.asarray(global_scan_sharing_percentage)
        # the order must match the unpacking at the top of interaction_func_scipy
        feature = (
            global_isolated_runtime,
            global_avg_runtime,
            global_num_concurrency,
            global_sum_concurrent_runtime,
            global_est_scan,
            global_est_concurrent_scan,
            global_scan_sharing_percentage,
            global_max_est_card,
            global_avg_est_card,
            global_max_concurrent_card,
            global_avg_concurrent_card,
        )
        if self.opt_method == "torch" or self.opt_method == "nn":
            feature = tuple(torch.from_numpy(f) for f in feature)
            global_y = torch.from_numpy(global_y)
        return feature, global_y, global_query_idx

    def train(self, trace_df, use_train=True, isolated_trace_df=None):
        """
        Fit the model selected by self.opt_method on the concurrent rows of trace_df.

        :param trace_df: training trace (see featurize_data for the required columns)
        :param use_train: read the "*_train" concurrency columns instead of the plain ones
        :param isolated_trace_df: forwarded to the inherited get_isolated_runtime_cache
            (defined outside this block; presumably fills the isolated and average
            runtime caches)
        :raises AssertionError: when self.opt_method is not a supported method
        """
        self.use_train = use_train
        self.get_isolated_runtime_cache(
            trace_df, isolated_trace_df, get_avg_runtime=True
        )
        concurrent_df = trace_df[trace_df["num_concurrent_queries"] > 0]
        feature, label, _ = self.featurize_data(concurrent_df)

        initial_param_value = np.asarray(self.analytic_params)
        if self.opt_method == "scipy":
            fit, _ = optimization.curve_fit(
                interaction_func_scipy,
                feature,
                label,
                initial_param_value,
                bounds=self.bound,
                jac="3-point",
                method="trf",
                #loss="soft_l1",
                verbose=1
            )
            self.analytic_params = list(fit)
        elif self.opt_method == "torch":
            torch_analytic_params = []
            torch_analytic_params_lr = []
            for p in self.analytic_params:
                # a parameter whose initial value is exactly 10 is kept fixed; with the
                # default initial values this is max_concurrency
                if p == 10:
                    t_p = torch.tensor(float(p), requires_grad=False)
                else:
                    t_p = torch.tensor(float(p), requires_grad=True)
                torch_analytic_params.append(t_p)
                # learning rate grows with the magnitude of the initial value
                torch_analytic_params_lr.append({'params': t_p, 'lr': 0.01 * p ** 0.3})
            optimizer = optim.Adam(torch_analytic_params_lr, weight_decay=2e-5)
            dataset = QueryFeatureDataset(feature, label)
            train_dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
            for epoch in range(200):
                for X, y in train_dataloader:
                    optimizer.zero_grad()
                    loss = fit_curve_loss_torch(X, y, torch_analytic_params,
                                                self.constrain, loss_func=self.loss_func, penalties=self.penalty)
                    loss.backward()
                    optimizer.step()
                if epoch % 10 == 0:
                    # loss of the last batch of this epoch
                    print(epoch, loss.item())
                    print(torch_analytic_params)
            for i in range(len(self.analytic_params)):
                self.analytic_params[i] = torch_analytic_params[i].detach()
        elif self.opt_method == "nn":
            dataset = QueryFeatureDataset(feature, label)
            train_dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
            self.model = SimpleNet(len(feature))
            optimizer = optim.Adam(self.model.parameters(), lr=0.01, weight_decay=2e-5)
            for epoch in range(200):
                for X, y in train_dataloader:
                    # stack the feature columns and transpose so rows come first
                    X = torch.stack(X).float()
                    X = torch.transpose(X, 0, 1)
                    optimizer.zero_grad()
                    pred = self.model(X)
                    pred = pred.reshape(-1)
                    loss = l1_loss(pred, y)
                    loss.backward()
                    optimizer.step()
                if epoch % 10 == 0:
                    print(epoch, loss.item())
        elif self.opt_method == "xgboost":
            feature = np.stack(feature).T
            model = XGBRegressor(
                n_estimators=1000,
                max_depth=8,
                eta=0.2,
                subsample=1.0,
                eval_metric="mae",
                early_stopping_rounds=100,
            )
            # random 80/20 split; the held-out part drives early stopping
            train_idx = np.random.choice(
                len(feature), size=int(0.8 * len(feature)), replace=False
            )
            # complement of train_idx (sorted), without a quadratic membership scan
            val_idx = np.setdiff1d(np.arange(len(feature)), train_idx)
            model.fit(
                feature[train_idx],
                label[train_idx],
                eval_set=[(feature[val_idx], label[val_idx])],
                verbose=False,
            )
            self.model = model
        else:
            # raised explicitly so the check also works when asserts are disabled (-O)
            raise AssertionError(f"unrecognized optimization method {self.opt_method}")

    def predict(self, eval_trace_df, use_global=False, return_per_query=True):
        """
        Predict runtimes for the rows of eval_trace_df with the fitted model.

        :param eval_trace_df: trace to predict (see featurize_data for the required columns)
        :param use_global: accepted for signature compatibility; not used by this model
        :param return_per_query: return dicts keyed by query index instead of flat arrays
        :return: (predictions, labels) as numpy arrays, or as dicts of numpy arrays
            keyed by query index when return_per_query is True
        :raises AssertionError: when self.opt_method is not a supported method
        """
        feature, labels, query_idx = self.featurize_data(eval_trace_df)
        if self.opt_method == "scipy":
            preds = interaction_func_scipy(
                feature,
                *self.analytic_params
            )
        elif self.opt_method == "torch":
            preds = interaction_func_torch(
                feature,
                *self.analytic_params
            )
            preds = preds.numpy()
            labels = labels.numpy()
        elif self.opt_method == "nn":
            feature = torch.stack(feature).float()
            feature = torch.transpose(feature, 0, 1)
            preds = self.model(feature)
            preds = preds.reshape(-1)
            preds = preds.detach().numpy()
            labels = labels.numpy()
        elif self.opt_method == "xgboost":
            feature = np.stack(feature).T
            preds = self.model.predict(feature)
            # never predict a non-positive runtime
            preds = np.maximum(preds, 0.001)
        else:
            # raised explicitly so the check also works when asserts are disabled (-O)
            raise AssertionError(f"unrecognized optimization method {self.opt_method}")
        if return_per_query:
            preds_per_query = dict()
            labels_per_query = dict()
            for i in query_idx:
                start, end = query_idx[i]
                preds_per_query[i] = preds[start:end]
                labels_per_query[i] = labels[start:end]
            return preds_per_query, labels_per_query
        else:
            return preds, labels


In [None]:
# Fit ComplexFitCurve with its default settings on the training split.
parsed_queries_path = "/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_aurora/aurora_mixed_parsed_queries.json"
cfc = ComplexFitCurve()
cfc.pre_process_queries(parsed_queries_path)
cfc.train(concurrency_df.iloc[train_idx], isolated_trace_df=isolated_trace_df)
predictions_cfc, labels = cfc.predict(eval_trace_df, use_global=True)
# The header used to announce a "simple linear regression model", but the model here is ComplexFitCurve.
print("===========Performance for complex curve fitting model (all query)=============")
# NOTE(review): predictions above use the eval split while the metrics below are
# computed on the training split — confirm this is intended.
result_overall_cfc, result_per_query_cfc = cfc.evaluate_performance(concurrency_df.iloc[train_idx], use_global=True)

In [24]:
# Fit ComplexFitCurve with the scipy optimizer and report its error on the training split.
parsed_queries_path = "/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_aurora/aurora_mixed_parsed_queries.json"
cfc = ComplexFitCurve(opt_method='scipy')
cfc.pre_process_queries(parsed_queries_path)
cfc.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df)
predictions_cfc, labels = cfc.predict(concurrency_df.iloc[train_idx], use_global=True)
# The header used to announce a "simple linear regression model", but the model here is ComplexFitCurve.
print("===========Performance for complex curve fitting model (all query)=============")
result_overall_cfc, result_per_query_cfc = cfc.evaluate_performance(concurrency_df.iloc[train_idx], use_global=True)

`ftol` termination condition is satisfied.
Function evaluations 35, initial cost 3.3346e+08, final cost 5.5839e+07, first-order optimality 7.51e+02.
50% absolute error is 5.436205015541748, q-error is 2.0866504343820074
90% absolute error is 74.22621238144698, q-error is 12.12002125949252
95% absolute error is 114.76209694732064, q-error is 27.084548610382537


In [25]:
# Report the error of the fitted ComplexFitCurve on the held-out split.
predictions_cfc, labels = cfc.predict(eval_trace_df, use_global=True)
# The header used to announce a "simple linear regression model", but the model here is ComplexFitCurve.
print("===========Performance for complex curve fitting model (all query)=============")
result_overall_cfc, result_per_query_cfc = cfc.evaluate_performance(eval_trace_df, use_global=True)

50% absolute error is 5.661937418674135, q-error is 2.10329112364105
90% absolute error is 75.90385075638284, q-error is 11.512979169070585
95% absolute error is 119.0894088532552, q-error is 25.410259635121143


In [26]:
# Per-query results of the three models side by side: first two entries of each
# model's per-query result, in the order ComplexFitCurve, SimpleFitCurve, XGBoost.
# NOTE: result_per_query_sfc and result_per_query_xgb come from the SimpleFitCurve and
# XGBoostPredictor evaluation cells, which have to be executed before this one.
for query_idx in result_per_query_cfc:
    cfc_result = result_per_query_cfc[query_idx]
    sfc_result = result_per_query_sfc[query_idx]
    xgb_result = result_per_query_xgb[query_idx]
    print(
        query_idx,
        cfc_result[0], cfc_result[1],
        sfc_result[0], sfc_result[1],
        xgb_result[0], xgb_result[1],
    )

0 51.82382569600055 1.7247650271130417 51.22344125883993 1.6370317833217243 37.80836582183839 1.450638647264475
1 2.671035890011117 16.95079370664506 9.162169084777199 22.113613320078173 1.3425602316856384 84.20188574546069
2 55.94572149242434 1.5840677671241252 52.353389160758546 1.8018835159294326 38.74426138401032 1.3671155104772483
3 1.1440348529815674 25.991297294589494 9.246921200096308 23.78898572476075 0.31357495684642345 124.35531025554901
4 5.2738345414635255 2.0669263956193085 9.995737244376748 2.6146849225270765 8.100630760192871 2.3497256889568483
5 2.378194674468218 1.5616459652245138 8.383089867236698 3.227604855467194 6.548046827316284 2.7529165339994384
6 2.230724239102172 1.7144862467184558 8.189598985742462 3.279328779957821 2.9037328233243898 4.393588383279897
7 2.1779135631770448 1.7714973966397078 8.694922407953747 4.0242551201804275 7.637907981872559 4.845469317418319
8 1.7670155280991926 5.5969947753034255 8.443826408700197 7.890019923848901 0.8219994773389772 4

In [30]:
# Inspect one query: predictions of the three models next to the observed runtimes,
# sorted by the ComplexFitCurve prediction.
i = 103
# NOTE(review): .iloc[i] reads the row at position i of the isolated trace, which is
# not necessarily query_idx i — isolated_rt_cache[i] may be what was meant; confirm.
print(isolated_trace_df["runtime"].iloc[i])
idx = np.argsort(predictions_cfc[i])
# column order: ComplexFitCurve, SimpleFitCurve, XGBoost, observed runtime
columns = (predictions_cfc[i], predictions_sfc[i], predictions_xgb[i], labels[i])
np.stack([column[idx] for column in columns], axis=1)

14.924950122833252


array([[ 43.99655748,  34.94623108,  17.72149467,  10.56022811],
       [ 44.24343477,  34.94623108,  27.44502831,  16.47497368],
       [ 44.40851256,  13.24915453,  27.04289436,  57.60951018],
       [ 44.85594944,  22.40939974,  30.49825859,   4.00279546],
       [ 45.05420997,  34.94623108,  33.12750244,  15.77505875],
       [ 45.40674458,  22.40939974,  39.985672  ,   2.13486624],
       [ 48.75521397,  34.94623108, 178.01794434, 153.18231106],
       [ 54.94138748,  22.40939974,  30.96467972,   2.75144792],
       [ 65.71767275,  60.01989378,  33.15547562,  33.16273236],
       [ 86.31415185,  60.01989378,  59.86411285,  71.74322939],
       [ 89.16649693, 160.31454456,  62.6312294 ,  58.17491651],
       [ 95.1809856 , 122.70405052, 109.89292145, 312.86754394],
       [ 97.30431529,  60.01989378,  42.58627701,  30.59504151],
       [100.98178197,  60.01989378,  50.93345261,  38.58145213],
       [104.85802359, 160.31454456,  88.37911987,  66.89298582],
       [106.18721107,  85

In [None]:
# Load the parsed-queries JSON as plain Python containers for the inspection cells below.
plan = load_json(parsed_queries_path, namespace=False)

In [None]:
# NOTE(review): scratch lookup with an empty-string key — looks like an unfinished
# placeholder; see plan.keys() for the available keys, or remove this cell.
plan[""]

In [None]:
# Show the last entry stored under "parsed_queries".
plan["parsed_queries"][-1]

In [None]:
# List the top-level keys of the loaded object.
plan.keys()

In [None]:
# Scratch check: np.maximum compares element-wise and broadcasts the scalar 1 over the array.
np.maximum(np.zeros(3), 1)

In [15]:
# Fit SimpleFitCurve on the training split (plain concurrency columns) ...
train_trace_df = concurrency_df.iloc[train_idx]
sfc = SimpleFitCurve()
sfc.train(train_trace_df, use_train=False, isolated_trace_df=isolated_trace_df)

# ... and evaluate it on the held-out split with the globally fitted parameters.
# (The per-query variant would call predict / evaluate_performance without use_global.)
predictions_sfc, labels = sfc.predict(eval_trace_df, use_global=True)
print("===========Performance for simple curve fitting model (all query)=============")
result_overall_sfc, result_per_query_sfc = sfc.evaluate_performance(eval_trace_df, use_global=True)

50% absolute error is 9.627042932918474, q-error is 2.4357127503878613
90% absolute error is 75.47100542916611, q-error is 15.68507968881842
95% absolute error is 123.25611489299501, q-error is 38.380486692719245




In [16]:
# Train the XGBoost-based predictor on the training rows of concurrency_df and
# evaluate it on eval_trace_df.
# NOTE(review): k=240 is an unexplained magic number; confirm its meaning in
# XGBoostPredictor and consider naming it in a configuration cell.
xgb = XGBoostPredictor(k=240)
xgb.train(concurrency_df.iloc[train_idx], use_train=False, isolated_trace_df=isolated_trace_df, use_pre_exec_info=True)
# NOTE(review): `labels` is also assigned by the SimpleFitCurve cell; this rebinding
# overwrites it.
predictions_xgb, labels = xgb.predict(eval_trace_df)
# Disabled alternative: predict on the training rows instead of eval_trace_df.
#predictions_xgb, labels = xgb.predict(concurrency_df.iloc[train_idx], use_train=False)
# NOTE(review): the banner says "train on full" but training above uses only the
# train_idx rows; confirm the wording is still accurate.
print("===========Performance for XGBoost model (train on full)=============")
result_overall_xgb, result_per_query_xgb = xgb.evaluate_performance(eval_trace_df)
# Disabled alternative: evaluate on the training rows instead of eval_trace_df.
#result_overall_xgb, result_per_query_xgb = xgb.evaluate_performance(concurrency_df.iloc[train_idx], use_train=False)

50% absolute error is 9.593931913375854, q-error is 2.605207308176676
90% absolute error is 64.77609083652494, q-error is 476.7287504736639
95% absolute error is 99.9914370179175, q-error is 1584.1335855509935


In [None]:
# Scratch check: natural logarithm of 0.001 (approximately -6.9078).
np.log(0.001)

In [None]:
def mul(x, a, b):
    """Apply the affine map ``x * a + b``.

    Args:
        x: Value to transform.
        a: Multiplicative factor applied to ``x``.
        b: Offset added after scaling.

    Returns:
        The result of ``x * a + b``, using whatever ``*`` and ``+`` mean for
        the argument types.
    """
    scaled = x * a
    return scaled + b

In [None]:
# Tuple of three length-3 float arrays filled with 1, 2 and 3 respectively.
a = tuple(np.ones(3) + offset for offset in range(3))

In [None]:
# Scratch check: does rebinding the loop variable change the tuple being iterated?
# Expects `a` to hold NumPy arrays, since torch.from_numpy requires an ndarray input.
for b in a:
    # Rebinds the local name `b` to a new tensor; the element stored in `a` is not replaced.
    b = torch.from_numpy(b)
    print(b)
# Printed for comparison with the tensors above: `a` still holds its original elements.
print(a)

In [None]:
# Scratch check: stacking three length-5 tensors along dim 1 yields shape (5, 3).
a = tuple(torch.ones(5) for _ in range(3))
torch.stack(a, dim=1).shape

In [None]:
#q1 = torch.tensor(self.q1, requires_grad=True)
#i1 = torch.tensor(self.i1, requires_grad=True)
#i2 = torch.tensor(self.i2, requires_grad=True)
#c1 = torch.tensor(self.c1, requires_grad=True)
#m1 = torch.tensor(self.m1, requires_grad=True)
#m2 = torch.tensor(self.m2, requires_grad=True)
#m3 = torch.tensor(self.m3, requires_grad=True)
#cm1 = torch.tensor(self.cm1, requires_grad=True)
#r1 = torch.tensor(self.r1, requires_grad=True)
#r2 = torch.tensor(self.r2, requires_grad=True)
#max_concurrency = torch.tensor(self.max_concurrency, requires_grad=True)
#avg_io_speed = torch.tensor(self.avg_io_speed, requires_grad=True)
#memory_size = torch.tensor(self.memory_size, requires_grad=True)
#optimizer = optim.Adam([q1, ], lr=0.01)