In [None]:
import pandas as pd
import numpy as np

In [None]:
def gap_calculator(dim, CF, warp_size=32):
    """Return how many columns `dim` is short of filling its last tile.

    A tile is ``CF * warp_size`` columns wide.

    Args:
        dim: Feature dimension being tiled.
        CF: Factor multiplied with `warp_size` to obtain the tile width.
        warp_size: Base width of one tile unit (defaults to 32).

    Returns:
        0 when `dim` is an exact multiple of the tile width. Otherwise
        ``min(dim, tile) - dim % tile``; for a positive `dim` smaller than
        one tile this is also 0, because the remainder equals `dim` itself.
    """
    tile = CF * warp_size
    leftover = dim % tile
    if leftover:
        # Distance from the remainder up to the (possibly clipped) tile width.
        return min(dim, tile) - leftover
    return 0
    
def dynamic_CF_prompter(dim, CF_set=None, warp_size=32):
    """Select the CF values whose padding gap for `dim` is below one warp.

    Args:
        dim: Feature dimension to tile.
        CF_set: Candidate CF values in ascending preference order. When
            omitted, the candidates 1..8 are used.
        warp_size: Base tile width; forwarded to `gap_calculator` and also
            used for the candidate cut-off and the gap threshold (both were
            previously the literal 32, which equals the default).

    Returns:
        A new list with the candidates whose gap is smaller than `warp_size`.

    Side effects:
        Prints one line per examined candidate.
    """
    # Imported locally because the notebook only imports `math` in a later
    # cell; this keeps the function usable regardless of cell order.
    import math

    # `is None` instead of `== None`: equality would be evaluated elementwise
    # (and then fail in the `if`) for array-like candidate sets.
    if CF_set is None:
        CF_set = [1, 2, 3, 4, 5, 6, 7, 8]

    # Only the first ceil(dim / warp_size) candidates are considered.
    candidates = CF_set[: math.ceil(dim / warp_size)]

    valid_CF_set = []
    for CF in candidates:
        gap = gap_calculator(dim, CF, warp_size)
        print("dim({})->CF:{} gap:{}".format(dim, CF, gap))
        if gap < warp_size:
            valid_CF_set.append(CF)
    return valid_CF_set

def rnd(dim, length):
    """Draw `length` random labels for dimension `dim`.

    The number of possible labels is eight times the number of CF values
    returned by `dynamic_CF_prompter(dim)`. Each label is drawn independently
    with `np.random.randint(0, n_options)` from NumPy's global random state,
    so results are not reproducible unless that state is seeded by the caller.

    Returns:
        A NumPy array holding the `length` drawn integers.
    """
    n_options = len(dynamic_CF_prompter(dim)) * 8
    draws = [np.random.randint(0, n_options) for _ in range(length)]
    return np.array(draws)

In [None]:
def _get_mtxfeature(data, mask=None):
    """Get the matrix feature"""
    # print(data.columns)
    feature = data.drop(["mtx_name", "OP_SpMM"], axis=1)
    np_feature = feature.values
    # print(np_feature.shape)
    return np_feature


def _get_label(data, mask=None):
    """Get the label of the data"""
    label = data["OP_SpMM"]
    np_label = label.values
    return np_label


def _get_feature_names(data, mask=None):
    """feature name"""
    np_fnames = data.columns[1:-1].values
    return np_fnames


def _get_target_names(data, mask=None):
    """SpMM method name"""
    tnames = data.iloc[0].values
    return tnames

In [None]:
from sklearn.utils import Bunch


def load_mtx(csv_path="../ParamSpMM-log/", mask=None, dim=16):
    """Load the per-dimension CSV pair and bundle it into a `Bunch`.

    Two files are read, both named by plain string concatenation with
    `csv_path` (so `csv_path` must already end with a path separator):
    ``dim<dim>_OP_SpMM.csv`` (features and labels) and ``dim<dim>.csv``
    (benchmark table).

    Args:
        csv_path: Prefix prepended to both file names.
        mask: Optional row selector applied to both tables via ``frame[mask]``.
        dim: Dimension used to build the file names.

    Returns:
        A `Bunch` with the keys ``feature``, ``label``, ``fnames`` and
        ``tnames``.
    """
    prefix = csv_path + "dim" + str(dim)
    train_data_csv = pd.read_csv(prefix + "_OP_SpMM.csv")
    bench_data_csv = pd.read_csv(prefix + ".csv")

    if mask is not None:
        train_data_csv, bench_data_csv = train_data_csv[mask], bench_data_csv[mask]

    return Bunch(
        feature=_get_mtxfeature(train_data_csv),
        label=_get_label(train_data_csv),
        fnames=_get_feature_names(train_data_csv),
        tnames=_get_target_names(bench_data_csv),
    )

In [None]:
from sklearn.model_selection import train_test_split


def _normalized_loss_sum(chosen, label, tp_values, best_tp):
    """Sum the relative throughput loss over rows where `chosen` != `label`.

    Args:
        chosen: 1-D array of chosen labels, used as column positions into
            `tp_values`.
        label: 1-D array of reference labels, same length as `chosen`.
        tp_values: 2-D array with one row per sample.
        best_tp: Array of shape (n_samples, 1) with each row's best value.

    Returns:
        ``(loss_sum, n_wrong)``: the sum of ``(best - achieved) / best`` over
        the differing rows, and how many rows differ.
    """
    wrong = chosen != label
    n_wrong = wrong.sum()
    best_wrong = best_tp[wrong]
    # For every differing row, pick the value stored in the chosen column.
    achieved = tp_values[wrong][np.arange(n_wrong), chosen[wrong]].reshape(-1, 1)
    return ((best_wrong - achieved) / best_wrong).sum(), n_wrong


def performance_loss(
    rnd_tree, csv_path, dim, n_samples=202, test_fraction=0.2, random_state=17
):
    """Print the average normalized performance of a model and a random baseline.

    The test rows are recovered by re-running `train_test_split` on the row
    indices ``0..n_samples-1``. The split arguments presumably have to match
    the ones used when `rnd_tree` was trained - TODO confirm against the
    training notebook.

    Args:
        rnd_tree: Object with a ``predict(features)`` method returning one
            label per row.
        csv_path: Prefix of the CSV files (see `load_mtx`); concatenated
            verbatim with the file name.
        dim: Dimension whose CSV files are evaluated.
        n_samples: Number of rows in the CSV files (previously hard-coded 202).
        test_fraction: Passed to `train_test_split` as ``test_size``
            (previously hard-coded 0.2).
        random_state: Seed of the split (previously hard-coded 17).

    Returns:
        None. Results are printed.
    """
    # Only the shuffled test indices are needed; the dummy feature column
    # exists solely to satisfy train_test_split's signature.
    sample_ids = np.arange(n_samples)
    _, _, _, test_ids = train_test_split(
        np.ones((n_samples, 1)),
        sample_ids,
        test_size=test_fraction,
        random_state=random_state,
    )
    test_mask = np.zeros(n_samples, dtype=bool)
    test_mask[test_ids] = True

    mtx_data = load_mtx(csv_path=csv_path, dim=dim, mask=test_mask)
    tp = pd.read_csv(csv_path + "dim" + str(dim) + ".csv")[test_mask]
    predict = rnd_tree.predict(mtx_data.feature)
    # Random baseline; drawn from NumPy's global (unseeded) random state.
    rnd_pre = rnd(dim, len(predict))

    label = mtx_data.label  # true label
    # Best achievable value per row (numeric columns only), shape (n, 1).
    best_tp = pd.DataFrame(tp.max(axis=1, numeric_only=True)).values
    # NOTE(review): labels index into *all* columns of the benchmark table,
    # while the optimum above only considers numeric columns - verify that
    # the label encoding matches the CSV column layout.
    tp_values = tp.values

    model_loss, error_num = _normalized_loss_sum(predict, label, tp_values, best_tp)
    rnd_loss, rnd_error_num = _normalized_loss_sum(rnd_pre, label, tp_values, best_tp)

    # Correct predictions contribute zero loss, so averaging the summed loss
    # over the whole test set gives the mean normalized loss.
    n_test = label.shape[0]
    print("dim{} Wrong number{} in {}".format(dim, error_num, n_test))
    print("Average normalized performance: ", 1 - model_loss / n_test)

    print("rnd dim{} Wrong number{} in {}".format(dim, rnd_error_num, n_test))
    print("Average normalized performance of random: ", 1 - rnd_loss / n_test)

In [None]:
import _pickle as pickle
import math


def eval_performance_loss(dim, csv_path="./"):
    """Load the pickled model for `dim` and report its performance loss.

    The model file is ``./rnd_tree_<approx_dim>.pkl``. `approx_dim` equals
    `dim` when `dim` is a multiple of 32; otherwise it is the midpoint of the
    32-wide bucket containing `dim` (e.g. 8 -> 16, 40 -> 48, 72 -> 80).

    Args:
        dim: Dimension to evaluate.
        csv_path: Prefix of the CSV files, forwarded to `performance_loss`.
    """
    if dim % 32 == 0:
        approx_dim = dim
    else:
        approx_dim = (math.ceil(dim / 32) * 2 - 1) * 16
    pickle_path = "./rnd_tree_" + str(approx_dim) + ".pkl"
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the file handle, which the previous
    # `pickle.load(open(...))` form left open.
    with open(pickle_path, "rb") as model_file:
        rnd_tree = pickle.load(model_file)
    performance_loss(rnd_tree, csv_path, dim)

In [None]:
# Evaluate every dimension from 8 to 128 in steps of 8.
for dim in range(8, 129, 8):
    eval_performance_loss(dim, csv_path="../ParamSpMM-log/")
