In [43]:
import numpy as np
import pandas as pd
import json
from pathlib import Path
from scipy.stats import kendalltau

In [2]:
# Working directories, all anchored at the current working directory.
rootdir = Path(".").resolve()
datadir = rootdir.joinpath("final")  # prediction dumps are read from here
outdir = rootdir.joinpath("out")  # submission CSVs are written here
# Create the output directory up front; a no-op when it already exists.
outdir.mkdir(parents=True, exist_ok=True)

In [46]:
# Merge the four "each-valid" JSON dumps into a single lookup:
#   "arch;perm;filename" -> {"pred": ..., "target": ...}
each_valid = {}
datadir_ = datadir / "each-valid"
for name in ("nlp-default", "nlp-random", "xla-default", "xla-random"):
    with (datadir_ / f"{name}.json").open("r") as fp:
        entries = json.load(fp)

    # Entries are added in file order; a repeated key keeps the later value.
    each_valid.update(
        {
            f"{pred_['arch']};{pred_['perm']};{pred_['filename']}": {
                "pred": pred_["pred"],
                "target": pred_["target"],
            }
            for pred_ in entries
        }
    )

In [47]:
# Load the single "ranknet-valid" JSON dump into a lookup:
#   "arch;perm;filename" -> {"pred": ..., "target": ...}
ranknet_valid = {}
datadir_ = datadir / "ranknet-valid"
with (datadir_ / "ranknet.json").open("r") as fp:
    entries = json.load(fp)

# Same key format as the other validation lookups so IDs can be matched across them.
ranknet_valid.update(
    {
        f"{pred_['arch']};{pred_['perm']};{pred_['filename']}": {
            "pred": pred_["pred"],
            "target": pred_["target"],
        }
        for pred_ in entries
    }
)

In [48]:
# Load the single "ranknet-random-valid" JSON dump into a lookup:
#   "arch;perm;filename" -> {"pred": ..., "target": ...}
ranknet_random_valid = {}
datadir_ = datadir / "ranknet-random-valid"
with (datadir_ / "ranknet-random.json").open("r") as fp:
    entries = json.load(fp)

# Same key format as the other validation lookups so IDs can be matched across them.
ranknet_random_valid.update(
    {
        f"{pred_['arch']};{pred_['perm']};{pred_['filename']}": {
            "pred": pred_["pred"],
            "target": pred_["target"],
        }
        for pred_ in entries
    }
)

In [34]:
# Sanity check: the three validation lookups must cover exactly the same IDs,
# because the scoring loop takes its IDs from `each_valid` and indexes the other
# two lookups with them. Symmetric differences (^) report an ID that is missing
# on either side of each pair, so every set shown here should be empty.
(
    each_valid.keys() ^ ranknet_valid.keys(),
    ranknet_random_valid.keys() ^ ranknet_valid.keys(),
    ranknet_random_valid.keys() ^ each_valid.keys(),
)

(set(), set(), set())

In [52]:
# Sample IDs to score, in the insertion order of `each_valid`.
IDS = list(each_valid)

In [65]:
# Score every validation sample: Kendall tau against the target for each of the
# three models and for the mean / median ensembles of their min-max scaled
# predictions. One record (row of `dfscore`) is produced per ID.
records = []
for ID in IDS:
    # maxsplit=2 keeps a filename that itself contains ";" in one piece instead
    # of failing the three-way unpack.
    arch, perm, filename = ID.split(";", 2)
    # One row per model, in the order: each, ranknet, ranknet-random.
    pred = np.vstack(
        [
            each_valid[ID]["pred"],
            ranknet_valid[ID]["pred"],
            ranknet_random_valid[ID]["pred"],
        ]
    )
    # Min-max scale each model's row to [0, 1] so the rows are on a common
    # scale before they are averaged.
    row_min = pred.min(axis=1, keepdims=True)
    row_range = pred.max(axis=1, keepdims=True) - row_min
    # A constant row has zero range; dividing by 1 instead maps it to all zeros
    # rather than NaN, which would otherwise propagate into the mean and median.
    row_range = np.where(row_range == 0, 1, row_range)
    pred = (pred - row_min) / row_range
    # The target is read from `each_valid` only.
    # NOTE(review): presumably the other two sources carry the same target for
    # a given ID — confirm.
    target = each_valid[ID]["target"]

    # Element-wise ensembles across the three models.
    pred_mean = pred.mean(axis=0)
    pred_median = np.median(pred, axis=0)

    records.append(
        {
            "ID": ID,
            "arch": arch,
            "perm": perm,
            "filename": filename,
            "each": kendalltau(target, pred[0]).correlation,
            "ranknet": kendalltau(target, pred[1]).correlation,
            "ranknet-random": kendalltau(target, pred[2]).correlation,
            "median": kendalltau(target, pred_median).correlation,
            "mean": kendalltau(target, pred_mean).correlation,
        }
    )
dfscore = pd.DataFrame(records)

In [68]:
# Show the first scored sample to check the column layout.
dfscore.head(n=1)

Unnamed: 0,ID,arch,perm,filename,each,ranknet,ranknet-random,median,mean
0,nlp;default;albert_en_xlarge_batch_size_16_test,nlp,default,albert_en_xlarge_batch_size_16_test,0.485212,0.431057,0.445358,0.479636,0.461653


In [76]:
# Mean Kendall tau of every model / ensemble within each (arch, perm) group.
score_columns = ["each", "ranknet", "ranknet-random", "median", "mean"]
dfsmr = dfscore.groupby(["arch", "perm"])[score_columns].mean()
dfsmr

Unnamed: 0_level_0,Unnamed: 1_level_0,each,ranknet,ranknet-random,median,mean
arch,perm,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
nlp,default,0.47279,0.452575,0.475452,0.475771,0.470139
nlp,random,0.803031,0.800421,0.825084,0.815617,0.813274
xla,default,0.301647,0.195816,0.287921,0.280438,0.290565
xla,random,0.605984,0.538181,0.575021,0.585852,0.574332


In [79]:
# Single summary number per column: sum the per-group means, add 1, divide by 5.
# NOTE(review): presumably the +1 stands in for a fifth component assumed to
# score 1.0 that is not evaluated in this notebook — confirm.
dfsmr.sum().add(1).div(5)

each              0.636690
ranknet           0.597399
ranknet-random    0.632695
median            0.631535
mean              0.629662
dtype: float64

In [81]:
# Flat list of every record from the four "each-valid" JSON dumps, in file order.
# NOTE(review): `each` is not referenced again in the cells visible here.
each = []
datadir_ = datadir / "each-valid"
for name in ("nlp-default", "nlp-random", "xla-default", "xla-random"):
    with (datadir_ / f"{name}.json").open("r") as fp:
        each.extend(json.load(fp))

In [117]:
# Merge the four "each-inference" JSON dumps into a single lookup:
#   "layout:arch:perm:filename" -> predictions
each_infer = {}
datadir_ = datadir / "each-inference"
for name in ("nlp-default", "nlp-random", "xla-default", "xla-random"):
    with (datadir_ / f"{name}.json").open("r") as fp:
        entries = json.load(fp)

    # Entries are added in file order; a repeated key keeps the later value.
    each_infer.update(
        {
            f"layout:{pred_['arch']}:{pred_['perm']}:{pred_['filename']}": pred_["pred"]
            for pred_ in entries
        }
    )

In [118]:
# Load the single "ranknet-inference" JSON dump into a lookup:
#   "layout:arch:perm:filename" -> predictions
ranknet_infer = {}
datadir_ = datadir / "ranknet-inference"
with (datadir_ / "ranknet.json").open("r") as fp:
    entries = json.load(fp)

# Same key format as the other inference lookups so IDs can be matched across them.
ranknet_infer.update(
    {
        f"layout:{pred_['arch']}:{pred_['perm']}:{pred_['filename']}": pred_["pred"]
        for pred_ in entries
    }
)

In [119]:
# Load the single "ranknet-random-inference" JSON dump into a lookup:
#   "layout:arch:perm:filename" -> predictions
ranknet_random_infer = {}
datadir_ = datadir / "ranknet-random-inference"
with (datadir_ / "ranknet-random.json").open("r") as fp:
    entries = json.load(fp)

# Same key format as the other inference lookups so IDs can be matched across them.
ranknet_random_infer.update(
    {
        f"layout:{pred_['arch']}:{pred_['perm']}:{pred_['filename']}": pred_["pred"]
        for pred_ in entries
    }
)

In [120]:
# Sanity check: the three inference lookups must cover exactly the same IDs.
# The ranking loop takes its IDs from `each_infer`, so an ID that exists only
# in one of the ranknet lookups would silently be left out of the submissions.
# Symmetric differences (^) report an ID that is missing on either side of
# each pair, so every set shown here should be empty.
(
    ranknet_random_infer.keys() ^ ranknet_infer.keys(),
    each_infer.keys() ^ ranknet_random_infer.keys(),
    each_infer.keys() ^ ranknet_infer.keys(),
)

(set(), set(), set())

In [121]:
# Sample IDs to rank, in the insertion order of `each_infer`.
IDS = list(each_infer)

In [122]:
# Build one ranking per ID for each of the three models and for the mean /
# median ensembles of their min-max scaled predictions. A ranking is the
# argsort of the predictions (indices in ascending order of predicted value),
# serialised as a ";"-separated string.
# Note: this rebinds `dfscore`, which earlier held the validation scores.
records = []
for ID in IDS:
    # One row per model, in the order: each, ranknet, ranknet-random.
    pred = np.vstack(
        [
            each_infer[ID],
            ranknet_infer[ID],
            ranknet_random_infer[ID],
        ]
    )
    # Min-max scale each model's row to [0, 1] so the rows are on a common
    # scale before they are averaged.
    row_min = pred.min(axis=1, keepdims=True)
    row_range = pred.max(axis=1, keepdims=True) - row_min
    # A constant row has zero range; dividing by 1 instead maps it to all zeros
    # rather than NaN, which would otherwise propagate into the mean and median
    # and make their argsort meaningless.
    row_range = np.where(row_range == 0, 1, row_range)
    pred = (pred - row_min) / row_range

    # Element-wise ensembles across the three models.
    pred_mean = pred.mean(axis=0)
    pred_median = np.median(pred, axis=0)

    records.append(
        {
            "ID": ID,
            "each": ";".join(map(str, pred[0].argsort())),
            "ranknet": ";".join(map(str, pred[1].argsort())),
            "ranknet-random": ";".join(map(str, pred[2].argsort())),
            "median": ";".join(map(str, pred_median.argsort())),
            "mean": ";".join(map(str, pred_mean.argsort())),
        }
    )
dfscore = pd.DataFrame(records)

In [123]:
# Write one two-column CSV (ID, TopConfigs) per ranking column of `dfscore`,
# named "<column>-submission.csv", into `outdir`.
for c in dfscore.columns.drop("ID"):
    submission = dfscore[["ID", c]].rename(columns={c: "TopConfigs"})
    submission.to_csv(outdir / f"{c}-submission.csv", index=False)