In [1]:
import numpy as np
import pandas as pd
import itertools
import random
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
from pathlib import Path
import warnings
import gc
import torch
import os
from copy import deepcopy
from torch import nn
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
from dataclasses import dataclass, field
import wandb
from dataclasses import asdict
from typing import Any
from scipy.stats import kendalltau
import json


GPU = "cuda:0"

In [79]:
rootdir = Path().resolve().parent.parent
inputdir = rootdir / "data" / "predict-ai-model-runtime"
embeddir = rootdir / "data" / "google-fast-vs-slowtest-embeddings/embeddings"
modeldir = rootdir / "data" / "google-fast-vs-slowfinetuning/finetuning-listmle"
workdir = Path().resolve() / "out"
workdir.mkdir(exist_ok=True, parents=True)

In [80]:
dataset_dict = {}

for ds in ["train", "valid", "test"]:
    records = []
    for arch, perm in itertools.product(["nlp", "xla"], ["default", "random"]):
        datadir = inputdir / f"npz_all/npz/layout/{arch}/{perm}/{ds}"
        for filepath in sorted(datadir.glob("*.npz")):
            filename = str(filepath).split("/")[-1].replace(".npz", "")
            records.append(
                {
                    "arch": arch,
                    "perm": perm,
                    "filename": filename,
                    "filepath": filepath,
                    "embed_filepath": embeddir / arch / perm / ds / f"{filename}.npz",
                }
            )
    dataset_dict[ds] = pd.DataFrame(records)

In [8]:
for ds in dataset_dict:
    indexes = []
    for i, row in dataset_dict[ds].iterrows():
        try:
            np.load(row["filepath"])
            np.load(row["embed_filepath"])
            indexes.append(i)
        except FileNotFoundError as e:
            raise e

    dataset_dict[ds] = dataset_dict[ds].iloc[indexes].reset_index(drop=True)

In [10]:
sheet_dict = pd.read_excel(
    "/home/yamaguchi/kaggle/data/clustering-1112.xlsx", sheet_name=None
)
# train_valid_dataset = pd.concat(
#     [dataset_dict["train"], dataset_dict["valid"]], axis=0, ignore_index=True
# )
# group_dict = {}
# for group_name in sheet_dict:
#     if not group_name.startswith("group"):
#         continue
#     dfgroup = sheet_dict[group_name].query("use == 1")[["arch", "perm", "filename"]]
#     dataset = dfgroup.merge(train_valid_dataset, on=["arch", "perm", "filename"])
#     sheet_dict[group_name]
#     print(group_name, dataset.shape, dfgroup.shape)
#     if dataset.shape[0] > 0:
#         group_dict[group_name] = dataset

In [11]:
def create_dataset_tensor(dataset, emb_scl, emb_mean):
    dataset_as_dict = {}
    for i, row in dataset.iterrows():
        fileobj = np.load(row["filepath"])
        embed_fileobj = np.load(row["embed_filepath"])
        config_runtime = fileobj["config_runtime"]
        target = np.argsort(np.argsort(-config_runtime))
        embeddings = embed_fileobj["embeddings"]

        dataset_as_dict[i] = {
            "arch": row["arch"],
            "perm": row["perm"],
            "filename": row["filename"],
            "target": target,
            "X": embeddings,
        }

    for i in dataset_as_dict:
        dataset_as_dict[i]["X"] = (dataset_as_dict[i]["X"] - emb_mean) / emb_scl

    return dataset_as_dict

In [12]:
@dataclass
class Params:
    device: str
    dims: list[int] = field(default_factory=lambda: [512, 512])
    epoch: int = 500
    T_max: int = 500
    eta_min: float = 0
    lr: float = 1e-5
    weight_decay: float = 0
    grad_clip_max_norm: float = 1.0
    grad_clip_norm_type: float = 2.0
    dropout_p: float = 0.05

    sample_size: int = 1000
    batch_size: int = 8

    num_feats: int = 192


params = Params(device=GPU if torch.cuda.is_available() else "cpu")


class FineTuningDataset(Dataset):
    def __init__(
        self,
        dataset_as_dict: dict[str, Any],
        params: Params,
    ) -> None:
        self.dataset_as_dict = dataset_as_dict
        self.params = params

    @property
    def device(self) -> str:
        return self.params.device

    def __len__(self) -> int:
        return len(self.dataset_as_dict)

    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
        dataset = self.dataset_as_dict[idx]
        num_configs = dataset["target"].shape[0]
        indexes = random.choices(list(range(num_configs)), k=self.params.sample_size)

        embeddings = torch.tensor(
            dataset["X"][indexes, :],
            dtype=torch.float32,
        ).to(self.device)
        target = torch.tensor(
            dataset["target"][indexes],
            dtype=torch.float32,
        ).to(self.device)

        return embeddings, target

    def get_entire(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
        dataset = self.dataset_as_dict[idx]
        embeddings = torch.tensor(
            dataset["X"],
            dtype=torch.float32,
        ).to(self.device)
        target = torch.tensor(
            dataset["target"],
            dtype=torch.float32,
        ).to(self.device)

        return embeddings, target

    def get_info(self, idx):
        dataset = self.dataset_as_dict[idx]
        return dataset["arch"], dataset["perm"], dataset["filename"]


class MLP(torch.nn.Module):
    def __init__(
        self,
        params: Params,
    ) -> None:
        super().__init__()
        self.params = params

        dims = [params.num_feats] + self.params.dims
        fc_layer = []
        for i in range(len(dims) - 1):
            fc_layer += [
                nn.Dropout(self.params.dropout_p),
                nn.Linear(
                    in_features=dims[i],
                    out_features=dims[i + 1],
                ),
                nn.ReLU(),
            ]
        fc_layer += [
            nn.Dropout(self.params.dropout_p),
            nn.Linear(
                in_features=dims[-1],
                out_features=1,
            ),
        ]

        self.net = nn.Sequential(*fc_layer)
        self.to(self.params.device)

    def forward(self, x) -> torch.Tensor:
        return self.net(x).squeeze()

In [13]:
def seed_everything(seed=1234):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


def to_cpu_numpy(
    params: Params, pred: torch.Tensor, truth: torch.Tensor
) -> tuple[np.ndarray, np.ndarray]:
    if params.device == GPU:
        pred_ = pred.cpu().detach().numpy()
        truth_ = truth.cpu().detach().numpy()
        torch.cuda.empty_cache()
    else:
        pred_ = pred.detach().numpy()
        truth_ = truth.detach().numpy()
    return pred_, truth_

In [23]:
dftest_all = sheet_dict["test"][["arch", "perm", "filename", "group"]].copy()
groups = dftest_all["group"].unique()

In [18]:
workdir / 

PosixPath('/home/yamaguchi/kaggle/experiments/1117-finetuning/out')

### テスト


In [71]:
median_json, mean_json = [], []
for group in groups:
    group_name = f"group{group}"
    with open(modeldir / f"{group_name}.json", "r") as f:
        scler = json.load(f)
        emb_scl, emb_mean = np.array(scler["xscl"]), np.array(scler["xmean"])

    dftest = dftest_all.query(f"group == {group}").reset_index(drop=True)
    dftest = dftest.merge(dataset_dict["test"], on=["arch", "perm", "filename"])
    test_dataset_as_dict = create_dataset_tensor(
        dftest, emb_scl=emb_scl, emb_mean=emb_mean
    )
    test_dataset = FineTuningDataset(
        dataset_as_dict=test_dataset_as_dict, params=params
    )
    preds = [[] for _ in range(len(test_dataset))]
    for seed in range(5):
        for i in range(10):
            checkpointdir = modeldir / group_name / f"model-split{seed}-seed{40 + i}"
            print(checkpointdir)
            model = MLP(params=params)
            model.load_state_dict(
                torch.load(
                    checkpointdir / "best_model.pt", map_location=torch.device(GPU)
                )
            )
            model.eval()

            for i_data in range(len(test_dataset)):
                X, _ = test_dataset.get_entire(i_data)
                pred = model(X)
                pred = -pred.cpu().detach().numpy()
                preds[i_data].append(pred)

    records = dftest[["arch", "perm", "filename"]].to_dict("records")
    preds_median = []
    preds_mean = []
    for i_data, pred in enumerate(preds):
        pred_ = np.vstack(pred)
        pred_ = (pred_ - pred_.min(axis=0, keepdims=True)) / (
            pred_.max(axis=0, keepdims=True) - pred_.min(axis=0, keepdims=True)
        )
        record_median = records[i_data].copy()
        record_mean = records[i_data].copy()
        record_median["pred"] = np.median(pred_, axis=0).tolist()
        record_mean["pred"] = np.mean(pred_, axis=0).tolist()
        median_json.append(record_median)
        mean_json.append(record_mean)

/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group0/model-split0-seed40
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group0/model-split0-seed41
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group0/model-split0-seed42
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group0/model-split0-seed43
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group0/model-split0-seed44
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group0/model-split0-seed45
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group0/model-split0-seed46
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group0/model-split0-seed47
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group0/model-split0-seed48
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetu

In [72]:
with open(workdir / "finetuning-median.json", "w") as f:
    json.dump(median_json, f, indent=4)

with open(workdir / "finetuning-mean.json", "w") as f:
    json.dump(mean_json, f, indent=4)

In [108]:
### 検証

In [83]:
train_valid_dataset = pd.concat(
    [dataset_dict["train"], dataset_dict["valid"]], axis=0, ignore_index=True
)
group_dict = {}
for sheet_name in sheet_dict:
    if sheet_name.startswith("group"):
        dfdataset_ = sheet_dict[sheet_name].query("use == 1")[
            ["arch", "perm", "filename"]
        ]
        dfdataset = dfdataset_.merge(
            train_valid_dataset, on=["arch", "perm", "filename"]
        )

        print(sheet_name, dfdataset.shape[0], dfdataset_.shape[0])
        group_dict[sheet_name] = dfdataset.copy()

group17 18 18
group16 30 30
group15 50 50
group10 22 22
group9 19 19
group8 21 21
group7 10 10
group6 10 10
group5 53 53
group4 20 20
group3 20 20
group2 18 18
group1 64 64
group0 17 17


In [109]:
median_json, mean_json = [], []
for group_name in group_dict:
    with open(modeldir / f"{group_name}.json", "r") as f:
        scler = json.load(f)
        emb_scl, emb_mean = np.array(scler["xscl"]), np.array(scler["xmean"])

    dftest = group_dict[group_name].copy()
    test_dataset_as_dict = create_dataset_tensor(
        dftest, emb_scl=emb_scl, emb_mean=emb_mean
    )
    test_dataset = FineTuningDataset(
        dataset_as_dict=test_dataset_as_dict, params=params
    )
    preds = [[] for _ in range(len(test_dataset))]
    targets = [-1 for _ in range(len(test_dataset))]
    for seed in range(5):
        for i in range(10):
            checkpointdir = modeldir / group_name / f"model-split{seed}-seed{40 + i}"
            print(checkpointdir)
            model = MLP(params=params)
            model.load_state_dict(
                torch.load(
                    checkpointdir / "best_model.pt", map_location=torch.device(GPU)
                )
            )
            model.eval()

            for i_data in range(len(test_dataset)):
                X, target = test_dataset.get_entire(i_data)
                pred = model(X)
                pred = pred.cpu().detach().numpy()
                target = target.cpu().detach().numpy()
                preds[i_data].append(pred)
                targets[i_data] = target

    records = dftest[["arch", "perm", "filename"]].to_dict("records")
    preds_median = []
    preds_mean = []
    for i_data, pred in enumerate(preds):
        pred_ = np.vstack(pred)
        pred_ = (pred_ - pred_.min(axis=0, keepdims=True)) / (
            pred_.max(axis=0, keepdims=True) - pred_.min(axis=0, keepdims=True)
        )
        record_median = records[i_data].copy()
        record_mean = records[i_data].copy()
        record_median["score_median"] = kendalltau(
            targets[i_data], np.median(pred_, axis=0).tolist()
        ).correlation
        record_mean["score_mean"] = kendalltau(
            targets[i_data], np.mean(pred_, axis=0).tolist()
        ).correlation
        median_json.append(record_median)
        mean_json.append(record_mean)
    # break

/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group17/model-split0-seed40
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group17/model-split0-seed41
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group17/model-split0-seed42
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group17/model-split0-seed43
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group17/model-split0-seed44
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group17/model-split0-seed45
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group17/model-split0-seed46
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group17/model-split0-seed47
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuning/finetuning-listmle/group17/model-split0-seed48
/home/yamaguchi/kaggle/data/google-fast-vs-slowfinetuni

In [110]:
median_result = pd.DataFrame(median_json)
mean_result = pd.DataFrame(mean_json)

In [125]:
(median_result.groupby(["arch", "perm"])["score_median"].mean())

arch  perm   
nlp   default    0.169293
      random    -0.071631
xla   default    0.104448
      random     0.294911
Name: score_median, dtype: float64

In [127]:
(mean_result.groupby(["arch", "perm"])["score_mean"].mean())

arch  perm   
nlp   default    0.148532
      random    -0.059005
xla   default    0.120989
      random     0.296745
Name: score_mean, dtype: float64

In [135]:
mean_result.loc[
    (mean_result["arch"] == "nlp") & (mean_result["perm"] == "random")
].sort_values("score_mean").head(20)

Unnamed: 0,arch,perm,filename,score_mean
109,nlp,random,small_bert_bert_en_uncased_L-6_H-256_A-4_batch...,-0.880051
115,nlp,random,small_bert_bert_en_uncased_L-6_H-256_A-4_batch...,-0.860589
106,nlp,random,small_bert_bert_en_uncased_L-6_H-512_A-8_batch...,-0.855677
119,nlp,random,small_bert_bert_en_uncased_L-6_H-768_A-12_batc...,-0.827137
116,nlp,random,small_bert_bert_en_uncased_L-6_H-768_A-12_batc...,-0.825551
147,nlp,random,small_bert_bert_en_uncased_L-8_H-256_A-4_batch...,-0.819744
143,nlp,random,small_bert_bert_en_uncased_L-8_H-256_A-4_batch...,-0.816247
150,nlp,random,small_bert_bert_en_uncased_L-8_H-512_A-8_batch...,-0.80061
159,nlp,random,small_bert_bert_en_uncased_L-8_H-512_A-8_batch...,-0.797358
272,nlp,random,small_bert_bert_en_uncased_L-4_H-256_A-4_batch...,-0.773813


In [93]:
median_json

[{'arch': 'xla',
  'perm': 'default',
  'filename': 'alexnet_train_batch_32',
  'score_median': SignificanceResult(statistic=0.03944980485359089, pvalue=0.06176452615079691)},
 {'arch': 'xla',
  'perm': 'random',
  'filename': 'alexnet_train_batch_32',
  'score_median': SignificanceResult(statistic=-0.027357768136973108, pvalue=0.19518012602819124)},
 {'arch': 'xla',
  'perm': 'default',
  'filename': 'ncf.2x2.fp32',
  'score_median': SignificanceResult(statistic=0.005151176932882, pvalue=0.8072976925635)},
 {'arch': 'xla',
  'perm': 'random',
  'filename': 'ncf.2x2.fp32',
  'score_median': SignificanceResult(statistic=0.03036648795414914, pvalue=0.15046622171852794)},
 {'arch': 'xla',
  'perm': 'default',
  'filename': 'magenta_dynamic',
  'score_median': SignificanceResult(statistic=-0.026238395826674404, pvalue=0.2140836853734006)},
 {'arch': 'xla',
  'perm': 'random',
  'filename': 'magenta_dynamic',
  'score_median': SignificanceResult(statistic=0.029521817036684637, pvalue=0.1621

In [88]:
median_json[0]

{'arch': 'xla',
 'perm': 'default',
 'filename': 'alexnet_train_batch_32',
 'pred': [0.49675992131233215,
  0.40599876642227173,
  0.49408388137817383,
  0.32245123386383057,
  0.35592010617256165,
  0.38520336151123047,
  0.3407236933708191,
  0.41894665360450745,
  0.36792677640914917,
  0.4215999245643616,
  0.43021050095558167,
  0.37562096118927,
  0.35288935899734497,
  0.3328235149383545,
  0.46144282817840576,
  0.4896795153617859,
  0.3933863639831543,
  0.4121357202529907,
  0.4329027235507965,
  0.5147491693496704,
  0.3343982398509979,
  0.4216587543487549,
  0.47414350509643555,
  0.3313978612422943,
  0.3919655680656433,
  0.3558862805366516,
  0.4485747218132019,
  0.5324757099151611,
  0.5168415307998657,
  0.3918663263320923,
  0.34488391876220703,
  0.5360879302024841,
  0.3236544728279114,
  0.37466704845428467,
  0.4802681803703308,
  0.37765800952911377,
  0.4837113320827484,
  0.4468861520290375,
  0.3450690507888794,
  0.5053192973136902,
  0.3891289234161377,
  