In [1]:
import numpy as np
import pandas as pd
import itertools
import random
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
from pathlib import Path
import warnings
import gc
import torch
import os
from copy import deepcopy
from torch import nn
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
from dataclasses import dataclass, field
import wandb
from dataclasses import asdict
from typing import Any
from scipy.stats import kendalltau
import json


# Torch device string used for training whenever CUDA is available.
GPU = "cuda:0"

In [2]:
# All locations are derived from the directory the notebook is executed in.
rootdir = Path().resolve().parent.parent
# Raw competition data lives two levels up, under data/.
inputdir = rootdir.joinpath("data", "predict-ai-model-runtime")
# Embedding files are read from the RankNet output directory.
embeddir = Path().resolve().joinpath("out", "ranknet", "embeddings")
# Everything this notebook writes (scaler statistics, checkpoints) goes here.
workdir = Path().resolve().joinpath("out", "finetuning-listmle-holdout")
workdir.mkdir(parents=True, exist_ok=True)

In [3]:
# Build one DataFrame per split listing every layout graph file together with
# the path where its embedding file is looked up under `embeddir`.
dataset_dict = {}

for ds in ["train", "valid", "test"]:
    records = []
    for arch, perm in itertools.product(["nlp", "xla"], ["default", "random"]):
        datadir = inputdir / f"npz_all/npz/layout/{arch}/{perm}/{ds}"
        # sorted() keeps the row order deterministic across runs/filesystems
        for filepath in sorted(datadir.glob("*.npz")):
            # Path.stem does not depend on the OS path separator and removes
            # only the trailing ".npz" suffix.
            filename = filepath.stem
            records.append(
                {
                    "arch": arch,
                    "perm": perm,
                    "filename": filename,
                    "filepath": filepath,
                    "embed_filepath": embeddir / arch / perm / ds / f"{filename}.npz",
                }
            )
    dataset_dict[ds] = pd.DataFrame(records)

In [4]:
# Keep only the rows for which both the raw graph file and its embedding file
# can be opened; the path of every missing file is printed.
for ds in dataset_dict:
    indexes = []
    for i, row in dataset_dict[ds].iterrows():
        try:
            # np.load on an .npz archive keeps a file handle open until it is
            # closed, so open both through context managers to avoid leaking
            # one handle per graph.
            with np.load(row["filepath"]), np.load(row["embed_filepath"]):
                pass
        except FileNotFoundError as e:
            # Report the file that is actually missing (raw or embedding);
            # fall back to the embedding path if the error carries no name.
            print(e.filename or row["embed_filepath"])
            continue
        indexes.append(i)

    dataset_dict[ds] = dataset_dict[ds].iloc[indexes].reset_index(drop=True)

In [5]:
def create_dataset_tensor(dataset, filename):
    """Load the training graphs, fit the feature scaler and normalise them.

    For every row of ``dataset`` the raw file (``config_runtime``) and the
    embedding file (``embeddings``) are read.  The target of a graph is the
    rank of each configuration when runtimes are sorted in descending order
    (rank 0 = largest runtime).  Embeddings are centred by the mean and divided
    by the (max - min) range, both computed per feature over all graphs.

    The statistics are written to ``workdir / f"{filename}.json"`` under the
    keys ``"xmean"`` and ``"xscl"``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows with the columns ``arch``, ``perm``, ``filename``, ``filepath``
        and ``embed_filepath``.
    filename : str
        Stem of the JSON file the scaler statistics are saved to.

    Returns
    -------
    dict
        Maps each row index to a dict with the keys ``arch``, ``perm``,
        ``filename``, ``target`` and ``X`` (normalised embeddings).

    Raises
    ------
    ValueError
        If ``dataset`` has no rows (no statistics can be fitted).
    """
    dataset_as_dict = {}
    embeddings_list = []
    for i, row in tqdm(dataset.iterrows(), total=len(dataset)):
        # Indexing an NpzFile materialises the array, so the archive can be
        # closed right away instead of leaking a handle per graph.
        with np.load(row["filepath"]) as fileobj:
            config_runtime = fileobj["config_runtime"]
        with np.load(row["embed_filepath"]) as embed_fileobj:
            embeddings = embed_fileobj["embeddings"]
        # Double argsort turns runtimes into ranks (descending runtime order).
        target = np.argsort(np.argsort(-config_runtime))

        dataset_as_dict[i] = {
            "arch": row["arch"],
            "perm": row["perm"],
            "filename": row["filename"],
            "target": target,
            "X": embeddings,
        }
        embeddings_list.append(embeddings)

    if not embeddings_list:
        raise ValueError("dataset is empty: cannot fit the embedding scaler")
    embeddings_list = np.concatenate(embeddings_list, axis=0)

    emb_scl = embeddings_list.max(axis=0) - embeddings_list.min(axis=0)
    # A constant feature has zero range; dividing by it would yield NaN/inf.
    # Use a scale of 1 there so the centred feature simply stays at 0.  The
    # guarded scale is what gets persisted, so consumers of the JSON file
    # apply exactly the same transform.
    emb_scl = np.where(emb_scl == 0, np.ones_like(emb_scl), emb_scl)
    emb_mean = embeddings_list.mean(axis=0)
    # The concatenated copy is only needed for the statistics.
    del embeddings_list

    for i in dataset_as_dict:
        dataset_as_dict[i]["X"] = (dataset_as_dict[i]["X"] - emb_mean) / emb_scl

    with open(workdir / f"{filename}.json", "w") as f:
        json.dump({"xmean": emb_mean.tolist(), "xscl": emb_scl.tolist()}, f, indent=4)
    return dataset_as_dict


def create_valid_dataset_tensor(dataset, filename):
    """Load graphs and normalise them with previously saved scaler statistics.

    The statistics are read from ``workdir / f"{filename}.json"`` (keys
    ``"xmean"`` and ``"xscl"``).  Targets are built the same way as in
    ``create_dataset_tensor``: the rank of each configuration when runtimes are
    sorted in descending order.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows with the columns ``arch``, ``perm``, ``filename``, ``filepath``
        and ``embed_filepath``.
    filename : str
        Stem of the JSON file holding the scaler statistics.

    Returns
    -------
    dict
        Maps each row index to a dict with the keys ``arch``, ``perm``,
        ``filename``, ``target`` and ``X`` (normalised embeddings).

    Raises
    ------
    FileNotFoundError
        If the statistics file does not exist.
    """
    # Read the scaler first so a missing statistics file fails immediately
    # rather than after every graph has been loaded.
    with open(workdir / f"{filename}.json", "r") as f:
        scler = json.load(f)
    emb_scl, emb_mean = np.array(scler["xscl"]), np.array(scler["xmean"])
    # Guard against zero ranges stored in the file (constant features), which
    # would otherwise turn the division below into NaN/inf.
    emb_scl = np.where(emb_scl == 0, 1.0, emb_scl)

    dataset_as_dict = {}
    for i, row in tqdm(dataset.iterrows(), total=len(dataset)):
        # Indexing an NpzFile materialises the array, so the archive can be
        # closed right away instead of leaking a handle per graph.
        with np.load(row["filepath"]) as fileobj:
            config_runtime = fileobj["config_runtime"]
        with np.load(row["embed_filepath"]) as embed_fileobj:
            embeddings = embed_fileobj["embeddings"]
        # Double argsort turns runtimes into ranks (descending runtime order).
        target = np.argsort(np.argsort(-config_runtime))

        dataset_as_dict[i] = {
            "arch": row["arch"],
            "perm": row["perm"],
            "filename": row["filename"],
            "target": target,
            "X": (embeddings - emb_mean) / emb_scl,
        }

    return dataset_as_dict

In [14]:
@dataclass
class Params:
    """Hyper-parameters shared by the dataset, the model and the training loop."""

    # Torch device string that tensors and the model are moved to.
    device: str
    # Output widths of the hidden Linear layers of the MLP.
    dims: list[int] = field(default_factory=lambda: [512] * 3)
    # Number of training epochs.
    epoch: int = 500
    # CosineAnnealingLR settings.
    T_max: int = 500
    eta_min: float = 1e-6
    # Adam settings.
    lr: float = 1e-4
    weight_decay: float = 0
    # Arguments forwarded to clip_grad_norm_.
    grad_clip_max_norm: float = 1.0
    grad_clip_norm_type: float = 2.0

    # Number of configurations drawn from a graph for one dataset item.
    sample_size: int = 1000
    # Number of graphs per training batch.
    batch_size: int = 32

    # Width of one embedding vector (input size of the MLP).
    num_feats: int = 192


# Fall back to the CPU when no CUDA device is available.
params = Params(device="cpu" if not torch.cuda.is_available() else GPU)


class FineTuningDataset(Dataset):
    """Dataset with one item per graph.

    Every access draws ``params.sample_size`` configurations of the graph at
    random (with replacement) and returns their embeddings and rank targets as
    float32 tensors on ``params.device``.
    """

    def __init__(
        self,
        dataset_as_dict: dict[str, Any],
        params: Params,
    ) -> None:
        self.params = params
        self.dataset_as_dict = dataset_as_dict

    @property
    def device(self) -> str:
        """Device string taken from the parameters."""
        return self.params.device

    def __len__(self) -> int:
        """Number of graphs held by the dataset."""
        return len(self.dataset_as_dict)

    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
        """Return ``(embeddings, target)`` for a random sample of graph ``idx``."""
        entry = self.dataset_as_dict[idx]
        ranks = entry["target"]
        # Sampling is with replacement, so the same configuration may repeat.
        picked = random.choices(range(ranks.shape[0]), k=self.params.sample_size)

        features = torch.tensor(entry["X"][picked, :], dtype=torch.float32)
        sampled_ranks = torch.tensor(ranks[picked], dtype=torch.float32)
        return features.to(self.device), sampled_ranks.to(self.device)

    def get_info(self, idx):
        """Return the ``(arch, perm, filename)`` triple identifying graph ``idx``."""
        entry = self.dataset_as_dict[idx]
        return tuple(entry[key] for key in ("arch", "perm", "filename"))


class MLP(torch.nn.Module):
    """Fully connected scorer mapping an embedding vector to a single score.

    The network is ``Linear -> ReLU`` for every width in ``params.dims``
    followed by a final ``Linear`` with one output.  The module moves itself to
    ``params.device`` on construction.
    """

    def __init__(
        self,
        params: "Params",
    ) -> None:
        super().__init__()
        self.params = params

        # Input width followed by the hidden widths.
        dims = [params.num_feats] + list(self.params.dims)
        fc_layer = []
        for in_features, out_features in zip(dims[:-1], dims[1:]):
            fc_layer += [
                nn.Linear(in_features=in_features, out_features=out_features),
                nn.ReLU(),
            ]
        fc_layer.append(nn.Linear(in_features=dims[-1], out_features=1))

        self.net = nn.Sequential(*fc_layer)
        self.to(self.params.device)

    def forward(self, x) -> torch.Tensor:
        """Score every embedding vector in ``x``.

        ``x`` has shape ``(..., num_feats)``; the result has shape ``(...)``.
        Only the trailing size-1 output axis is removed.  A bare ``squeeze()``
        would also drop a batch (or slate) axis of length 1, making the output
        rank depend on the batch size.
        """
        return self.net(x).squeeze(-1)

In [7]:
def rankNet(y_pred, y_true):
    """
    Pairwise RankNet loss ("Learning to Rank using Gradient Descent").

    Every pair (i, j) with i < j of slate positions is considered; only pairs
    whose true difference ``y_true[i] - y_true[j]`` is positive and not
    infinite contribute, each with a binary cross-entropy target of 1 on the
    predicted difference.
    :param y_pred: predictions from the model, shape [batch_size, slate_length]
    :param y_true: ground truth labels, shape [batch_size, slate_length]
    :return: loss value, a torch.Tensor
    """
    preds = y_pred.clone()
    labels = y_true.clone()

    slate_length = labels.shape[1]
    pair_index = list(itertools.combinations(range(slate_length), 2))

    # Indexing with the list of pairs yields [batch_size, num_pairs, 2].
    label_pairs = labels[:, pair_index]
    pred_pairs = preds[:, pair_index]

    label_gap = label_pairs[:, :, 0] - label_pairs[:, :, 1]
    pred_gap = pred_pairs[:, :, 0] - pred_pairs[:, :, 1]

    first_is_better = label_gap > 0
    keep = torch.logical_and(
        first_is_better, torch.logical_not(torch.isinf(label_gap))
    )

    pair_logits = pred_gap[keep]
    pair_targets = first_is_better.type(torch.float32)[keep]

    return nn.functional.binary_cross_entropy_with_logits(pair_logits, pair_targets)


class ListMLE(nn.Module):
    """ListMLE listwise ranking loss.

    The negative log-likelihood, under a Plackett-Luce model driven by the
    predicted scores, of the permutation obtained by sorting the labels in
    descending order.
    """

    def __init__(self) -> None:
        super().__init__()

    def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
        """Compute the loss averaged over the batch.

        Parameters
        ----------
        logits: torch.Tensor
            Predictions, shape (batch size, number of elements).
        labels: torch.Tensor
            Targets, shape (batch size, number of elements).

        Returns
        -------
        torch.Tensor
            Scalar loss (mean over the batch of the per-list sums).
        """
        # Sort the ground truth (highest label first)
        _, labels_sorted_indice = labels.sort(descending=True, dim=1)
        # Reorder the predictions into ground-truth order
        logits_sorted_by_true = torch.gather(logits, dim=1, index=labels_sorted_indice)
        # log(sum_{j >= i} exp(logit_j)) for every position i, accumulated from
        # the lowest-ranked element and flipped back into ground-truth order.
        # logcumsumexp stays finite where exp() followed by log() would
        # underflow to log(0) = -inf for logits far below the maximum.
        log_cumsums = torch.logcumsumexp(
            logits_sorted_by_true.flip(dims=[1]), dim=1
        ).flip(dims=[1])
        # Loss
        negative_log_likelihood = torch.sum(
            log_cumsums - logits_sorted_by_true, dim=1
        )
        return torch.mean(negative_log_likelihood)

In [8]:
def seed_everything(seed=1234):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Parameters
    ----------
    seed : int
        Seed applied to every random number generator.
    """
    random.seed(seed)
    # NOTE(review): hash randomisation is fixed at interpreter start-up, so
    # this is expected to affect only child processes started afterwards.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different algorithms from run to run.
    torch.backends.cudnn.benchmark = False


def to_cpu_numpy(
    params: "Params", pred: torch.Tensor, truth: torch.Tensor
) -> tuple[np.ndarray, np.ndarray]:
    """Detach two tensors from the graph and return them as NumPy arrays.

    The device is taken from the tensors themselves rather than from
    ``params.device``, so tensors on any CUDA device (not only the notebook's
    default one) are copied to host memory before conversion.

    Parameters
    ----------
    params : Params
        Kept for interface compatibility; not consulted.
    pred, truth : torch.Tensor
        Tensors to convert.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        ``(pred, truth)`` as NumPy arrays.
    """
    came_from_gpu = pred.is_cuda or truth.is_cuda
    # .cpu() returns the tensor itself when it already lives on the CPU.
    pred_ = pred.detach().cpu().numpy()
    truth_ = truth.detach().cpu().numpy()
    if came_from_gpu:
        # Same behaviour as before for GPU tensors: release cached blocks.
        torch.cuda.empty_cache()
    return pred_, truth_


def train(params, train_dataset_as_dict, valid_dataset_as_dict, savedir):
    """Train the MLP scorer with the ListMLE loss and log progress to wandb.

    Each epoch runs one pass over the training graphs (Adam, gradient-norm
    clipping, cosine-annealed learning rate) and then scores every validation
    graph.  Kendall's tau between targets and predictions is logged per graph.
    The model weights are saved to ``savedir`` every 10 epochs as
    ``epoch{N}_model.pt``.

    Parameters
    ----------
    params : Params
        Hyper-parameters (model size, optimiser, scheduler, batch and sample
        sizes, device).
    train_dataset_as_dict, valid_dataset_as_dict : dict
        Per-graph dictionaries consumed by ``FineTuningDataset``.
    savedir : pathlib.Path
        Directory the checkpoints are written to.
    """
    train_dataset = FineTuningDataset(
        dataset_as_dict=train_dataset_as_dict, params=params
    )
    valid_dataset = FineTuningDataset(
        dataset_as_dict=valid_dataset_as_dict, params=params
    )
    train_dataloader = DataLoader(
        train_dataset, batch_size=params.batch_size, shuffle=True
    )

    model = MLP(params=params)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=params.lr, weight_decay=params.weight_decay
    )
    scheduler = CosineAnnealingLR(
        optimizer=optimizer, T_max=params.T_max, eta_min=params.eta_min
    )
    criterion = ListMLE()

    pbar = tqdm(range(params.epoch))
    num_train_log, num_valid_log = 0, 0
    # Defined up front so the validation log below never hits an unbound name
    # when the training loader yields no batch.
    i_iter = 0
    for epoch in range(params.epoch):
        model.train()

        num_iters = len(train_dataloader)
        for i_iter, (X, target) in enumerate(train_dataloader):
            pred = model(X)

            # Restore the batch axis if a single-graph batch came back 1-D.
            if (len(pred.shape) == 1) or (len(target.shape) == 1):
                pred, target = pred.reshape(1, -1), target.reshape(1, -1)
            loss = criterion(pred, target)
            loss.backward()
            nn.utils.clip_grad_norm_(
                model.parameters(),
                max_norm=params.grad_clip_max_norm,
                norm_type=params.grad_clip_norm_type,
            )
            optimizer.step()
            # Fractional epoch: the learning rate moves along the cosine curve
            # within an epoch.
            # NOTE(review): passing an epoch to step() is reported as
            # deprecated by recent PyTorch versions - confirm before upgrading.
            scheduler.step(epoch + i_iter / num_iters)
            optimizer.zero_grad()

            # Plain float: logging the tensor would keep the graph alive.
            batch_loss = loss.item()
            pred, target = to_cpu_numpy(params, pred, target)

            for pred_, target_ in zip(pred, target):
                score = kendalltau(target_, pred_).correlation
                wandb.log(
                    {
                        "epoch": epoch,
                        "iter": i_iter,
                        "lr": scheduler.get_last_lr()[0],
                        "train/score": score,
                        "train/loss": batch_loss,
                        # Log the graph this score belongs to, not the batch.
                        "train/pred": pred_,
                        "train/target": target_,
                        "train/count": num_train_log,
                    }
                )
                num_train_log += 1

        model.eval()
        losses, scores = [], []
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            for i_graph in range(len(valid_dataset)):
                arch, perm, filename = valid_dataset.get_info(i_graph)
                X, target = valid_dataset[i_graph]
                pred = model(X)
                loss = criterion(pred.reshape(1, -1), target.reshape(1, -1))
                graph_loss = loss.item()
                pred, target = to_cpu_numpy(params, pred, target)
                score = kendalltau(target, pred).correlation

                wandb.log(
                    {
                        "epoch": epoch,
                        "iter": i_iter,
                        "lr": scheduler.get_last_lr()[0],
                        "valid/score": score,
                        "valid/loss": graph_loss,
                        "valid/pred": pred,
                        "valid/target": target,
                        "valid/count": num_valid_log,
                    }
                )
                num_valid_log += 1
                losses.append(graph_loss)
                scores.append(score)
        losses = np.array(losses)
        scores = np.array(scores)

        if (epoch + 1) % 10 == 0:
            torch.save(model.state_dict(), savedir / f"epoch{epoch + 1}_model.pt")
        pbar.update(1)
        # kendalltau yields NaN for constant inputs; ignore those in the means.
        pbar.set_description(
            f"[{epoch + 1}] valid score={np.nanmean(scores):.3f} valid loss={np.nanmean(losses):5f}"
        )
    pbar.close()

In [11]:
# The scaler statistics fitted on the training split are stored under the
# name "scl" and re-used, via the same name, to normalise the validation split.
train_dataset_as_dict = create_dataset_tensor(dataset_dict["train"], filename="scl")
valid_dataset_as_dict = create_valid_dataset_tensor(
    dataset_dict["valid"], filename="scl"
)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [15]:
exptname = "1108-勾配クリップ-ミスマッチ-listmle-finetuning-holdout"

# Using the run as a context manager finishes it even when training raises or
# is interrupted, so a failed run is not left open until the next wandb.init().
with wandb.init(
    project="predict-ai-model-runtime-for-sun-scan-clan",
    config={
        "params": asdict(params),
    },
    name=exptname,
):
    seed_everything(43)

    # Checkpoints are written straight into the working directory.
    savedir = workdir
    savedir.mkdir(exist_ok=True, parents=True)
    train(
        params=params,
        train_dataset_as_dict=train_dataset_as_dict,
        valid_dataset_as_dict=valid_dataset_as_dict,
        savedir=savedir,
    )

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
iter,▂▄█▁▅█▂▆▇▃▆█▄▇█▄▆▁▅▇▂▄▇▃▅█▄▅█▂▆▁▃▇█▄▇▁▅█
lr,██████████▇▇▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
train/count,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,███▇▇▇▆▆▅▅▄▅▃▄▄█▃▁▂▄▃▃▅▂▄▄▅▁▄▄▃▁▄▂▄▄▂▃▃▃
train/score,▆ ▂▆▇▃▅▄█▁▆▄▆▂▄▃▂█▇▃▃▇▆▇▃▄▄▅▄██▃▆▃▄▃█▂▃▅
valid/count,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
valid/loss,▅▅▄▄▄▅▄▃▅▄▃▄▄▅▄▄▄▄▄▅▄▃▄▄▂▄▄▁▇▄▃▅▃▄▄▃█▄▃▇
valid/score,▄▇▇▄▇▁▄▇▃▄▇▄▄▇▅▅▂▅▄▂▄▇▂▄▇▅▅█▅▃▇▃▇▆▄▇▇▅▇▄

0,1
epoch,14.0
iter,1.0
lr,0.0001
train/count,7505.0
train/loss,5323.70215
train/score,0.44449
valid/count,755.0
valid/loss,5843.60254
valid/score,0.4411


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112815162373915, max=1.0…

  0%|          | 0/500 [00:00<?, ?it/s]



VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
iter,███▇▆▅▅▄▃▃▂▁██▇▇▆▅▅▄▄▃▂▁███▇▆▅▅▄▄▃▂▂▁██▇
lr,███████▇▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
train/count,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/loss,█▅▅▆▅▄▅▆▅▃▄▃▂▄▄▃▂▃▅▃▄▄▃▃▄▃▄▃▃▂▃▃▁▃▃▂▂▄▄▄
train/score,▁▅▅▆▅█▃▄▅▇▇▆▇▆▆▄▇▃▃▃▁▅▄█▄█▇▅█▆▇█▇▄█████▅
valid/count,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/loss,▆▆▅▄▆▆▄▅▆▃▇▆▅▇▆▆▇▅▆▆▇▆▅▇▆▁▇▅▂▆▇▃▅█▅▁▇▅▂▆
valid/score,▆▃▆▆▃▂▇▅▄█▃▄▆▄▃▃▁▅▄▃▃▄▆▃▃█▁▅▇▃▃▇▇▂▇█▁▄▇▃

0,1
epoch,499.0
iter,16.0
lr,0.0
train/count,267499.0
train/loss,4954.85938
train/score,0.88406
valid/count,26999.0
valid/loss,5836.83008
valid/score,0.31367
