In [99]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class MAB(nn.Module):
    """Attention block in which a query set attends over a key/value set.

    Queries are projected from ``dim_Q`` and keys/values from ``dim_K`` into a
    common ``dim_V``-wide space. The projected features are cut into slices of
    width ``dim_V // num_heads``; each slice is attended independently and the
    results are stitched back together. A residual connection from the
    projected queries and a residual ReLU feed-forward layer follow, each
    optionally followed by layer normalisation.

    Args:
        dim_Q: Feature width of the query input.
        dim_K: Feature width of the key/value input.
        dim_V: Feature width of the projections and of the output.
        num_heads: Divisor used to derive the per-head slice width.
        ln: When true, two ``LayerNorm`` layers (``ln0``, ``ln1``) are created.
    """

    def __init__(self, dim_Q, dim_K, dim_V, num_heads, ln=False):
        super().__init__()
        self.dim_V, self.num_heads = dim_V, num_heads
        self.fc_q = nn.Linear(dim_Q, dim_V)
        self.fc_k = nn.Linear(dim_K, dim_V)
        self.fc_v = nn.Linear(dim_K, dim_V)
        if ln:
            self.ln0 = nn.LayerNorm(dim_V)
            self.ln1 = nn.LayerNorm(dim_V)
        self.fc_o = nn.Linear(dim_V, dim_V)

    def forward(self, Q, K):
        """Attend ``Q`` over ``K``.

        Args:
            Q: Query tensor, 3-D with features on the last axis (width ``dim_Q``).
            K: Key/value tensor, 3-D with features on the last axis (width ``dim_K``).

        Returns:
            Tensor with the leading two axes of ``Q`` and ``dim_V`` features.
        """
        queries = self.fc_q(Q)
        keys = self.fc_k(K)
        values = self.fc_v(K)
        batch = queries.size(0)
        head_width = self.dim_V // self.num_heads

        def fold_heads(tensor):
            # Move the per-head feature slices onto the batch axis so that a
            # single bmm handles every head at once.
            return torch.cat(tensor.split(head_width, 2), 0)

        q_heads = fold_heads(queries)
        k_heads = fold_heads(keys)
        v_heads = fold_heads(values)

        # Logits are scaled by sqrt(dim_V), i.e. the full width, not the head width.
        logits = torch.bmm(q_heads, k_heads.transpose(1, 2)) / math.sqrt(self.dim_V)
        weights = torch.softmax(logits, 2)
        attended = q_heads + torch.bmm(weights, v_heads)

        # Undo the head folding: batch-sized pieces go back onto the feature axis.
        out = torch.cat(attended.split(batch, 0), 2)

        norm0 = getattr(self, "ln0", None)
        if norm0 is not None:
            out = norm0(out)

        out = out + F.relu(self.fc_o(out))

        norm1 = getattr(self, "ln1", None)
        if norm1 is not None:
            out = norm1(out)
        return out


class SAB(nn.Module):
    """Self-attention over a set: a single ``MAB`` whose queries and keys are the same input.

    Args:
        dim_in: Feature width of the input set.
        dim_out: Feature width of the output.
        num_heads: Forwarded to ``MAB``.
        ln: Forwarded to ``MAB``.
    """

    def __init__(self, dim_in, dim_out, num_heads, ln=False):
        super().__init__()
        self.mab = MAB(dim_Q=dim_in, dim_K=dim_in, dim_V=dim_out, num_heads=num_heads, ln=ln)

    def forward(self, X):
        """Return ``X`` attended over itself."""
        attended = self.mab(X, X)
        return attended


class ISAB(nn.Module):
    """Attention block that routes the input set through learned inducing points.

    ``num_inds`` learned vectors first attend over the input set; the input set
    then attends over that summary.

    Args:
        dim_in: Feature width of the input set.
        dim_out: Feature width of the inducing points and of the output.
        num_heads: Forwarded to both ``MAB`` blocks.
        num_inds: Number of learned inducing points.
        ln: Forwarded to both ``MAB`` blocks.
    """

    def __init__(self, dim_in, dim_out, num_heads, num_inds, ln=False):
        super().__init__()
        inducing = torch.Tensor(1, num_inds, dim_out)
        self.I = nn.Parameter(inducing)
        nn.init.xavier_uniform_(self.I)
        self.mab0 = MAB(dim_Q=dim_out, dim_K=dim_in, dim_V=dim_out, num_heads=num_heads, ln=ln)
        self.mab1 = MAB(dim_Q=dim_in, dim_K=dim_out, dim_V=dim_out, num_heads=num_heads, ln=ln)

    def forward(self, X):
        """Return ``X`` attended over the inducing-point summary of ``X``."""
        batch = X.size(0)
        # One copy of the inducing points per batch element.
        inducing = self.I.repeat(batch, 1, 1)
        summary = self.mab0(inducing, X)
        return self.mab1(X, summary)


class PMA(nn.Module):
    """Pools a set into ``num_seeds`` vectors by letting learned seeds attend over it.

    Args:
        dim: Feature width of the input, the seeds and the output.
        num_heads: Forwarded to ``MAB``.
        num_seeds: Number of learned seed vectors (= number of output vectors).
        ln: Forwarded to ``MAB``.
    """

    def __init__(self, dim, num_heads, num_seeds, ln=False):
        super().__init__()
        seeds = torch.Tensor(1, num_seeds, dim)
        self.S = nn.Parameter(seeds)
        nn.init.xavier_uniform_(self.S)
        self.mab = MAB(dim_Q=dim, dim_K=dim, dim_V=dim, num_heads=num_heads, ln=ln)

    def forward(self, X):
        """Return the seed vectors attended over ``X``."""
        batch = X.size(0)
        # One copy of the seed vectors per batch element.
        seeds = self.S.repeat(batch, 1, 1)
        return self.mab(seeds, X)


class SetTransformer(nn.Module):
    """Per-item regressor over a set of anime ids.

    Ids are embedded, passed through a stack of ``ISAB`` blocks and projected
    to ``dim_output`` values per item by a final linear layer.

    Args:
        num_animes: Number of distinct anime labels. The embedding table has
            ``num_animes + 1`` rows; the extra row is reserved for padding
            entries.
        embed_dim: Width of the anime embedding.
        dim_output: Number of values predicted per item. Previously this
            argument was accepted but ignored (the head was hard-coded to 1);
            it now sizes the final linear layer. ``dim_output=1`` behaves
            exactly as before.
        num_layers: Number of ``ISAB`` blocks. At least one block is always
            created, even for values below 1.
        num_inds: Number of inducing points in every ``ISAB`` block.
        dim_hidden: Feature width inside the ``ISAB`` stack.
        num_heads: Number of heads in every ``ISAB`` block.
        ln: Whether the attention blocks use layer normalisation.
    """

    def __init__(
        self, num_animes, embed_dim, dim_output, num_layers=3, num_inds=32, dim_hidden=128, num_heads=4, ln=False
    ):
        super().__init__()
        self.dim_output = dim_output

        # Row `num_animes` is the extra slot for padding entries.
        # NOTE(review): no `padding_idx` is set and attention is unmasked, so
        # padded positions are learned and attended like real items — confirm
        # this is intended.
        self.embedding = nn.Embedding(num_animes + 1, embed_dim)

        layers = [ISAB(embed_dim, dim_hidden, num_heads, num_inds, ln=ln)]
        for _ in range(num_layers - 1):
            layers.append(ISAB(dim_hidden, dim_hidden, num_heads, num_inds, ln=ln))
        # Honour `dim_output` instead of a hard-coded single output unit.
        layers.append(nn.Linear(dim_hidden, dim_output))
        self.enc = nn.Sequential(*layers)

    def forward(self, X):
        """Predict values for every item of every set in the batch.

        Args:
            X: Integer tensor of anime labels, shape ``[batch_size, num_items]``.

        Returns:
            ``[batch_size, num_items]`` when ``dim_output == 1``, otherwise
            ``[batch_size, num_items, dim_output]``.
        """
        X_embed = self.embedding(X)  # [batch_size, num_items, embed_dim]
        out = self.enc(X_embed)  # [batch_size, num_items, dim_output]
        # squeeze(2) only removes the last axis when it has size 1, so
        # multi-output heads keep their feature axis.
        return out.squeeze(2)

In [100]:
import os
import pickle
import random
import sys
import uuid
from pathlib import Path

import implicit
import lightgbm as lgb
import numpy as np
import pandas as pd
import torch.optim as optim
from scipy.sparse import csr_matrix, random
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Dataset

# Show up to 50 columns when a DataFrame is displayed.
pd.options.display.max_columns = 50

# Put the parent directory on the module search path
# (presumably so the project-local `utils` package imported below resolves — confirm).
sys.path.append(os.pardir)
from hydra import compose, initialize

from utils import load_datasets
from utils.embedding import TextEmbedder

# Load the Hydra configuration from ../yamls/config.yaml.
with initialize(config_path="../yamls", version_base=None):
    config = compose(config_name="config.yaml")

# Read the train / test tables from the configured input directory.
input_dir = Path(config.input_path)
train_df = pd.read_csv(input_dir / "train.csv")
test_df = pd.read_csv(input_dir / "test.csv")

# Factorize anime_id over train + test together so both splits share one
# contiguous integer label space; `anime_idx` maps a label back to its anime_id.
all_df = pd.concat([train_df, test_df])
anime_codes, anime_idx = pd.factorize(all_df["anime_id"])
all_df["anime_label"] = anime_codes

# The first len(train_df) codes belong to the train rows, the rest to test.
n_train = len(train_df)
train_df["anime_label"] = anime_codes[:n_train]
test_df["anime_label"] = anime_codes[n_train:]

In [101]:
# Mean of the `score` column over the training rows (shown as the cell output).
train_df["score"].mean()

7.768770023680179

In [104]:
from sklearn.model_selection import StratifiedGroupKFold
from tqdm.auto import tqdm

def _predict_user_items(model, items, num_samples, num_tts, device):
    """Predict one value per item for a single user, averaged over inference rounds.

    Args:
        model: Trained network; the caller is responsible for ``model.eval()``
            and for running this under ``torch.no_grad()``.
        items: 1-D tensor of anime labels belonging to one user.
        num_samples: Largest set size passed to the model in one forward pass.
        num_tts: Number of chunked inference rounds to average.
        device: Device the model lives on.

    Returns:
        NumPy array with one prediction per entry of ``items``, in the same order.
    """
    items = items.to(device)
    n_items = len(items)
    round_preds = []
    for _ in range(num_tts):
        if n_items <= num_samples:
            # The whole set fits in one forward pass, so a single round is enough.
            # .cpu() keeps this working when the model runs on an accelerator.
            pred = model(items.unsqueeze(dim=0)).cpu().numpy().flatten()[:n_items]
            round_preds.append(pred)
            break
        # Per the original comment, make_chunk_index_list shuffles the item
        # indices and splits them into chunks of `num_samples`; it is defined
        # outside this cell — confirm its contract there.
        pred = np.zeros(n_items)
        for chunk_index in make_chunk_index_list(n_items, num_samples):
            pred[chunk_index] = model(items[chunk_index].unsqueeze(dim=0)).cpu().numpy().flatten()
        round_preds.append(pred)
    return np.mean(round_preds, axis=0)


output_path = Path(".")
best_model_path = output_path / "best_model.pt"
device = "cpu"  # "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): debug-sized subsample. These assignments overwrite the loaded
# frames, so re-running this cell keeps shrinking nothing further but the full
# data is only recoverable by re-running the loading cell.
train_df = train_df.head(10000)
test_df = test_df.head(2000)


oof_pred = np.zeros(train_df.shape[0])
test_preds_all = []

# The test split does not depend on the fold, so it is grouped once here.
test_grouped_anime = test_df.groupby("user_id")["anime_label"].apply(list)
# Row positions of test_df in the same (user-grouped) order as the predictions,
# so predictions can be written back to the rows they belong to even when
# test_df is not sorted by user_id.
test_row_positions = np.concatenate(
    test_df.assign(_row_pos=np.arange(len(test_df))).groupby("user_id")["_row_pos"].apply(list).tolist()
)


# Re-binds `random` to the stdlib module: the earlier
# `from scipy.sparse import csr_matrix, random` shadowed it.
import random

# NOTE(review): AnimeDataset, make_chunk_index_list, num_animes, num_samples,
# batch_size, embed_dim, dim_output, num_epochs and num_tts are not defined in
# the cells shown here; they must exist in the kernel before this cell runs.
kf = StratifiedGroupKFold(n_splits=config.nn.num_folds, shuffle=True, random_state=config.seed)
for fold, (train_index, valid_index) in enumerate(kf.split(train_df, train_df["score"], train_df["user_id"])):
    print(f"Fold {fold} start !")
    fold_train_df = train_df.iloc[train_index]
    fold_valid_df = train_df.iloc[valid_index]

    # Per user: list of anime labels and the matching list of scores.
    train_grouped_anime = fold_train_df.groupby("user_id")["anime_label"].apply(list)
    train_grouped_score = fold_train_df.groupby("user_id")["score"].apply(list)
    valid_grouped_anime = fold_valid_df.groupby("user_id")["anime_label"].apply(list)
    valid_grouped_score = fold_valid_df.groupby("user_id")["score"].apply(list)
    # Row positions of the validation rows, grouped exactly like the
    # predictions will be (by user, then original row order within the user).
    valid_row_positions = np.concatenate(
        fold_valid_df.assign(_row_pos=valid_index).groupby("user_id")["_row_pos"].apply(list).tolist()
    )

    train_dataset = AnimeDataset(num_animes, train_grouped_anime, train_grouped_score, num_samples)
    valid_dataset = AnimeDataset(num_animes, valid_grouped_anime, valid_grouped_score, num_samples)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    model = SetTransformer(
        num_animes=num_animes,
        embed_dim=embed_dim,
        dim_output=dim_output,  # Since we want to predict scores, the dim_output is 1.
    ).to(device)

    criterion = nn.MSELoss()  # Using MSE as the loss for RMSE
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    best_val_loss = float("inf")
    early_stopping_counter = 0

    print("Train")
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for data, scores in tqdm(train_loader):
            data, scores = data.to(device), scores.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            mse = criterion(outputs, scores)
            loss = torch.sqrt(mse)  # optimise the batch RMSE
            loss.backward()
            optimizer.step()
            train_loss += mse.item()

        # Validate the model
        model.eval()
        valid_loss = 0
        with torch.no_grad():
            for data, scores in tqdm(valid_loader):
                data, scores = data.to(device), scores.to(device)
                outputs = model(data)
                # No squeeze(): the model output already has the same layout
                # as in the training loop above, and squeezing would collapse a
                # size-1 batch or item axis and let MSELoss broadcast.
                valid_loss += criterion(outputs, scores).item()

        # Epoch-level RMSE: square root of the mean per-batch MSE.
        train_loss = np.sqrt(train_loss / len(train_loader))
        valid_loss = np.sqrt(valid_loss / len(valid_loader))
        print(f"Epoch{epoch}: Training Loss {train_loss},  Validation Loss {valid_loss}")
        # Experiment tracking is currently disabled:
        # wandb.log(
        #     {"epoch": epoch, f"nn/train_loss/fold-{fold}": train_loss, f"nn/valid_loss/fold-{fold}": valid_loss}
        # )

        # Save the model if it has the best score so far
        if valid_loss < best_val_loss:
            torch.save(model.state_dict(), best_model_path)
            best_val_loss = valid_loss
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= config.nn.early_stopping:
                break

    # Restore the best checkpoint of this fold; map_location keeps the load
    # working when the checkpoint was written from a different device.
    model.load_state_dict(torch.load(best_model_path, map_location=device))
    model.eval()
    # Passing None as the last argument presumably disables sampling so every
    # item of a user is returned — confirm against AnimeDataset.
    valid_dataset = AnimeDataset(num_animes, valid_grouped_anime, valid_grouped_score, None)
    test_dataset = AnimeDataset(num_animes, test_grouped_anime, None, None)
    valid_preds = []
    test_preds = []

    with torch.no_grad():
        for idx in range(len(valid_dataset)):
            data, _ = valid_dataset[idx]
            valid_preds.append(_predict_user_items(model, data, num_samples, num_tts, device))

        for idx in range(len(test_dataset)):
            data = test_dataset[idx]
            test_preds.append(_predict_user_items(model, data, num_samples, num_tts, device))

    # Predictions are grouped by user; scatter them back to their own rows.
    # This assumes dataset index i corresponds to the i-th user of the grouped
    # Series and that item order within a user is preserved — the original
    # flattening relied on the same assumption.
    oof_pred[valid_row_positions] = np.concatenate(valid_preds)
    fold_test_pred = np.zeros(len(test_df))
    fold_test_pred[test_row_positions] = np.concatenate(test_preds)
    test_preds_all.append(fold_test_pred)

Fold 0 start !
Train


  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch0: Training Loss 5.339135652927799,  Validation Loss 3.615911900683899


  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch1: Training Loss 3.4186819352556443,  Validation Loss 1.678759555012833
Fold 1 start !
Train


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch0: Training Loss 4.901848770588641,  Validation Loss 2.0002497565485156


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch1: Training Loss 1.7223258653821398,  Validation Loss 1.7764008017736426
Fold 2 start !
Train


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch0: Training Loss 5.296896346630629,  Validation Loss 2.1614353258638235


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch1: Training Loss 1.919488730874751,  Validation Loss 1.604829675877223
Fold 3 start !
Train


  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch0: Training Loss 5.250569074617688,  Validation Loss 1.8972786525653045


  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch1: Training Loss 1.9567877773405296,  Validation Loss 2.55394148332572
Fold 4 start !
Train


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch0: Training Loss 4.585021445104698,  Validation Loss 3.1258762422785056


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch1: Training Loss 2.1479635831288832,  Validation Loss 1.893505764666031


In [106]:
# Display the out-of-fold prediction array filled in by the cross-validation loop.
oof_pred

array([7.81555963, 7.469069  , 8.48040056, ..., 6.66080594, 7.49263716,
       7.11457157])