In [1]:
import gc
import os
import time
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl
import safetensors
import seaborn as sns
import torch
from dotenv import load_dotenv
from hydra import compose, initialize
from jinja2 import Template
from matplotlib import pyplot as plt
from omegaconf import OmegaConf
from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model, prepare_model_for_kbit_training
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from src.seed import seed_everything
from torch import nn
from torch.utils.data import DataLoader, Dataset
from tqdm.auto import tqdm
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,  # AutoModelForCausalLM: 次の単語を予測するモデル
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)
from transformers.modeling_outputs import SequenceClassifierOutput
from transformers.utils import is_torch_bf16_gpu_available
from trl import DataCollatorForCompletionOnlyLM

# Silence all library warnings to keep the notebook output readable.
warnings.filterwarnings("ignore")

# Compose the Hydra config named "config" from the "config" directory and
# record the name of the current working directory as the experiment number.
with initialize(config_path="config", version_base=None):
    cfg = compose(config_name="config")
    cfg.exp_number = Path().resolve().name
print(OmegaConf.to_yaml(cfg, resolve=True))

seed_everything(cfg.seed)

# Load environment variables from the .env file (HF_TOKEN is read from os.environ later).
load_dotenv()

# Report whether bf16 is available on this GPU.
print(f"bf16: {is_torch_bf16_gpu_available()}")

# Select the compute device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


gemma:
  params:
    model_path: unsloth/gemma-2-9b-it-bnb-4bit
    metric: auc
    max_length: 192
    fp16: true
    learning_rate: 2.0e-05
    epochs: 2
    per_device_train_batch_size: 8
    per_device_eval_batch_size: 8
    steps: 100
    lr_scheduler_type: cosine
    weight_decay: 0.01
exp_number: '016'
run_name: base
data:
  data_root: ../../data
  results_root: ../../results
  train_path: ../../data/train.csv
  cloth_path: ../../data/clothing_master.csv
  test_path: ../../data/test.csv
  sample_submission_path: ../../data/sample_submission.csv
  results_dir: ../../results/016/base
seed: 42
n_splits: 4
target: Recommended IND

bf16: True


### LLMのQLoRAによるファインチューニング(Instruction Tuning)を実装
- 1st solutionの公開notebookをベースに実装
- https://www.guruguru.science/competitions/24/discussions/21027ff1-2074-4e21-a249-b2d4170bd516/
- 1st solではQLoRAを使用していなかったので、QLoRAを使用するように変更
- モデルはunsloth/gemma-2-9b-it-bnb-4bit（gemma2を4bit量子化したもの）
- SequenceClassificationHeadは使用せず、LanguageModelHeadを使用
- クラスラベル(0/1)をYes/Noで答えさせるようにInstruction Tuning
- 推論時は回答トークン(Yes/No)の直前まで入力し、Next Token PredictionにおけるYes/Noの確率をクラスの予測として扱う
- 1foldあたりの学習時間は10時間程度（GPUはRTX-4070）
- テストデータの推論時間は26時間程度（GPUはRTX-4070）

### データの準備


In [2]:
# Read the train/test CSVs with polars and fill nulls with "" so that the
# .strip() calls on Title / Review Text during preprocessing do not hit None.
train_df = pl.read_csv(cfg.data.train_path, try_parse_dates=True).fill_null("")
test_df = pl.read_csv(cfg.data.test_path, try_parse_dates=True).fill_null("")

# Stratified K-fold splitter shared by the training and validation cells.
skf = StratifiedKFold(n_splits=cfg.n_splits, shuffle=True, random_state=cfg.seed)


In [3]:
# Set DEBUG = True to run the notebook on the first 100 training rows only.
DEBUG = False

if DEBUG:
    train_df = train_df.head(100)


# Rename the target column to "labels".
train_df = train_df.rename({cfg.target: "labels"})
train_df.head(3)


Clothing ID,Age,Title,Review Text,Rating,labels,Positive Feedback Count
i64,i64,str,str,i64,i64,i64
0,25,"""3-season skirt!""","""Adorable, well-made skirt! lin…",5,1,4
0,39,"""Very cute""","""Love the asymmetrical hem. wai…",5,1,0
0,42,"""Beautiful! fruns small for typ…","""I love this skirt! i wasn't su…",5,1,5


In [4]:
# Tokenizer (a Hugging Face token is required for the first load).
tokenizer = AutoTokenizer.from_pretrained(cfg.gemma.params.model_path, token=os.environ["HF_TOKEN"])

# This is a decoder model, so pad on the left (the default is right).
# Reference: https://huggingface.co/docs/transformers/llm_tutorial#wrong-padding-side
tokenizer.padding_side = "left"


In [5]:
# Build the prompt template. It is rendered with age / title / review_text / label
# and already contains the <bos> token, so tokenization is done with
# add_special_tokens=False.
# NOTE(review): "whther" is a typo of "whether". It is part of the model input,
# so correcting it changes the prompt seen by any adapter already trained with
# this text — fix only together with a retrain.
template = Template("""<bos><start_of_turn>user
Consider whther this user would recommend the product based on the reviews.
Answer Yes/No
# Review:
Age: {{age}}
Title: {{title}}
{{review_text}}

<start_of_turn>model
Answer: {{label}}
""")


In [6]:
# Sanity check of the prompt template with a hand-written example.
tmp_input = template.render(age="20", title="nice", review_text="I like it. I recommend it.", label="Yes")
print(tmp_input)


<bos><start_of_turn>user
Consider whther this user would recommend the product based on the reviews.
Answer Yes/No
# Review:
Age: 20
Title: nice
I like it. I recommend it.

<start_of_turn>model
Answer: Yes


In [7]:
# Sanity check of the tokenizer on the rendered example prompt
# (no extra special tokens, no truncation).
tokenizer(tmp_input, add_special_tokens=False, truncation=False)


{'input_ids': [2, 106, 1645, 108, 23171, 733, 790, 736, 2425, 1134, 5656, 573, 3225, 3482, 611, 573, 7796, 235265, 108, 1261, 6287, 235283, 1294, 108, 235345, 5973, 235292, 108, 11345, 235292, 235248, 235284, 235276, 108, 4883, 235292, 4866, 108, 235285, 1154, 665, 235265, 590, 5656, 665, 235265, 109, 106, 2516, 108, 1261, 235292, 6287], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [8]:
# Preprocessing
def preprocess_row(row, tokenizer):
    """Render one record into the prompt template and tokenize it.

    Args:
        row: Mapping providing the "labels", "Age", "Title" and "Review Text" entries.
        tokenizer: Tokenizer callable; invoked with ``add_special_tokens=False``
            and ``truncation=False``.

    Returns:
        dict: The tokenizer output converted to a plain dict.
    """
    # Label 0 is verbalized as "No"; every other value as "Yes".
    answer = "No" if row["labels"] == 0 else "Yes"

    prompt = template.render(
        age=row["Age"],
        title=row["Title"].strip(),
        review_text=row["Review Text"].strip(),
        label=answer,
    )

    # The template already starts with <bos>, so no special tokens are added here.
    encoded = tokenizer(prompt, add_special_tokens=False, truncation=False)
    return dict(encoded)


def preprocess_df(df, tokenizer):
    """Tokenize every row of ``df`` and append the tokenizer outputs as columns.

    Args:
        df: polars DataFrame whose rows are accepted by ``preprocess_row``.
        tokenizer: Tokenizer forwarded to ``preprocess_row``.

    Returns:
        A new DataFrame: ``df`` horizontally concatenated with one column per
        tokenizer output key.
    """
    encoded_rows = [preprocess_row(record, tokenizer) for record in df.iter_rows(named=True)]
    return pl.concat([df, pl.DataFrame(encoded_rows)], how="horizontal")


In [9]:
# Preprocessing: append the tokenizer output columns to train_df.
# NOTE(review): this rebinds train_df, so re-running the cell would try to append
# the same columns a second time — presumably failing on duplicate names; confirm.
train_df = preprocess_df(train_df, tokenizer)

# Check the result (show up to 100 elements of each list cell).
pl.Config.set_fmt_table_cell_list_len(100)
train_df.head(2)


Clothing ID,Age,Title,Review Text,Rating,labels,Positive Feedback Count,input_ids,attention_mask
i64,i64,str,str,i64,i64,i64,list[i64],list[i64]
0,25,"""3-season skirt!""","""Adorable, well-made skirt! lin…",5,1,4,"[2, 106, 1645, 108, 23171, 733, 790, 736, 2425, 1134, 5656, 573, 3225, 3482, 611, 573, 7796, 235265, 108, 1261, 6287, 235283, 1294, 108, 235345, 5973, 235292, 108, 11345, 235292, 235248, 235284, 235308, 108, 4883, 235292, 235248, 235304, 235290, 19802, 23210, 235341, 108, 146769, 235269, 1578, 235290, 8672, 23210, 235341, 33051, 578, 1508, 225891, 235265, 496, 1093, 577, 2395, 908, 518, 235283, 235260, 665, 10140, 476, 3298, 68038, 2449, 573, 23663, 235265, 4187, 235269, 665, 235303, 235256, 5123, 665, 518, 235283, 235260, 736, 877, 4810, 1767, 1497, 578, 3309, 17460, 25920, 235341, 109, 106, 2516, 108, 1261, 235292, 6287]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"
0,39,"""Very cute""","""Love the asymmetrical hem. wai…",5,1,0,"[2, 106, 1645, 108, 23171, 733, 790, 736, 2425, 1134, 5656, 573, 3225, 3482, 611, 573, 7796, 235265, 108, 1261, 6287, 235283, 1294, 108, 235345, 5973, 235292, 108, 11345, 235292, 235248, 235304, 235315, 108, 4883, 235292, 11681, 9786, 108, 8703, 573, 150531, 15112, 235265, 23663, 3806, 68038, 605, 685, 575, 13596, 235265, 665, 23572, 575, 1378, 18881, 675, 476, 5915, 54011, 685, 1578, 235265, 476, 2356, 26979, 575, 573, 9662, 235265, 8889, 9276, 901, 7697, 780, 577, 2745, 1297, 1861, 496, 3528, 235303, 235251, 2375, 1154, 783, 1538, 8044, 665, 832, 1162, 5094, 235265, 139, 2189, 235303, 235251, 27675, … 6287]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, … 1]"


In [10]:
# Dataset
class AtmaDataset(Dataset):
    """Map-style dataset that serves the pre-tokenized ``input_ids`` of each row."""

    def __init__(self, df):
        """Store ``df`` (anything exposing ``to_pandas()``) as a pandas DataFrame."""
        self.df = df.to_pandas()

    def __len__(self):
        """Return the number of rows."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``{"input_ids": ...}`` for the row at position ``idx``."""
        # Only input_ids is returned; the collator builds the remaining fields.
        return {"input_ids": self.df["input_ids"].iloc[idx]}


In [11]:
# Build one sample batch to inspect what the collator produces.
ds = AtmaDataset(train_df)
# "Answer:" is the response template given to the completion-only collator.
# This collator instance is reused later by the Trainer and by inference().
data_collator = DataCollatorForCompletionOnlyLM("Answer:", tokenizer=tokenizer)
batch = next(iter(DataLoader(ds, batch_size=4, collate_fn=data_collator)))


In [12]:
# Token ids of the first sample in the batch (left-padded, as set on the tokenizer).
batch["input_ids"][0]


tensor([     0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      2,    106,   1645,    108,  23171,
           733,    790,    736,   2425,   1134,   5656,    573,   3225,   3482,
           611,    573,   7796, 235265,    108,   1261,   6287, 235283,   1294,
           108, 235345,   5973, 235292, 

In [13]:
# Labels of the first sample as produced by the collator.
batch["labels"][0]


tensor([-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -1

In [14]:
# Decode the first sample back to text to verify padding and the prompt layout.
print(tokenizer.decode(batch["input_ids"][0]))


<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><bos><start_of_turn>user
Consider whther this user would recommend the product based on the reviews.
Answer Yes/No
# Review:
Age: 25
Title: 3-season skirt!
Adorable, well-made skirt! lined and very slimming. i had to size up b/c it runs a bit snug around the waist. however, it's worth it b/c this will match many long and short sleeve tops!

<start_of_turn>model
Answer: Yes


In [15]:
# setup model function
def setup_model():
    """Load the base causal LM and wrap it with LoRA adapters for training.

    The checkpoint path comes from ``cfg.gemma.params.model_path`` and the
    Hugging Face token from the ``HF_TOKEN`` environment variable.

    Returns:
        The PEFT-wrapped model returned by ``get_peft_model``.
    """
    base_model = AutoModelForCausalLM.from_pretrained(
        cfg.gemma.params.model_path,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        token=os.environ["HF_TOKEN"],
    )

    # Disable the cache (required because gradient checkpointing is enabled later).
    base_model.config.use_cache = False
    # Make the quantized model ready for fine-tuning.
    base_model = prepare_model_for_kbit_training(base_model)

    # LoRA adapters on every attention and MLP projection.
    adapter_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        target_modules=(
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ),
    )

    return get_peft_model(base_model, adapter_config)


### 学習

In [16]:
# Create the directory that stores this run's artifacts.
# run_name is set to a timestamp so each execution gets its own results_dir.
cfg.run_name = time.strftime("%Y%m%d_%H%M%S")
Path(cfg.data.results_dir).mkdir(exist_ok=True, parents=True)

y_train = train_df["labels"].to_numpy()
# NOTE(review): oof is allocated here but never filled by this cell.
oof = np.zeros(len(y_train))

for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, y_train)):
    # Release the previous fold's model/trainer BEFORE loading the next base model;
    # otherwise both models are alive on the GPU at the same time.
    # The last fold's objects stay bound after the loop, as before.
    model = None
    trainer = None
    gc.collect()
    torch.cuda.empty_cache()

    # setup model
    model = setup_model()

    trn_df = train_df[train_idx]
    val_df = train_df[val_idx]

    output_dir = os.path.join(cfg.data.results_dir, f"fold{fold}")

    training_args = TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=False,
        log_level="error",
        logging_steps=10,
        logging_strategy="steps",
        eval_strategy="steps",
        eval_steps=50,
        metric_for_best_model="loss",
        save_strategy="epoch",
        save_total_limit=1,
        num_train_epochs=1,
        # optim="paged_adamw_8bit",
        optim="adamw_hf",
        lr_scheduler_type="linear",
        warmup_ratio=0.1,
        learning_rate=1e-4,
        weight_decay=0.01,
        # Use bf16 when the GPU supports it, fp16 otherwise.
        bf16=is_torch_bf16_gpu_available(),
        fp16=not is_torch_bf16_gpu_available(),
        # Effective batch size is 1 x 16 via gradient accumulation.
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        gradient_accumulation_steps=16,
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},
        group_by_length=False,
        report_to="none",
        # Take the seed from the config instead of a hard-coded literal.
        seed=cfg.seed,
        # AtmaDataset only yields input_ids; keep the Trainer from pruning columns.
        remove_unused_columns=False,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=AtmaDataset(trn_df),
        eval_dataset=AtmaDataset(val_df),
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    trainer.train()

    # Save the final adapter and tokenizer for this fold.
    # final_output_dir keeps pointing at the most recently trained fold after the loop.
    final_output_dir = f"{cfg.data.results_dir}/fold{fold}/final"
    trainer.save_model(final_output_dir)
    tokenizer.save_pretrained(final_output_dir)


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


trainable params: 54,018,048 || all params: 9,295,724,032 || trainable%: 0.5811


Step,Training Loss,Validation Loss
50,0.2248,0.320573
100,0.3268,0.25204
150,0.2248,0.15785
200,0.2408,0.190777
250,0.2529,0.172395
300,0.1143,0.14864
350,0.1227,0.156239
400,0.2289,0.144959
450,0.1072,0.145764


trainable params: 54,018,048 || all params: 9,295,724,032 || trainable%: 0.5811
{'loss': 0.734, 'grad_norm': 4.504663944244385, 'learning_rate': 2.1276595744680852e-05, 'epoch': 0.021333333333333333}


KeyboardInterrupt: 

### Sub


In [64]:
# inference function


@torch.no_grad()
def inference(ckpt_path: str, df: pl.DataFrame) -> np.ndarray:
    """Score every row of ``df`` with the LoRA adapter stored at ``ckpt_path``.

    Each tokenized prompt is fed to the model without its last token (the
    answer), and the next-token logits of the two answer tokens are collected.

    Args:
        ckpt_path: Directory containing the saved PEFT adapter.
        df: Preprocessed frame with an ``input_ids`` column whose last token is
            the (possibly dummy) answer token.

    Returns:
        float32 array of shape (len(df), 2) holding raw logits in the column
        order [No, Yes]; an empty (0, 2) array when ``df`` has no rows.
    """
    # Token ids of the answer tokens; the result columns follow this order: [No, Yes].
    answer_token_ids = [1307, 6287]

    dl = DataLoader(AtmaDataset(df), batch_size=1, shuffle=False, collate_fn=data_collator)

    model = AutoModelForCausalLM.from_pretrained(
        cfg.gemma.params.model_path, torch_dtype=torch.bfloat16, trust_remote_code=True, token=os.environ["HF_TOKEN"]
    )
    model = PeftModel.from_pretrained(model, ckpt_path)
    model.eval()

    preds = []

    try:
        for batch in tqdm(dl):
            batch.pop("labels")

            # Inference shift: drop the final (answer) token so that the last
            # position predicts it, then move the tensors to the device.
            batch = {k: v[:, :-1].to(device) for k, v in batch.items()}
            output = model(**batch)

            # Select the two answer logits on the device before copying to CPU
            # (avoids transferring the whole seq x vocab tensor), and cast to
            # float32 because bfloat16 tensors cannot be converted to NumPy.
            class_logits = output.logits[:, -1, answer_token_ids].float().cpu()
            preds.append(class_logits)
    finally:
        # Free the model even if the loop fails or is interrupted.
        del model
        gc.collect()
        torch.cuda.empty_cache()

    if not preds:
        # torch.vstack raises on an empty list; return a well-shaped empty result.
        return np.zeros((0, len(answer_token_ids)), dtype=np.float32)

    return torch.vstack(preds).numpy()


In [65]:
# Validate with the fold-0 model.

cv_idx_list = list(skf.split(train_df, y_train))
val_idx = cv_idx_list[0][1]

# Build the fold-0 checkpoint path explicitly. final_output_dir points at the
# LAST fold trained by the loop, and that adapter has seen the fold-0 validation
# rows during training, which would leak into this score.
fold0_ckpt_dir = f"{cfg.data.results_dir}/fold0/final"

# Inference
preds = inference(fold0_ckpt_dir, train_df[val_idx])


  0%|          | 0/2500 [00:00<?, ?it/s]

In [71]:
# AUC (CV score) on the validation rows; the last softmax column is the "Yes" probability.

roc_auc_score(train_df[val_idx]["labels"].to_numpy(), torch.softmax(torch.tensor(preds), dim=1).numpy()[:, -1])


0.9772776384815273

In [75]:
# Preprocess test_df.
# NOTE(review): this rebinds test_df, so the cell is not safe to run twice.

test_df = test_df.with_columns(
    pl.lit(0).alias("labels")
)  # Add a dummy label for preprocessing (effectively unused: inference drops the last token).
test_df = preprocess_df(test_df, tokenizer)
test_df.head(3)


Clothing ID,Age,Title,Review Text,Positive Feedback Count,labels,input_ids,attention_mask
i64,i64,str,str,i64,i32,list[i64],list[i64]
0,32,"""So happy i bought this skirt!""","""I love this skirt. it does run…",0,0,"[2, 106, 1645, 108, 23171, 733, 790, 736, 2425, 1134, 5656, 573, 3225, 3482, 611, 573, 7796, 235265, 108, 1261, 6287, 235283, 1294, 108, 235345, 5973, 235292, 108, 11345, 235292, 235248, 235304, 235284, 108, 4883, 235292, 1704, 4915, 496, 8989, 736, 23210, 235341, 108, 235285, 2182, 736, 23210, 235265, 665, 1721, 2060, 476, 2356, 2301, 235269, 496, 11538, 671, 3437, 2301, 578, 665, 603, 11818, 696, 573, 23663, 235269, 901, 2076, 3598, 235265, 496, 791, 4956, 978, 75629, 611, 736, 23210, 1178, 7669, 4341, 496, 791, 14987, 235265, 496, 235303, 524, 1125, 11892, 696, 573, 38701, 4659, 731, 1461, … 1307]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, … 1]"
0,34,"""Runs small""","""Beautiful patterns and colors,…",0,0,"[2, 106, 1645, 108, 23171, 733, 790, 736, 2425, 1134, 5656, 573, 3225, 3482, 611, 573, 7796, 235265, 108, 1261, 6287, 235283, 1294, 108, 235345, 5973, 235292, 108, 11345, 235292, 235248, 235304, 235310, 108, 4883, 235292, 80963, 2301, 108, 22106, 12136, 578, 9276, 235269, 901, 665, 27505, 1508, 1536, 578, 10140, 2301, 235265, 496, 235303, 235262, 16342, 476, 2395, 235248, 235310, 578, 2371, 573, 2301, 235265, 143682, 832, 573, 1703, 235269, 901, 611, 573, 68038, 2857, 235265, 573, 23210, 27505, 712, 1536, 674, 573, 9662, 5163, 1706, 729, 476, 3298, 36919, 235265, 1582, 476, 21353, 1861, 665, 2277, 603, … 1307]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, … 1]"
0,37,"""Love the comfort of thi skirt""","""It is easily paired with a nic…",0,0,"[2, 106, 1645, 108, 23171, 733, 790, 736, 2425, 1134, 5656, 573, 3225, 3482, 611, 573, 7796, 235265, 108, 1261, 6287, 235283, 1294, 108, 235345, 5973, 235292, 108, 11345, 235292, 235248, 235304, 235324, 108, 4883, 235292, 7377, 573, 11852, 576, 3288, 23210, 108, 1718, 603, 6645, 44600, 675, 476, 4866, 11581, 604, 1160, 578, 12051, 1775, 577, 8044, 832, 1744, 109, 106, 2516, 108, 1261, 235292, 1307]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"


In [76]:
# Inference on test_df.
# NOTE(review): final_output_dir is whatever the training loop assigned last,
# i.e. a single fold's adapter — confirm this is the intended checkpoint.

test_preds = inference(final_output_dir, test_df)


  0%|          | 0/11155 [00:00<?, ?it/s]

In [77]:
# Submission

# Start from the sample submission and set "target" to the "Yes" probability
# (last column of the softmax over the [No, Yes] logits).
sub_df = pl.read_csv(cfg.data.sample_submission_path)
sub_df = sub_df.with_columns(pl.Series(torch.softmax(torch.tensor(test_preds), dim=1).numpy()[:, -1]).alias("target"))
# The file name carries run_name so submissions from different runs do not overwrite each other.
sub_df.write_csv(os.path.join(cfg.data.results_dir, f"{cfg.run_name}_submission.csv"))
sub_df.head()


target
f32
0.99929
0.268941
0.99929
0.148047
0.99883
