In [1]:
import os
import pickle
RANDOM_STATE = 42
import tqdm
import numpy as np
import pandas as pd
import torch
import torchmetrics
import logging
import pytorch_lightning as pl
pl.seed_everything(RANDOM_STATE)
from copy import deepcopy
from functools import partial

from omegaconf import DictConfig

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from pytorch_lightning.loggers import TensorBoardLogger

from ptls.nn import TrxEncoder, RnnSeqEncoder, Head, L2NormEncoder
from ptls.data_load.utils import collate_feature_dict
from ptls.data_load.datasets import MemoryMapDataset, AugmentationDataset
from ptls.data_load.padded_batch import PaddedBatch
from ptls.data_load.iterable_processing import SeqLenFilter
from ptls.preprocessing import PandasDataPreprocessor
from ptls.frames import PtlsDataModule
from ptls.frames.coles import CoLESModule, ColesDataset
from ptls.frames.coles.losses import SoftmaxLoss
from ptls.frames.coles.metric import BatchRecallTopK
from ptls.frames.coles.split_strategy import SampleSlices
from ptls.frames.inference_module import InferenceModule
from ptls.data_load.augmentations import RandomSlice, DropoutTrx
import warnings
warnings.filterwarnings("ignore")


libgomp: Invalid value for environment variable OMP_NUM_THREADS
Global seed set to 42

libgomp: Invalid value for environment variable OMP_NUM_THREADS


In [2]:
class SeqToTargetDataset(torch.utils.data.Dataset):
    def __init__(self,
                 data,
                 target_col_name,
                 target_dtype=None,
                 *args, **kwargs,
                 ):
        super().__init__(*args, **kwargs)

        self.data = data
        
        self.target_col_name = target_col_name
        if type(target_dtype) is str:
            self.target_dtype = getattr(torch, target_dtype)
        else:
            self.target_dtype = target_dtype

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        feature_arrays = self.data[item]
        return feature_arrays

    def __iter__(self):
        for feature_arrays in self.data:
            yield feature_arrays

    def collate_fn(self, padded_batch):
        padded_batch = collate_feature_dict(padded_batch)
        
        target = padded_batch.payload[self.target_col_name]
        time = padded_batch.payload['time']
        del padded_batch.payload[self.target_col_name]
        if self.target_dtype is not None:
            target = target.to(dtype=self.target_dtype)

        return padded_batch, target, time


class SeqToTargetIterableDataset(SeqToTargetDataset, torch.utils.data.IterableDataset):
    pass

In [3]:
class SequenceToTarget(pl.LightningModule):
    def __init__(
        self,
        seq_encoder: torch.nn.Module,
        head: torch.nn.Module=None,
        head_time: torch.nn.Module=None,
        loss: torch.nn.Module=None,
        metric_list: torchmetrics.Metric=None,
        optimizer_partial=None,
        lr_scheduler_partial=None,
        pretrained_lr=None,
        train_update_n_steps=None
    ):
        super().__init__()

        self.save_hyperparameters(ignore=[
            'seq_encoder', 'head', 'head_time', 'loss',
            'metric_list', 'optimizer_partial', 'lr_scheduler_partial'
        ])
        self.seq_encoder = seq_encoder
        self.head = head
        self.head_time = head_time
        self.loss = loss

        if type(metric_list) is dict or type(metric_list) is DictConfig:
            metric_list = [(k, v) for k, v in metric_list.items()]
        else:
            if type(metric_list) is not list:
                metric_list = [metric_list]
            metric_list = [(m.__class__.__name__, m) for m in metric_list]

        self.train_metrics = torch.nn.ModuleDict([(name, deepcopy(mc)) for name, mc in metric_list])
        self.valid_metrics = torch.nn.ModuleDict([(name, deepcopy(mc)) for name, mc in metric_list])

        self.optimizer_partial = optimizer_partial
        self.lr_scheduler_partial = lr_scheduler_partial
        
    def forward(self, x):
        add_features = None
        
        if isinstance(x, tuple):
            x, add_features = x

        x = self.seq_encoder(x)
        
        if self.head is not None:
            y_h = self.head(x)
        else:
            y_h = x
            
        t_h = self.head_time(x)
        
        return y_h, t_h

    def training_step(self, batch, _):
        x, y, t = batch
        y_h, t_h = self(x)

        loss = self.loss(y_h, y)
        self.log('loss/train_loss', loss)
        for name, mf in self.train_metrics.items():
            mf(y_h, y)
            
        loss_t = (t_h - t / 100.0).pow(2).mean()
        self.log('loss/loss_time', loss_t)
        return loss + 0.1 * loss_t

    def training_epoch_end(self, outputs):
        for name, mf in self.train_metrics.items():
            self.log(f'{name}/train', mf.compute(), prog_bar=False)
        for name, mf in self.train_metrics.items():
            mf.reset()

    def validation_step(self, batch, _):
        x, y, t = batch
        y_h, t_h = self(x)
        self.log('loss/valid', self.loss(y_h, y))
        for name, mf in self.valid_metrics.items():
            mf(y_h, y)

    def validation_epoch_end(self, outputs):
        for name, mf in self.valid_metrics.items():
            self.log(f'{name}/valid', mf.compute(), prog_bar=True)
        for name, mf in self.valid_metrics.items():
            mf.reset()

    def configure_optimizers(self):
        if self.hparams.pretrained_lr is not None:
            if self.hparams.pretrained_lr == 'freeze':
                for p in self.seq_encoder.parameters():
                    p.requires_grad = False
                parameters = self.parameters()
            else:
                parameters = [
                    {'params': self.seq_encoder.parameters(), 'lr': self.hparams.pretrained_lr},
                    {'params': self.head.parameters()},  # use predefined lr from `self.optimizer_partial`
                ]
        else:
            parameters = self.parameters()

        optimizer = self.optimizer_partial(parameters)
        scheduler = self.lr_scheduler_partial(optimizer)
        return [optimizer], [scheduler]

In [8]:
PATH = '/home/jovyan/sakhno/datafusion-2024/'

In [9]:
train = pd.read_csv(
    os.path.join(PATH, 'data/train.csv'),
    low_memory=False
)

In [10]:
transactions = pd.read_csv(
    os.path.join(PATH, 'prepared_data/transactions.csv'),
    parse_dates=['transaction_dttm'],
    low_memory=False,
)
transactions['hour'] = transactions['transaction_dttm'].dt.hour

In [11]:
trx_preprocessor = PandasDataPreprocessor(
    col_id='user_id',
    col_event_time='transaction_dttm',
    event_time_transformation='dt_to_timestamp',
    cols_category=[
        'mcc_code',
        'currency_rk',
        'day_of_week',
        'is_day_off',
        'hour',
    ],
    cols_numerical=[
        'transaction_amt',
        'days_from_first_tr',
        'days_from_prev_tr',
        'hours_from_first_tr',
        'hours_from_prev_tr',
        'days_to_report',
    ],
    return_records=False,
)

In [12]:
transactions_ = trx_preprocessor.fit_transform(transactions)

In [13]:
df = transactions_.copy()
df = pd.merge(df, train[['user_id', 'target', 'time']], on='user_id')

df_train, df_test = train_test_split(df, random_state=42, test_size=0.2)

In [14]:
def inference(model, dl, device='cuda:0'):
    logits = []
    model.to(device)
    softmax = torch.nn.Softmax(dim=0) 
    for batch in tqdm.tqdm(dl, position=0, leave=True):
        with torch.no_grad():
            x, _, _ = batch
            y_h, t_h = model(x.to(device))
            logits.extend([y_h.cpu()])
        
    logits = softmax(torch.vstack(logits)[:, 1]).cpu()
    
    torch.cuda.empty_cache()

    return logits

In [15]:
trx_encoder_params = dict(
    numeric_values={
        'transaction_amt': 'log',
        'days_from_first_tr': 'identity',
        'days_from_prev_tr': 'log',
        'hours_from_first_tr': 'identity',
        'hours_from_prev_tr': 'log',
        'days_to_report': 'log',
    },
    embeddings={
        'currency_rk': {'in': 5, 'out': 16},
        'day_of_week': {'in': 8, 'out': 16},
        'mcc_code': {'in': 330, 'out': 32},
        'is_day_off': {'in': 4, 'out': 2},
        'hour': {'in': 30, 'out': 4}
    },
    use_batch_norm_with_lens=True,
)

seq_encoder = RnnSeqEncoder(
    trx_encoder=TrxEncoder(**trx_encoder_params),
    hidden_size=800,
    type='gru',
)


In [16]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state = RANDOM_STATE)

In [17]:
models = []

In [18]:
predictions_5folds = []

In [19]:
a = torch.load('model/seq_encoder.pt')
a_cpy = a.copy()
for key, value in a.items():
    if 'numerical' in key:
        a_cpy[key.replace('numerical', 'custom_embedding')] = value
        del a_cpy[key]

In [21]:
for i, (train_index, test_index) in enumerate(skf.split(df_train.drop(columns=['target']), df_train['target'])):
    train_, test_ = df_train.iloc[train_index], df_train.iloc[test_index]
    
    dataset_train = train_.to_dict(orient='records')
    dataset_test = test_.to_dict(orient='records')
    
    sup_dataset = PtlsDataModule(
        train_data=SeqToTargetDataset(
            AugmentationDataset(
                dataset_train,
                f_augmentations=[
                    DropoutTrx(0.1),
                ],
            ),
            target_col_name='target',
            target_dtype=torch.long,
        ),
        valid_data=SeqToTargetDataset(
            dataset_test,
            target_col_name='target',
            target_dtype=torch.long,
        ),
        train_batch_size=256,
        train_num_workers=8,
        train_drop_last=True,

        valid_batch_size=256,
        valid_num_workers=8,
        valid_drop_last=True
    )
    
    seq_encoder.load_state_dict(a_cpy)

    sup_module = SequenceToTarget(
        seq_encoder=seq_encoder,
        head=torch.nn.Sequential(
            torch.nn.Linear(seq_encoder.embedding_size, 512),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(512, 2),
            torch.nn.LogSoftmax(dim=1),
        ),
        head_time=torch.nn.Sequential(
            torch.nn.Linear(seq_encoder.embedding_size, 1),
        ),
        loss=torch.nn.NLLLoss(),
        metric_list=torchmetrics.AUROC(num_classes=2),
        optimizer_partial=partial(torch.optim.AdamW, lr=1e-4, weight_decay=0.0),
        lr_scheduler_partial=partial(torch.optim.lr_scheduler.StepLR, step_size=1, gamma=0.2),
    )
    
    # sup_module.load_state_dict(torch.load(f"model/sup_modules-kfold/model-1.{i}.pt"))
    
    trainer = pl.Trainer(
        logger=TensorBoardLogger('lightning_logs', name=f'CoLES-supervised-agg_{i}'),
        max_epochs=6,
        gpus=1 if torch.cuda.is_available() else 0,
        enable_progress_bar=True,
        gradient_clip_algorithm='norm',
        gradient_clip_val=0.18
    )
    
    trainer.fit(sup_module, sup_dataset)
    
    torch.save(sup_module.state_dict(), f"model/sup_modules-kfold/model-1.{i}.pt")
    
    predictions_test = test_[["user_id"]].copy()
    
    dataset = SeqToTargetDataset(
        data=dataset_test,
        target_col_name='target',
    )

    dl = torch.utils.data.DataLoader(
        dataset=dataset,
        collate_fn=dataset.collate_fn,
        shuffle=False,
        batch_size=512,
        num_workers=4,
    )
    
    predictions_test["sp"] = inference(sup_module, dl)
    
    predictions_5folds.append(predictions_test)
    
    print(12*"-")
    print("AUROC; 5th fold:", roc_auc_score(test_["target"].values, predictions_test["sp"]))
    print(12*"-")
    
    models.append(deepcopy(sup_module))

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type          | Params
------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder | 2.1 M 
1 | head          | Sequential    | 411 K 
2 | head_time     | Sequential    | 801   
3 | loss          | NLLLoss       | 0     
4 | train_metrics | ModuleDict    | 0     
5 | valid_metrics | ModuleDict    | 0     
------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

100%|██████████| 20/20 [00:02<00:00,  8.20it/s]


------------
AUROC; 5th fold: 0.7303968896059996
------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type          | Params
------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder | 2.1 M 
1 | head          | Sequential    | 411 K 
2 | head_time     | Sequential    | 801   
3 | loss          | NLLLoss       | 0     
4 | train_metrics | ModuleDict    | 0     
5 | valid_metrics | ModuleDict    | 0     
------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

100%|██████████| 20/20 [00:02<00:00,  7.65it/s]


------------
AUROC; 5th fold: 0.7312295277355122
------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs/CoLES-supervised-agg_2
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type          | Params
------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder | 2.1 M 
1 | head          | Sequential    | 411 K 
2 | head_time     | Sequential    | 801   
3 | loss          | NLLLoss       | 0     
4 | train_metrics | ModuleDict    | 0     
5 | valid_metrics | ModuleDict    | 0     
------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

100%|██████████| 20/20 [00:02<00:00,  7.54it/s]


------------
AUROC; 5th fold: 0.7340950298634813
------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs/CoLES-supervised-agg_3
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type          | Params
------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder | 2.1 M 
1 | head          | Sequential    | 411 K 
2 | head_time     | Sequential    | 801   
3 | loss          | NLLLoss       | 0     
4 | train_metrics | ModuleDict    | 0     
5 | valid_metrics | ModuleDict    | 0     
------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

100%|██████████| 20/20 [00:02<00:00,  7.05it/s]


------------
AUROC; 5th fold: 0.717170415402604
------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs/CoLES-supervised-agg_4
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type          | Params
------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder | 2.1 M 
1 | head          | Sequential    | 411 K 
2 | head_time     | Sequential    | 801   
3 | loss          | NLLLoss       | 0     
4 | train_metrics | ModuleDict    | 0     
5 | valid_metrics | ModuleDict    | 0     
------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

100%|██████████| 20/20 [00:02<00:00,  6.84it/s]


------------
AUROC; 5th fold: 0.7265722520462015
------------


In [22]:
predictions_5folds = pd.concat(predictions_5folds, axis=0)

In [23]:
temp = predictions_5folds.merge(train[["user_id", "target"]], on="user_id")

In [24]:
print(12*"-")
print("AUROC; 5th fold:", roc_auc_score(temp["target"].values, temp["sp"].values))
print(12*"-")

------------
AUROC; 5th fold: 0.7279373781131475
------------


In [25]:
dataset_test = df_test.copy()
dataset_test[["target", "time"]] = None
dataset_test = dataset_test.to_dict(orient='records')

dataset = SeqToTargetDataset(
    data=dataset_test,
    target_col_name='target',
)

dl = torch.utils.data.DataLoader(
    dataset=dataset,
    collate_fn=dataset.collate_fn,
    shuffle=False,
    batch_size=512,
    num_workers=4,
)

predictions_test = df_test[["user_id"]].copy()

for i in range(5):
    predictions_test[f"sp_{i}"] = inference(models[i], dl)

predictions_test

100%|██████████| 25/25 [00:03<00:00,  7.14it/s]
100%|██████████| 25/25 [00:03<00:00,  6.78it/s]
100%|██████████| 25/25 [00:03<00:00,  6.80it/s]
100%|██████████| 25/25 [00:03<00:00,  6.59it/s]
100%|██████████| 25/25 [00:03<00:00,  6.77it/s]


Unnamed: 0,user_id,sp_0,sp_1,sp_2,sp_3,sp_4
33042,287176,0.000035,0.000033,0.000030,0.000037,0.000051
7209,61447,0.000048,0.000059,0.000040,0.000069,0.000041
24367,210716,0.000017,0.000018,0.000013,0.000017,0.000023
19599,168576,0.000031,0.000035,0.000031,0.000035,0.000033
31465,273446,0.000083,0.000075,0.000067,0.000082,0.000067
...,...,...,...,...,...,...
29518,256542,0.000198,0.000171,0.000171,0.000200,0.000273
40886,354734,0.000028,0.000035,0.000032,0.000032,0.000033
19792,170336,0.000033,0.000034,0.000039,0.000037,0.000032
31101,270218,0.000025,0.000024,0.000039,0.000029,0.000026


In [26]:
predictions_test["sp"] = predictions_test.iloc[:, 1:].mean(axis=1)
predictions_test

Unnamed: 0,user_id,sp_0,sp_1,sp_2,sp_3,sp_4,sp
33042,287176,0.000035,0.000033,0.000030,0.000037,0.000051,0.000037
7209,61447,0.000048,0.000059,0.000040,0.000069,0.000041,0.000051
24367,210716,0.000017,0.000018,0.000013,0.000017,0.000023,0.000018
19599,168576,0.000031,0.000035,0.000031,0.000035,0.000033,0.000033
31465,273446,0.000083,0.000075,0.000067,0.000082,0.000067,0.000075
...,...,...,...,...,...,...,...
29518,256542,0.000198,0.000171,0.000171,0.000200,0.000273,0.000203
40886,354734,0.000028,0.000035,0.000032,0.000032,0.000033,0.000032
19792,170336,0.000033,0.000034,0.000039,0.000037,0.000032,0.000035
31101,270218,0.000025,0.000024,0.000039,0.000029,0.000026,0.000029


In [27]:
print(12*"-")
print("AUROC; 5th fold:", roc_auc_score(df_test["target"].values, predictions_test["sp"].values))
print(12*"-")

------------
AUROC; 5th fold: 0.7573031212422913
------------


In [28]:
train_test_predictions = pd.concat([predictions_5folds, predictions_test[["user_id", "sp"]]], axis=0)

In [29]:
train_test_predictions.to_csv("sp-preds_train-test.csv", index=False)

---

In [30]:
sample_submit = pd.read_csv(os.path.join(PATH, "data/sample_submit_naive.csv"))
sample_submit

Unnamed: 0,user_id,predict
0,9,-3.184396
1,61,-2.623560
2,62,-2.840654
3,80,-3.269438
4,88,-2.068500
...,...,...
31995,561362,-2.438619
31996,561419,-2.438619
31997,561895,-2.437339
31998,561908,-2.437339


In [31]:
sbmt_df = transactions_.copy()
sbmt_df = pd.merge(sbmt_df, sample_submit[['user_id']], on='user_id')
sbmt_df[["target", "time"]] = None

In [32]:
sbmt_models = []

In [None]:
for i, (train_index, test_index) in enumerate(skf.split(df.drop(columns=['target']), df["target"])):
    train_, test_ = df.iloc[train_index], df.iloc[test_index]
    
    dataset_train = train_.to_dict(orient='records')
    dataset_test = test_.to_dict(orient='records')
    
    sup_dataset = PtlsDataModule(
        train_data=SeqToTargetDataset(
            AugmentationDataset(
                dataset_train,
                f_augmentations=[
                    DropoutTrx(0.1),
                ],
            ),
            target_col_name='target',
            target_dtype=torch.long,
        ),
        valid_data=SeqToTargetDataset(
            dataset_test,
            target_col_name='target',
            target_dtype=torch.long,
        ),
        train_batch_size=256,
        train_num_workers=8,
        train_drop_last=True,

        valid_batch_size=256,
        valid_num_workers=8,
        valid_drop_last=True
    )
    
    seq_encoder.load_state_dict(a_cpy)

    sup_module = SequenceToTarget(
        seq_encoder=seq_encoder,
        head=torch.nn.Sequential(
            torch.nn.Linear(seq_encoder.embedding_size, 512),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(512, 2),
            torch.nn.LogSoftmax(dim=1),
        ),
        head_time=torch.nn.Sequential(
            torch.nn.Linear(seq_encoder.embedding_size, 1),
        ),
        loss=torch.nn.NLLLoss(),
        metric_list=torchmetrics.AUROC(num_classes=2),
        optimizer_partial=partial(torch.optim.AdamW, lr=4e-4, weight_decay=0.0),
        lr_scheduler_partial=partial(torch.optim.lr_scheduler.StepLR, step_size=1, gamma=0.2),
    )
    
    trainer = pl.Trainer(
        logger=TensorBoardLogger('lightning_logs', name=f'CoLES-supervised-agg-sbmt_{i}'),
        max_epochs=7,
        gpus=1 if torch.cuda.is_available() else 0,
        enable_progress_bar=True,
        gradient_clip_algorithm='norm',
        gradient_clip_val=0.2
    )
    
    trainer.fit(sup_module, sup_dataset)
    
    torch.save(sup_module.state_dict(), f"model/sup_modules-kfold/sbmt-model-0.{i}.pt")
    
    dataset = SeqToTargetDataset(
        data=dataset_test,
        target_col_name='target',
    )

    dl = torch.utils.data.DataLoader(
        dataset=dataset,
        collate_fn=dataset.collate_fn,
        shuffle=False,
        batch_size=512,
        num_workers=4,
    )
    
    predictions_test = inference(sup_module, dl)
    
    print(12*"-")
    print("AUROC; 5th fold:", roc_auc_score(test_["target"].values, predictions_test))
    print(12*"-")
    
    sbmt_models.append(deepcopy(sup_module))

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs/CoLES-supervised-agg-sbmt_0
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type          | Params
------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder | 2.1 M 
1 | head          | Sequential    | 411 K 
2 | head_time     | Sequential    | 801   
3 | loss          | NLLLoss       | 0     
4 | train_metrics | ModuleDict    | 0     
5 | valid_metrics | ModuleDict    | 0     
------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

100%|██████████| 25/25 [00:03<00:00,  7.54it/s]


------------
AUROC; 5th fold: 0.7547174197650328
------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs/CoLES-supervised-agg-sbmt_1
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type          | Params
------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder | 2.1 M 
1 | head          | Sequential    | 411 K 
2 | head_time     | Sequential    | 801   
3 | loss          | NLLLoss       | 0     
4 | train_metrics | ModuleDict    | 0     
5 | valid_metrics | ModuleDict    | 0     
------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

100%|██████████| 25/25 [00:05<00:00,  4.25it/s]


------------
AUROC; 5th fold: 0.7590074983976923
------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs/CoLES-supervised-agg-sbmt_2
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type          | Params
------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder | 2.1 M 
1 | head          | Sequential    | 411 K 
2 | head_time     | Sequential    | 801   
3 | loss          | NLLLoss       | 0     
4 | train_metrics | ModuleDict    | 0     
5 | valid_metrics | ModuleDict    | 0     
------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

100%|██████████| 25/25 [00:03<00:00,  7.00it/s]


------------
AUROC; 5th fold: 0.7389864747349209
------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs/CoLES-supervised-agg-sbmt_3
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type          | Params
------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder | 2.1 M 
1 | head          | Sequential    | 411 K 
2 | head_time     | Sequential    | 801   
3 | loss          | NLLLoss       | 0     
4 | train_metrics | ModuleDict    | 0     
5 | valid_metrics | ModuleDict    | 0     
------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [None]:
dataset_sbmt = sbmt_df.copy()
dataset_sbmt[["target", "time"]] = None
dataset_sbmt = dataset_sbmt.to_dict(orient='records')

dataset = SeqToTargetDataset(
    data=dataset_sbmt,
    target_col_name='target',
)

dl = torch.utils.data.DataLoader(
    dataset=dataset,
    collate_fn=dataset.collate_fn,
    shuffle=False,
    batch_size=512,
    num_workers=4,
)

predictions_sbmt = sbmt_df[["user_id"]].copy()

for i in range(5):
    predictions_sbmt[f"sp_{i}"] = inference(sbmt_models[i], dl)

predictions_sbmt

In [None]:
predictions_sbmt["sp"] = predictions_sbmt.iloc[:, 1:].mean(axis=1)
predictions_sbmt

In [None]:
predictions_sbmt[["user_id", "sp"]].to_csv(f"sp-preds_sbmt{RANDOM_STATE}.csv", index=False)