https://www.kaggle.com/vikazrajpurohit/3-model-training-and-inference

# Features

In [1]:
# import
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import seaborn as sns
 
from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
 
import warnings
warnings.filterwarnings('ignore')

In [2]:
# data
data_dir = './data/lish-moa/'

train_feat = pd.read_csv(data_dir + 'train_features.csv')
scored = pd.read_csv(data_dir + 'train_targets_scored.csv')
nonscored = pd.read_csv(data_dir + 'train_targets_nonscored.csv')
drugs = pd.read_csv(data_dir + 'train_drug.csv')
test_feat = pd.read_csv(data_dir + 'test_features.csv')
submission = pd.read_csv(data_dir + 'sample_submission.csv')

print('train_features: {}'.format(train_feat.shape))
print('train_targets_scored: {}'.format(scored.shape))
print('train_targets_nonscored: {}'.format(nonscored.shape))
print('train_drug: {}'.format(drugs.shape))
print('test_features: {}'.format(test_feat.shape))
print('sample_submission: {}'.format(submission.shape))

train_features: (23814, 876)
train_targets_scored: (23814, 207)
train_targets_nonscored: (23814, 403)
train_drug: (23814, 2)
test_features: (3982, 876)
sample_submission: (3982, 207)


In [3]:
train_feat2=train_feat.copy()
test_feat2=test_feat.copy()

In [4]:
GENES = [col for col in train_feat.columns if col.startswith('g-')]
CELLS = [col for col in test_feat.columns if col.startswith('c-')]

## QuantileTransformer
将train和test的GENES和CELLS转换为normal distribution

In [5]:
# qt for GENES+CELLS
qt = QuantileTransformer(n_quantiles=100,random_state=42,output_distribution='normal')
data = pd.concat([pd.DataFrame(train_feat[GENES+CELLS]), pd.DataFrame(test_feat[GENES+CELLS])])
data2 = qt.fit_transform(data[GENES+CELLS])

train_feat[GENES+CELLS] = pd.DataFrame(data2[:train_feat.shape[0]])
test_feat[GENES+CELLS] = pd.DataFrame(data2[-test_feat.shape[0]:])

In [6]:
# seed
def seed_everything(seed_value):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)

    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    
seed_everything(42)

In [7]:
# pca for GENES
n_comp = 600  #<--Update

data = pd.concat([pd.DataFrame(train_feat[GENES]), pd.DataFrame(test_feat[GENES])])
data2 = (PCA(n_components=n_comp, random_state=42).fit_transform(data[GENES]))
train2 = data2[:train_feat.shape[0]]; 
test2 = data2[-test_feat.shape[0]:]

train_gpca = pd.DataFrame(train2, columns=[f'pca_G-{i}' for i in range(n_comp)])
test_gpca = pd.DataFrame(test2, columns=[f'pca_G-{i}' for i in range(n_comp)])

# drop_cols = [f'c-{i}' for i in range(n_comp,len(GENES))]
train_feat = pd.concat((train_feat, train_gpca), axis=1)
test_feat = pd.concat((test_feat, test_gpca), axis=1)

In [8]:
# pca for CELLS
n_comp = 50  #<--Update

data = pd.concat([pd.DataFrame(train_feat[CELLS]), pd.DataFrame(test_feat[CELLS])])
data2 = (PCA(n_components=n_comp, random_state=42).fit_transform(data[CELLS]))
train2 = data2[:train_feat.shape[0]]
test2 = data2[-test_feat.shape[0]:]

train_cpca = pd.DataFrame(train2, columns=[f'pca_C-{i}' for i in range(n_comp)])
test_cpca = pd.DataFrame(test2, columns=[f'pca_C-{i}' for i in range(n_comp)])

# drop_cols = [f'c-{i}' for i in range(n_comp,len(CELLS))]
train_feat = pd.concat((train_feat, train_cpca), axis=1)
test_feat = pd.concat((test_feat, test_cpca), axis=1)

In [9]:
train_feat.shape,train_feat.shape

((23814, 1526), (23814, 1526))

In [10]:
# var threshold for GENES CELLS features
var_thresh = VarianceThreshold(0.85)  #<-- Update

data = train_feat.append(test_feat)
data_transformed = var_thresh.fit_transform(data.iloc[:, 4:])

train_features_transformed = data_transformed[ : train_feat.shape[0]]
test_features_transformed = data_transformed[-test_feat.shape[0] : ]


train_feat = pd.DataFrame(train_feat[['sig_id','cp_type','cp_time','cp_dose']].values.reshape(-1, 4),\
                              columns=['sig_id','cp_type','cp_time','cp_dose'])

train_feat = pd.concat([train_feat, pd.DataFrame(train_features_transformed)], axis=1)


test_feat = pd.DataFrame(test_feat[['sig_id','cp_type','cp_time','cp_dose']].values.reshape(-1, 4),\
                             columns=['sig_id','cp_type','cp_time','cp_dose'])

test_feat = pd.concat([test_feat, pd.DataFrame(test_features_transformed)], axis=1)

In [11]:
train_feat.shape,train_feat.shape

((23814, 1028), (23814, 1028))

In [12]:
# cluster feature for GENES and CELLS
from sklearn.cluster import KMeans

def fe_cluster(train, test, n_clusters_g = 22, n_clusters_c = 4, SEED = 42):
    
    features_g = GENES
    features_c = CELLS
    
    def create_cluster(train, test, features, kind = 'g', n_clusters = n_clusters_g):
        train_ = train[features].copy()
        test_ = test[features].copy()
        data = pd.concat([train_, test_], axis = 0)
        kmeans = KMeans(n_clusters = n_clusters, random_state = SEED).fit(data)
        train[f'clusters_{kind}'] = kmeans.labels_[:train.shape[0]]
        test[f'clusters_{kind}'] = kmeans.labels_[train.shape[0]:]
        train = pd.get_dummies(train, columns = [f'clusters_{kind}'])
        test = pd.get_dummies(test, columns = [f'clusters_{kind}'])
        return train, test
    
    train, test = create_cluster(train, test, features_g, kind = 'g', n_clusters = n_clusters_g)
    train, test = create_cluster(train, test, features_c, kind = 'c', n_clusters = n_clusters_c)
    return train, test

train_feat2,test_feat2=fe_cluster(train_feat2,test_feat2)

In [13]:
# cluster for GENES and CELLS PCA features
def fe_cluster_pca(train, test,n_clusters=5,SEED = 42):
        data=pd.concat([train,test],axis=0)
        kmeans = KMeans(n_clusters = n_clusters, random_state = SEED).fit(data)
        train[f'clusters_pca'] = kmeans.labels_[:train.shape[0]]
        test[f'clusters_pca'] = kmeans.labels_[train.shape[0]:]
        train = pd.get_dummies(train, columns = [f'clusters_pca'])
        test = pd.get_dummies(test, columns = [f'clusters_pca'])
        return train, test

train_pca=pd.concat((train_gpca,train_cpca),axis=1)
test_pca=pd.concat((test_gpca,test_cpca),axis=1)
train_cluster_pca ,test_cluster_pca = fe_cluster_pca(train_pca,test_pca)

In [14]:
train_cluster_pca = train_cluster_pca.iloc[:,650:]
test_cluster_pca = test_cluster_pca.iloc[:,650:]

In [15]:
train_features_cluster=train_feat2.iloc[:,876:]
test_features_cluster=test_feat2.iloc[:,876:]

In [16]:
gsquarecols=['g-574','g-211','g-216','g-0','g-255','g-577','g-153','g-389','g-60','g-370','g-248','g-167','g-203','g-177','g-301','g-332','g-517','g-6','g-744','g-224','g-162','g-3','g-736','g-486','g-283','g-22','g-359','g-361','g-440','g-335','g-106','g-307','g-745','g-146','g-416','g-298','g-666','g-91','g-17','g-549','g-145','g-157','g-768','g-568','g-396']
len(gsquarecols)

45

In [17]:
# statistic feature for GENES and CELLS
def fe_stats(train, test):
    
    features_g = GENES
    features_c = CELLS
    
    for df in train, test:
        df['g_sum'] = df[features_g].sum(axis = 1)
        df['g_mean'] = df[features_g].mean(axis = 1)
        df['g_std'] = df[features_g].std(axis = 1)
        df['g_kurt'] = df[features_g].kurtosis(axis = 1)
        df['g_skew'] = df[features_g].skew(axis = 1)
        df['c_sum'] = df[features_c].sum(axis = 1)
        df['c_mean'] = df[features_c].mean(axis = 1)
        df['c_std'] = df[features_c].std(axis = 1)
        df['c_kurt'] = df[features_c].kurtosis(axis = 1)
        df['c_skew'] = df[features_c].skew(axis = 1)
        df['gc_sum'] = df[features_g + features_c].sum(axis = 1)
        df['gc_mean'] = df[features_g + features_c].mean(axis = 1)
        df['gc_std'] = df[features_g + features_c].std(axis = 1)
        df['gc_kurt'] = df[features_g + features_c].kurtosis(axis = 1)
        df['gc_skew'] = df[features_g + features_c].skew(axis = 1)
        
        df['c52_c42'] = df['c-52'] * df['c-42']
        df['c13_c73'] = df['c-13'] * df['c-73']
        df['c26_c13'] = df['c-23'] * df['c-13']
        df['c33_c6'] = df['c-33'] * df['c-6']
        df['c11_c55'] = df['c-11'] * df['c-55']
        df['c38_c63'] = df['c-38'] * df['c-63']
        df['c38_c94'] = df['c-38'] * df['c-94']
        df['c13_c94'] = df['c-13'] * df['c-94']
        df['c4_c52'] = df['c-4'] * df['c-52']
        df['c4_c42'] = df['c-4'] * df['c-42']
        df['c13_c38'] = df['c-13'] * df['c-38']
        df['c55_c2'] = df['c-55'] * df['c-2']
        df['c55_c4'] = df['c-55'] * df['c-4']
        df['c4_c13'] = df['c-4'] * df['c-13']
        df['c82_c42'] = df['c-82'] * df['c-42']
        df['c66_c42'] = df['c-66'] * df['c-42']
        df['c6_c38'] = df['c-6'] * df['c-38']
        df['c2_c13'] = df['c-2'] * df['c-13']
        df['c62_c42'] = df['c-62'] * df['c-42']
        df['c90_c55'] = df['c-90'] * df['c-55']
        
        
        for feature in features_c:
             df[f'{feature}_squared'] = df[feature] ** 2     
                
        for feature in gsquarecols:
            df[f'{feature}_squared'] = df[feature] ** 2        
        
    return train, test

train_feat2,test_feat2=fe_stats(train_feat2,test_feat2)

In [18]:
train_features_stats=train_feat2.iloc[:,902:]
test_features_stats=test_feat2.iloc[:,902:]

In [19]:
train_feat.shape,test_feat.shape

((23814, 1028), (3982, 1028))

In [20]:
train_feat = pd.concat((train_feat, train_features_cluster,train_cluster_pca,train_features_stats), axis=1)
test_feat = pd.concat((test_feat, test_features_cluster,test_cluster_pca,test_features_stats), axis=1)

train_feat.shape,test_feat.shape

((23814, 1239), (3982, 1239))

In [21]:
# train,test
train = train_feat.merge(scored, on='sig_id')
train = train.merge(nonscored, on='sig_id')
train = train.merge(drugs, on='sig_id')
train = train[train['cp_type'] != 'ctl_vehicle'].reset_index(drop=True)
test = test_feat[test_feat['cp_type'] != 'ctl_vehicle'].reset_index(drop=True)

train.shape,test.shape

((21948, 1848), (3624, 1239))

In [22]:
train = train.drop('cp_type', axis=1)
test = test.drop('cp_type', axis=1)

In [23]:
# targets
target_cols = [x for x in scored.columns if x != 'sig_id']
aux_target_cols = [x for x in nonscored.columns if x != 'sig_id']
all_target_cols = target_cols + aux_target_cols

num_targets = len(target_cols)
num_aux_targets = len(aux_target_cols)
num_all_targets = len(all_target_cols)

print('num_targets: {}'.format(num_targets))
print('num_aux_targets: {}'.format(num_aux_targets))
print('num_all_targets: {}'.format(num_all_targets))

num_targets: 206
num_aux_targets: 402
num_all_targets: 608


In [24]:
print(train.shape)
print(test.shape)
print(submission.shape)

(21948, 1847)
(3624, 1238)
(3982, 207)


# Dataset and Train Models

In [25]:
# import

import os
import gc
import random
import math
import time

import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn.metrics import log_loss

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau

import warnings
warnings.filterwarnings("ignore")

# Tabnet 
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor

In [26]:
# SmoothBCEwLogits
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        assert 0 <= smoothing < 1

        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
            
        return targets

    def forward(self, inputs, targets):
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        loss = F.binary_cross_entropy_with_logits(inputs, targets,self.weight)

        if  self.reduction == 'sum':
            loss = loss.sum()
        elif  self.reduction == 'mean':
            loss = loss.mean()

        return loss

In [27]:
def process_data(data):
    data = pd.get_dummies(data, columns=['cp_time','cp_dose'])
    return data

feature_cols = [c for c in process_data(train).columns if c not in all_target_cols]
feature_cols = [c for c in feature_cols if c not in ['kfold', 'sig_id', 'drug_id']]
num_features = len(feature_cols)
num_features

1240

In [28]:
# cv_folds
from sklearn.model_selection import KFold
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

def make_cv_folds(train, SEEDS, NFOLDS, DRUG_THRESH):
    vc = train.drug_id.value_counts()
    vc1 = vc.loc[vc <= DRUG_THRESH].index.sort_values()
    vc2 = vc.loc[vc > DRUG_THRESH].index.sort_values()

    for seed_id in range(SEEDS):
        kfold_col = 'kfold_{}'.format(seed_id)
        
        # STRATIFY DRUGS 18X OR LESS
        dct1 = {}
        dct2 = {}

        skf = MultilabelStratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=seed_id)
        tmp = train.groupby('drug_id')[target_cols].mean().loc[vc1]

        for fold,(idxT, idxV) in enumerate(skf.split(tmp, tmp[target_cols])):
            dd = {k: fold for k in tmp.index[idxV].values}
            dct1.update(dd)

        # STRATIFY DRUGS MORE THAN 18X
        skf = MultilabelStratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=seed_id)
        tmp = train.loc[train.drug_id.isin(vc2)].reset_index(drop=True)

        for fold,(idxT, idxV) in enumerate(skf.split(tmp, tmp[target_cols])):
            dd = {k: fold for k in tmp.sig_id[idxV].values}
            dct2.update(dd)

        # ASSIGN FOLDS
        train[kfold_col] = train.drug_id.map(dct1)
        train.loc[train[kfold_col].isna(), kfold_col] = train.loc[train[kfold_col].isna(), 'sig_id'].map(dct2)
        train[kfold_col] = train[kfold_col].astype('int8')
        
    return train

SEEDS = 7
NFOLDS = 7
DRUG_THRESH = 18

train = make_cv_folds(train, SEEDS, NFOLDS, DRUG_THRESH)
train.head()

Unnamed: 0,sig_id,cp_time,cp_dose,0,1,2,3,4,5,6,...,xanthine_oxidase_inhibitor,xiap_inhibitor,drug_id,kfold_0,kfold_1,kfold_2,kfold_3,kfold_4,kfold_5,kfold_6
0,id_000644bb2,24,D1,1.146806,0.902075,-0.418339,-0.961202,-0.25477,-1.0213,-1.369236,...,0,0,b68db1d53,1,3,3,5,0,5,2
1,id_000779bfc,72,D1,0.128824,0.676862,0.274345,0.090495,1.208863,0.688965,0.316734,...,0,0,df89a8e5a,0,3,6,3,3,4,1
2,id_000a6266a,48,D1,0.790372,0.939951,1.428097,-0.121817,-0.002067,1.495091,0.238763,...,0,0,18bb41b2c,5,3,3,1,3,0,4
3,id_0015fd391,48,D1,-0.729866,-0.277163,-0.4412,0.766612,2.347817,-0.862761,-2.308829,...,0,0,8c7f86626,4,3,1,2,4,5,2
4,id_001626bd3,72,D2,-0.444558,-0.481202,0.974729,0.977467,1.468304,-0.874772,-0.372682,...,0,0,7cbed3131,6,5,3,3,2,1,4


In [29]:
MAX_EPOCH = 200

In [30]:
class LogitsLogLoss(Metric):

    def __init__(self):
        self._name = "logits_ll"
        self._maximize = False

    def __call__(self, y_true, y_pred):
        logits = 1 / (1 + np.exp(-y_pred))
        aux = (1 - y_true) * np.log(1 - logits + 1e-15) + y_true * np.log(logits + 1e-15)
        return np.mean(-aux)

In [42]:
def run_training(fold_id, seed_id):
    seed_everything(seed_id)
    
    train_ = process_data(train)
    test_ = process_data(test)
    
    kfold_col = f'kfold_{seed_id}'
    trn_idx = train_[train_[kfold_col] != fold_id].index
    val_idx = train_[train_[kfold_col] == fold_id].index
    
    train_df = train_[train_[kfold_col] != fold_id].reset_index(drop=True)
    valid_df = train_[train_[kfold_col] == fold_id].reset_index(drop=True)
    
    def train_model(model, tag_name, target_cols_now):
        x_train, y_train  = train_df[feature_cols].values, train_df[target_cols_now].values
        x_valid, y_valid =  valid_df[feature_cols].values, valid_df[target_cols_now].values
        
        model.fit(
            X_train = x_train,
            y_train = y_train,
            eval_set = [(x_valid, y_valid)],
            eval_name = ["val"],
            eval_metric = ["logits_ll"],
            max_epochs = MAX_EPOCH,
            patience = 20,
            batch_size = 1024, 
            virtual_batch_size = 32,
            num_workers = 1,
            drop_last = False,
            # To use binary cross entropy because this is not a regression problem
            loss_fn = SmoothBCEwLogits(smoothing=5e-5) # F.binary_cross_entropy_with_logits
        )
        # save tabnet model
        print('Model Saving:',f"{tag_name}_FOLD{fold_id}_.pth")
        saving_path_name =  f"{tag_name}_FOLD{fold_id}_.pth"
        saved_filepath = model.save_model(saving_path_name) # loaded_clf.load_model(saved_filepath)
        
        preds_val = model.predict(x_valid)
        preds = 1 / (1 + np.exp(-preds_val))
        
        oof = np.zeros((len(train), len(target_cols_now)))
        oof[val_idx] = preds
        return oof
    
    tabnet_params = dict(
        n_d = 32,
        n_a = 32,
        n_steps = 1,
        gamma = 1.3,
        lambda_sparse = 0,
        optimizer_fn = optim.Adam,
        optimizer_params = dict(lr = 2e-2, weight_decay = 1e-5),
        mask_type = "entmax",
        scheduler_params = dict(mode = "min", patience = 5, min_lr = 1e-5, factor = 0.9),
        scheduler_fn = ReduceLROnPlateau,
        seed = seed_id,
        verbose = 10
    )
    model = TabNetRegressor(**tabnet_params)

    # Fine-tune the model on scored targets only
    oof = train_model(model, 'tabnet_v2', target_cols)

    #--------------------- PREDICTION---------------------
    
    x_test = test_[feature_cols].values
    preds = model.predict(x_test)
    preds = (1 / (1 + np.exp(-preds)))

    return oof, preds

In [43]:
def run_k_fold(NFOLDS, seed_id):
    oof = np.zeros((len(train), len(target_cols)))
    predictions = np.zeros((len(test), len(target_cols)))
    
    for fold_id in range(NFOLDS):
        print(f'Seed:{seed_id},Fold:{fold_id}')
        oof_, pred_ = run_training(fold_id, seed_id)
        predictions += pred_ / NFOLDS
        oof += oof_
        
    return oof, predictions

In [44]:
from time import time

# Averaging on multiple SEEDS
SEED = [0, 1, 2, 3, 4, 5, 6]
oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))

time_begin = time()

for seed_id in SEED:
    oof_, predictions_ = run_k_fold(NFOLDS, seed_id)
    oof += oof_ / len(SEED)
    predictions += predictions_ / len(SEED)

time_diff = time() - time_begin

train[target_cols] = oof
test[target_cols] = predictions_

Seed:0,Fold:0
Device used : cuda
epoch 0  | loss: 0.38    | val_logits_ll: 0.04423 |  0:00:02s
epoch 10 | loss: 0.0212  | val_logits_ll: 0.02268 |  0:00:28s
epoch 20 | loss: 0.02028 | val_logits_ll: 0.01981 |  0:00:53s
epoch 30 | loss: 0.02    | val_logits_ll: 0.01964 |  0:01:18s
epoch 40 | loss: 0.01988 | val_logits_ll: 0.0195  |  0:01:43s
epoch 50 | loss: 0.01982 | val_logits_ll: 0.01944 |  0:02:10s
epoch 60 | loss: 0.01972 | val_logits_ll: 0.01939 |  0:02:35s
epoch 70 | loss: 0.01966 | val_logits_ll: 0.01935 |  0:03:01s
epoch 80 | loss: 0.01971 | val_logits_ll: 0.01947 |  0:03:27s
epoch 90 | loss: 0.01966 | val_logits_ll: 0.01931 |  0:03:52s
epoch 100| loss: 0.0196  | val_logits_ll: 0.01927 |  0:04:18s

Early stopping occured at epoch 104 with best_epoch = 84 and best_val_logits_ll = 0.01923
Best weights from best epoch are automatically used!
Model Saving: tabnet_v2_FOLD0_.pth
Successfully saved model at tabnet_v2_FOLD0_.pth.zip
Seed:0,Fold:1
Device used : cuda
epoch 0  | loss: 0.3

epoch 40 | loss: 0.0199  | val_logits_ll: 0.02011 |  0:01:45s
epoch 50 | loss: 0.01975 | val_logits_ll: 0.01947 |  0:02:10s
epoch 60 | loss: 0.0197  | val_logits_ll: 0.0194  |  0:02:35s
epoch 70 | loss: 0.01961 | val_logits_ll: 0.01941 |  0:03:01s
epoch 80 | loss: 0.01964 | val_logits_ll: 0.01936 |  0:03:27s
epoch 90 | loss: 0.01958 | val_logits_ll: 0.01936 |  0:03:52s
epoch 100| loss: 0.01958 | val_logits_ll: 0.01934 |  0:04:18s
epoch 110| loss: 0.01964 | val_logits_ll: 0.01936 |  0:04:44s
epoch 120| loss: 0.01959 | val_logits_ll: 0.01933 |  0:05:09s
epoch 130| loss: 0.01953 | val_logits_ll: 0.01931 |  0:05:34s
epoch 140| loss: 0.01952 | val_logits_ll: 0.01932 |  0:06:00s
epoch 150| loss: 0.01952 | val_logits_ll: 0.0193  |  0:06:27s
epoch 160| loss: 0.01951 | val_logits_ll: 0.01929 |  0:06:53s
epoch 170| loss: 0.01949 | val_logits_ll: 0.01927 |  0:07:18s
epoch 180| loss: 0.01948 | val_logits_ll: 0.01929 |  0:07:43s
epoch 190| loss: 0.01947 | val_logits_ll: 0.01928 |  0:08:09s
Stop tra

epoch 0  | loss: 0.34205 | val_logits_ll: 0.03669 |  0:00:02s
epoch 10 | loss: 0.02151 | val_logits_ll: 0.02145 |  0:00:28s
epoch 20 | loss: 0.02019 | val_logits_ll: 0.02041 |  0:00:54s
epoch 30 | loss: 0.02001 | val_logits_ll: 0.02021 |  0:01:20s
epoch 40 | loss: 0.0199  | val_logits_ll: 0.01976 |  0:01:45s
epoch 50 | loss: 0.01982 | val_logits_ll: 0.01961 |  0:02:10s
epoch 60 | loss: 0.01977 | val_logits_ll: 0.01946 |  0:02:36s
epoch 70 | loss: 0.0197  | val_logits_ll: 0.01948 |  0:03:01s
epoch 80 | loss: 0.01962 | val_logits_ll: 0.01942 |  0:03:27s
epoch 90 | loss: 0.01962 | val_logits_ll: 0.01944 |  0:03:53s
epoch 100| loss: 0.01959 | val_logits_ll: 0.01941 |  0:04:17s
epoch 110| loss: 0.01975 | val_logits_ll: 0.01956 |  0:04:43s
epoch 120| loss: 0.01957 | val_logits_ll: 0.01938 |  0:05:09s
epoch 130| loss: 0.01955 | val_logits_ll: 0.01939 |  0:05:35s
epoch 140| loss: 0.01953 | val_logits_ll: 0.01937 |  0:06:00s
epoch 150| loss: 0.01954 | val_logits_ll: 0.01939 |  0:06:27s
epoch 16

epoch 190| loss: 0.01948 | val_logits_ll: 0.01929 |  0:08:18s
Stop training because you reached max_epochs = 200 with best_epoch = 187 and best_val_logits_ll = 0.01926
Best weights from best epoch are automatically used!
Model Saving: tabnet_v2_FOLD4_.pth
Successfully saved model at tabnet_v2_FOLD4_.pth.zip
Seed:2,Fold:5
Device used : cuda
epoch 0  | loss: 0.35218 | val_logits_ll: 0.03947 |  0:00:02s
epoch 10 | loss: 0.02142 | val_logits_ll: 0.02106 |  0:00:28s
epoch 20 | loss: 0.02021 | val_logits_ll: 0.01988 |  0:00:54s
epoch 30 | loss: 0.01995 | val_logits_ll: 0.01955 |  0:01:20s
epoch 40 | loss: 0.01985 | val_logits_ll: 0.01935 |  0:01:45s
epoch 50 | loss: 0.01989 | val_logits_ll: 0.01947 |  0:02:10s
epoch 60 | loss: 0.01977 | val_logits_ll: 0.01923 |  0:02:37s
epoch 70 | loss: 0.0197  | val_logits_ll: 0.01923 |  0:03:03s
epoch 80 | loss: 0.01964 | val_logits_ll: 0.01915 |  0:03:28s
epoch 90 | loss: 0.01967 | val_logits_ll: 0.01924 |  0:03:54s

Early stopping occured at epoch 91 wi

Model Saving: tabnet_v2_FOLD4_.pth
Successfully saved model at tabnet_v2_FOLD4_.pth.zip
Seed:3,Fold:5
Device used : cuda
epoch 0  | loss: 0.32521 | val_logits_ll: 0.03403 |  0:00:02s
epoch 10 | loss: 0.02167 | val_logits_ll: 0.02125 |  0:00:27s
epoch 20 | loss: 0.02029 | val_logits_ll: 0.02033 |  0:00:54s
epoch 30 | loss: 0.02    | val_logits_ll: 0.01998 |  0:01:18s
epoch 40 | loss: 0.01985 | val_logits_ll: 0.01965 |  0:01:44s
epoch 50 | loss: 0.01977 | val_logits_ll: 0.01956 |  0:02:10s
epoch 60 | loss: 0.01971 | val_logits_ll: 0.01969 |  0:02:35s
epoch 70 | loss: 0.01972 | val_logits_ll: 0.01966 |  0:03:01s
epoch 80 | loss: 0.01966 | val_logits_ll: 0.01951 |  0:03:26s
epoch 90 | loss: 0.01961 | val_logits_ll: 0.01945 |  0:03:51s
epoch 100| loss: 0.0196  | val_logits_ll: 0.01945 |  0:04:17s
epoch 110| loss: 0.01959 | val_logits_ll: 0.0196  |  0:04:42s
epoch 120| loss: 0.01961 | val_logits_ll: 0.01961 |  0:05:07s
epoch 130| loss: 0.01961 | val_logits_ll: 0.01943 |  0:05:32s
epoch 140| 

epoch 140| loss: 0.0196  | val_logits_ll: 0.01944 |  0:06:03s
epoch 150| loss: 0.01956 | val_logits_ll: 0.01933 |  0:06:29s
epoch 160| loss: 0.01958 | val_logits_ll: 0.01933 |  0:06:55s
epoch 170| loss: 0.01954 | val_logits_ll: 0.01932 |  0:07:22s
epoch 180| loss: 0.01954 | val_logits_ll: 0.01931 |  0:07:48s

Early stopping occured at epoch 186 with best_epoch = 166 and best_val_logits_ll = 0.0193
Best weights from best epoch are automatically used!
Model Saving: tabnet_v2_FOLD4_.pth
Successfully saved model at tabnet_v2_FOLD4_.pth.zip
Seed:4,Fold:5
Device used : cuda
epoch 0  | loss: 0.34447 | val_logits_ll: 0.03516 |  0:00:02s
epoch 10 | loss: 0.02161 | val_logits_ll: 0.02164 |  0:00:27s
epoch 20 | loss: 0.02035 | val_logits_ll: 0.02089 |  0:00:54s
epoch 30 | loss: 0.02002 | val_logits_ll: 0.01989 |  0:01:19s
epoch 40 | loss: 0.01985 | val_logits_ll: 0.01957 |  0:01:45s
epoch 50 | loss: 0.01981 | val_logits_ll: 0.01957 |  0:02:11s
epoch 60 | loss: 0.01976 | val_logits_ll: 0.01955 |  

epoch 40 | loss: 0.01987 | val_logits_ll: 0.01976 |  0:01:45s
epoch 50 | loss: 0.01969 | val_logits_ll: 0.01944 |  0:02:11s
epoch 60 | loss: 0.01974 | val_logits_ll: 0.01942 |  0:02:36s
epoch 70 | loss: 0.01964 | val_logits_ll: 0.01936 |  0:03:02s
epoch 80 | loss: 0.0196  | val_logits_ll: 0.01936 |  0:03:28s
epoch 90 | loss: 0.0196  | val_logits_ll: 0.01938 |  0:03:54s
epoch 100| loss: 0.01957 | val_logits_ll: 0.01931 |  0:04:20s
epoch 110| loss: 0.01956 | val_logits_ll: 0.0193  |  0:04:45s
epoch 120| loss: 0.01952 | val_logits_ll: 0.01928 |  0:05:12s
epoch 130| loss: 0.01953 | val_logits_ll: 0.01929 |  0:05:39s
epoch 140| loss: 0.01953 | val_logits_ll: 0.0193  |  0:06:04s
epoch 150| loss: 0.01948 | val_logits_ll: 0.01929 |  0:06:31s
epoch 160| loss: 0.0195  | val_logits_ll: 0.01928 |  0:06:56s
epoch 170| loss: 0.0195  | val_logits_ll: 0.01926 |  0:07:22s
epoch 180| loss: 0.01948 | val_logits_ll: 0.01924 |  0:07:46s
epoch 190| loss: 0.01948 | val_logits_ll: 0.01926 |  0:08:12s
Stop tra

epoch 30 | loss: 0.01997 | val_logits_ll: 0.01974 |  0:01:21s
epoch 40 | loss: 0.01983 | val_logits_ll: 0.01939 |  0:01:46s
epoch 50 | loss: 0.01979 | val_logits_ll: 0.01939 |  0:02:11s
epoch 60 | loss: 0.01981 | val_logits_ll: 0.01952 |  0:02:35s
epoch 70 | loss: 0.01975 | val_logits_ll: 0.01945 |  0:03:00s
epoch 80 | loss: 0.01962 | val_logits_ll: 0.01924 |  0:03:26s
epoch 90 | loss: 0.01961 | val_logits_ll: 0.01929 |  0:03:52s
epoch 100| loss: 0.01962 | val_logits_ll: 0.01924 |  0:04:18s

Early stopping occured at epoch 100 with best_epoch = 80 and best_val_logits_ll = 0.01924
Best weights from best epoch are automatically used!
Model Saving: tabnet_v2_FOLD3_.pth
Successfully saved model at tabnet_v2_FOLD3_.pth.zip
Seed:6,Fold:4
Device used : cuda
epoch 0  | loss: 0.37073 | val_logits_ll: 0.04248 |  0:00:02s
epoch 10 | loss: 0.02165 | val_logits_ll: 0.02253 |  0:00:27s
epoch 20 | loss: 0.02023 | val_logits_ll: 0.02222 |  0:00:53s
epoch 30 | loss: 0.02003 | val_logits_ll: 0.02078 |  

In [39]:
time_diff/60/60,0.015603591991583895,16,96

3.3016611176066926

In [34]:
valid_results = scored.drop(columns=target_cols).merge(train[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)

y_true = scored[target_cols].values
y_pred = valid_results[target_cols].values

score = 0

for i in range(len(target_cols)):
    score += log_loss(y_true[:, i], y_pred[:, i])

print("CV log_loss: ", score / y_pred.shape[1])

CV log_loss:  0.015603591991583895


In [35]:
submission.shape

(3982, 207)

In [36]:
sub1 = submission.drop(columns=target_cols).merge(test[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)

sub1.to_csv('submission.csv', index=False)

CV log_loss:  0.01561899863439966,  pb:0.01834