In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

from datetime import datetime, timedelta
import time

import gc
import copy

import pyarrow.parquet as pq
import pyarrow as pa

 
from dateutil.relativedelta import relativedelta
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.model_selection import StratifiedKFold, KFold

In [3]:
# Show up to 100 rows / columns when a DataFrame is displayed.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

import warnings
# NOTE(review): this silences every warning category for the whole notebook,
# including deprecation and numerical warnings.
warnings.filterwarnings("ignore")

import pytorch_lightning as pl
# Seed value handed to pytorch_lightning's seed_everything below.
random_seed=1234
pl.seed_everything(random_seed)

Global seed set to 1234


1234

In [4]:
feats = ['B_10__first', 'B_10__last', 'B_10__max', 'B_10__mean', 'B_10__min', 'B_11__last', 'B_11__last2max', 'B_11__last__log', 'B_11__max', 'B_11__mean', 'B_11__mean__log', 'B_11__min', 'B_14__last2max', 'B_16__last', 'B_16__last2max', 'B_16__last__log', 'B_16__max', 'B_16__mean', 'B_16__mean__log', 'B_16__min', 'B_17__min', 'B_18__first', 'B_18__last', 'B_18__last2max', 'B_18__last__log', 'B_18__max', 'B_18__mean', 'B_18__mean__log', 'B_18__min', 'B_19__last', 'B_19__last__log', 'B_19__max', 'B_19__mean', 'B_19__mean__log', 'B_19__range', 'B_19__std', 'B_1__last', 'B_1__last2max', 'B_1__max', 'B_1__mean', 'B_1__min', 'B_20__last', 'B_20__last2max', 'B_20__last__log', 'B_20__max', 'B_20__mean', 'B_20__mean__log', 'B_20__range', 'B_22__last', 'B_22__last__log', 'B_22__max', 'B_22__mean', 'B_22__mean__log', 'B_22__range', 'B_22__std', 'B_23__first', 'B_23__last', 'B_23__last2max', 'B_23__last__log', 'B_23__max', 'B_23__mean', 'B_23__mean__log', 'B_23__min', 'B_23__range', 'B_23__std', 'B_25__last2max', 'B_26__last__log', 'B_28__last', 'B_28__last2max', 'B_2__first', 'B_2__last', 'B_2__last2max', 'B_2__last__log', 'B_2__max', 'B_2__mean', 'B_2__mean__log', 'B_2__min', 'B_30=0.0', 'B_30=1.0', 'B_30__nunique', 'B_33__first', 'B_33__last', 'B_33__last2max', 'B_33__last__log', 'B_33__max', 'B_33__mean', 'B_33__mean__log', 'B_33__min', 'B_37__last', 'B_37__last2max', 'B_37__max', 'B_37__mean', 'B_37__min', 'B_38=2.0', 'B_38=4.0', 'B_38=5.0', 'B_38__last', 'B_38__nunique', 'B_3__last', 'B_3__last2max', 'B_3__last__log', 'B_3__max', 'B_3__mean', 'B_3__mean__log', 'B_3__min', 'B_3__range', 'B_3__std', 'B_40__last', 'B_40__last__log', 'B_40__mean', 'B_40__mean__log', 'B_40__min', 'B_4__last', 'B_4__last2max', 'B_4__last__log', 'B_4__max', 'B_4__mean', 'B_4__mean__log', 'B_4__min', 'B_5__last__log', 'B_6__last', 'B_6__last2max', 'B_6__max', 'B_6__mean', 'B_6__min', 'B_7__first', 'B_7__last', 'B_7__last2max', 'B_7__max', 'B_7__mean', 'B_7__min', 'B_7__range', 'B_7__std', 
'B_8__first', 'B_8__last', 'B_8__last__log', 'B_8__mean', 'B_8__mean__log', 'B_8__min', 'B_9__first', 'B_9__last', 'B_9__last2max', 'B_9__last__log', 'B_9__max', 'B_9__mean', 'B_9__mean__log', 'B_9__min', 'D_112__last', 'D_112__last2max', 'D_112__last__log', 'D_39__last', 'D_39__last__log', 'D_39__max', 'D_39__range', 'D_39__std', 'D_41__last', 'D_41__last__log', 'D_41__max', 'D_41__mean__log', 'D_41__range', 'D_41__std', 'D_42__first', 'D_42__last', 'D_42__max', 'D_42__mean', 'D_42__min', 'D_43__last', 'D_43__max', 'D_43__mean', 'D_43__mean__log', 'D_44__first', 'D_44__last', 'D_44__last2max', 'D_44__last__log', 'D_44__max', 'D_44__mean', 'D_44__mean__log', 'D_44__min', 'D_44__range', 'D_44__std', 'D_45__first', 'D_45__last', 'D_45__last__log', 'D_45__max', 'D_45__mean', 'D_45__mean__log', 'D_45__min', 'D_48__first', 'D_48__last', 'D_48__last2max', 'D_48__max', 'D_48__mean', 'D_48__min', 'D_51__mean__log', 'D_52__first', 'D_52__last', 'D_52__max', 'D_52__mean', 'D_52__min', 'D_53__last__log', 'D_53__max', 'D_53__mean__log', 'D_55__last', 'D_55__last__log', 'D_55__max', 'D_55__mean', 'D_55__mean__log', 'D_55__min', 'D_55__range', 'D_55__std', 'D_58__first', 'D_58__last', 'D_58__last2max', 'D_58__last__log', 'D_58__max', 'D_58__mean', 'D_58__mean__log', 'D_58__min', 'D_58__range', 'D_58__std', 'D_61__first', 'D_61__last', 'D_61__max', 'D_61__mean', 'D_61__min', 'D_62__first', 'D_62__last', 'D_62__max', 'D_62__mean', 'D_62__min', 'D_65__last__log', 'D_65__mean__log', 'D_70__last', 'D_70__last__log', 'D_70__max', 'D_70__mean', 'D_70__mean__log', 'D_70__range', 'D_70__std', 'D_74__first', 'D_74__last', 'D_74__last2max', 'D_74__last__log', 'D_74__max', 'D_74__mean', 'D_74__mean__log', 'D_74__min', 'D_74__range', 'D_74__std', 'D_75__first', 'D_75__last', 'D_75__last2max', 'D_75__last__log', 'D_75__max', 'D_75__mean', 'D_75__mean__log', 'D_75__min', 'D_75__range', 'D_75__std', 'D_77__first', 'D_77__last', 'D_77__last__log', 'D_77__max', 'D_77__mean', 'D_77__mean__log', 
'D_77__min', 'D_78__last', 'D_78__last__log', 'D_78__max', 'D_78__mean', 'D_78__mean__log', 'D_78__range', 'D_78__std', 'D_84__max', 'D_84__mean', 'D_84__mean__log', 'D_84__range', 'D_84__std', 'P_2__first', 'P_2__last', 'P_2__last2max', 'P_2__max', 'P_2__mean', 'P_2__min', 'P_2__range', 'P_2__std', 'P_3__last', 'P_3__mean', 'P_3__min', 'R_10__max', 'R_10__mean', 'R_10__mean__log', 'R_10__range', 'R_10__std', 'R_15__max', 'R_15__mean__log', 'R_15__range', 'R_15__std', 'R_16__mean__log', 'R_1__last', 'R_1__last__log', 'R_1__max', 'R_1__mean', 'R_1__mean__log', 'R_1__range', 'R_1__std', 'R_2__last', 'R_2__last__log', 'R_2__max', 'R_2__mean', 'R_2__mean__log', 'R_2__range', 'R_2__std', 'R_3__last__log', 'R_3__max', 'R_3__mean', 'R_3__mean__log', 'R_3__min', 'R_3__range', 'R_3__std', 'R_4__last', 'R_4__max', 'R_4__mean', 'R_4__mean__log', 'R_4__range', 'R_4__std', 'R_5__last', 'R_5__max', 'R_5__mean', 'R_5__mean__log', 'R_5__range', 'R_5__std', 'R_6__last__log', 'R_6__max', 'R_6__mean', 'R_6__mean__log', 'R_6__range', 'R_6__std', 'R_7__mean', 'R_7__mean__log', 'R_7__range', 'R_7__std', 'R_8__max', 'R_8__mean', 'R_8__mean__log', 'R_8__range', 'R_8__std', 'S_15__max', 'S_15__mean', 'S_15__range', 'S_22__last', 'S_23__last', 'S_23__range', 'S_23__std', 'S_25__last2max', 'S_25__mean', 'S_25__min', 'S_25__range', 'S_25__std', 'S_3__last', 'S_3__max', 'S_3__mean', 'S_3__min', 'S_3__range', 'S_3__std', 'S_7__last', 'S_7__max', 'S_7__mean', 'S_7__range', 'S_7__std', 'S_8__last', 'S_8__mean', 'S_8__min']
len(feats)

373

In [5]:

# Identifier column(s); read from the parquet file together with the features.
id_feats = ['customer_ID']
#date_col =  'S_2'
# Name of the label column in the training parquet file.
target_col = 'target'

feats = ['B_10__last', 'B_10__mean', 'B_11__last', 'B_11__last2max', 'B_11__last__log', 'B_11__mean', 'B_11__mean__log', 'B_11__min', 'B_12__last', 'B_12__last__log', 'B_13__last', 'B_13__last__log', 'B_14__last', 'B_14__last2max', 'B_14__min', 'B_15__last', 'B_15__mean', 'B_16__last', 'B_16__max', 'B_16__min', 'B_17__last', 'B_17__max', 'B_17__mean', 'B_17__mean2std', 'B_17__min', 'B_18__last', 'B_18__last__log', 'B_18__mean', 'B_18__mean__log', 'B_18__min', 'B_19__last', 'B_19__last__log', 'B_19__min', 'B_1__last', 'B_1__last2max', 'B_1__max', 'B_1__mean', 'B_1__min', 'B_20__last', 'B_20__last__log', 'B_20__max', 'B_20__mean', 'B_20__mean__log', 'B_20__min', 'B_21__last', 'B_21__last__log', 'B_21__max', 'B_21__mean__log', 'B_22__last', 'B_22__last__log', 'B_22__max', 'B_22__mean', 'B_22__mean__log', 'B_22__min', 'B_23__last', 'B_23__last__log', 'B_23__mean', 'B_23__min', 'B_24__last', 'B_24__last__log', 'B_24__mean', 'B_24__mean__log', 'B_25__last', 'B_25__last2max', 'B_25__min', 'B_26__last', 'B_26__last__log', 'B_26__mean__log', 'B_27__last', 'B_27__last__log', 'B_28__last', 'B_29__last', 'B_29__last__log', 'B_2__last', 'B_2__last2max', 'B_2__last__log', 'B_2__mean', 'B_2__mean__log', 'B_2__min', 'B_30=0.0', 'B_30=1.0', 'B_30=2.0', 'B_30__last', 'B_30__nunique', 'B_31__last', 'B_31__nunique', 'B_32__last', 'B_32__last__log', 'B_32__max', 'B_33__last', 'B_33__last2max', 'B_33__last__log', 'B_33__max', 'B_33__mean', 'B_33__mean__log', 'B_33__min', 'B_36__last', 'B_36__last__log', 'B_37__last', 'B_37__last2max', 'B_37__max', 'B_37__mean', 'B_38=4.0', 'B_38=5.0', 'B_38=6.0', 'B_38=7.0', 'B_38__last', 'B_38__nunique', 'B_39__last', 'B_3__last', 'B_3__last__log', 'B_3__max', 'B_3__mean', 'B_3__mean__log', 'B_3__min', 'B_3__std', 'B_40__last', 'B_40__last__log', 'B_40__mean__log', 'B_40__min', 'B_41__last', 'B_41__last__log', 'B_42__last', 'B_42__last__log', 'B_4__last', 'B_4__last2max', 'B_4__last__log', 'B_4__max', 'B_4__mean2std', 'B_4__mean__log', 'B_5__last', 
'B_5__last__log', 'B_5__mean', 'B_6__last', 'B_6__min', 'B_7__last', 'B_7__max', 'B_7__mean', 'B_7__min', 'B_8__last', 'B_8__last__log', 'B_8__max', 'B_8__mean', 'B_8__mean__log', 'B_8__min', 'B_9__last', 'B_9__last2max', 'B_9__last__log', 'B_9__max', 'B_9__mean', 'B_9__mean__log', 'B_9__min', 'D_102__last', 'D_102__last__log', 'D_102__max', 'D_103__last', 'D_104__last', 'D_105__last', 'D_105__max', 'D_106__last', 'D_106__last__log', 'D_107__last', 'D_107__last__log', 'D_107__max', 'D_107__mean__log', 'D_108__last', 'D_108__last__log', 'D_109__last', 'D_109__last__log', 'D_109__mean', 'D_110__last', 'D_111__last', 'D_112__last', 'D_112__last__log', 'D_113__last', 'D_113__last__log', 'D_113__max', 'D_114=0.0', 'D_114__last', 'D_114__nunique', 'D_115__last', 'D_115__last__log', 'D_116__last', 'D_117__last', 'D_118__last', 'D_118__last__log', 'D_118__mean__log', 'D_119__last', 'D_119__last__log', 'D_120=1.0', 'D_120__last', 'D_120__nunique', 'D_121__last', 'D_122__last', 'D_122__max', 'D_122__min', 'D_123__last', 'D_123__last__log', 'D_123__mean', 'D_124__last', 'D_125__last', 'D_125__last__log', 'D_126__last', 'D_127__last', 'D_128__last', 'D_128__max', 'D_128__min', 'D_129__last', 'D_129__max', 'D_129__mean', 'D_130__last', 'D_130__max', 'D_131__last', 'D_131__last__log', 'D_131__max', 'D_131__min', 'D_132__last', 'D_132__mean2std', 'D_132__min', 'D_133__last', 'D_133__last__log', 'D_133__max', 'D_133__min', 'D_134__last', 'D_134__min', 'D_135__last', 'D_135__last__log', 'D_135__mean2std', 'D_136__last', 'D_136__last__log', 'D_137__last', 'D_137__last__log', 'D_138__last', 'D_138__last__log', 'D_139__last', 'D_139__mean', 'D_140__last', 'D_140__last__log', 'D_140__max', 'D_141__last', 'D_142__last', 'D_143__last', 'D_144__last', 'D_145__last', 'D_39__last', 'D_39__last__log', 'D_39__max', 'D_39__mean', 'D_39__range', 'D_39__std', 'D_41__last', 'D_41__last__log', 'D_41__max', 'D_41__mean', 'D_41__min', 'D_42__first', 'D_42__last', 'D_42__max', 'D_42__mean', 
'D_42__mean2std', 'D_42__min', 'D_43__last', 'D_43__last__log', 'D_43__max', 'D_43__mean', 'D_43__mean__log', 'D_43__min', 'D_44__last', 'D_44__last2max', 'D_44__last__log', 'D_44__max', 'D_44__mean', 'D_44__mean__log', 'D_44__min', 'D_44__range', 'D_44__std', 'D_45__first', 'D_45__last', 'D_45__last__log', 'D_45__max', 'D_45__mean', 'D_45__mean__log', 'D_45__min', 'D_46__last', 'D_46__mean', 'D_46__mean2std', 'D_46__min', 'D_47__last', 'D_48__first', 'D_48__last', 'D_48__max', 'D_48__mean', 'D_48__min', 'D_49__last', 'D_49__last__log', 'D_49__mean2std', 'D_50__last', 'D_51__last', 'D_51__last__log', 'D_51__mean', 'D_52__last', 'D_52__max', 'D_52__mean', 'D_52__mean2std', 'D_52__min', 'D_53__last', 'D_53__last__log', 'D_53__max', 'D_53__mean2std', 'D_53__min', 'D_54__last', 'D_55__last', 'D_55__last__log', 'D_55__max', 'D_55__min', 'D_56__last', 'D_56__min', 'D_58__last', 'D_58__min', 'D_59__last', 'D_59__max', 'D_60__last', 'D_60__last__log', 'D_60__max', 'D_61__last', 'D_61__max', 'D_61__mean', 'D_61__min', 'D_62__last', 'D_62__max', 'D_62__mean', 'D_62__min', 'D_64=U', 'D_65__last', 'D_65__last__log', 'D_65__max', 'D_65__mean', 'D_65__mean__log', 'D_68=1.0', 'D_68__last', 'D_69__last', 'D_69__min', 'D_70__last', 'D_70__max', 'D_70__mean2std', 'D_70__min', 'D_71__last', 'D_72__last', 'D_72__max', 'D_72__min', 'D_73__last', 'D_74__last', 'D_74__max', 'D_74__mean', 'D_75__last', 'D_75__last__log', 'D_75__max', 'D_75__mean', 'D_75__mean__log', 'D_76__last', 'D_77__last', 'D_77__max', 'D_77__mean', 'D_77__min', 'D_78__last', 'D_78__max', 'D_78__mean', 'D_78__mean__log', 'D_79__last', 'D_79__max', 'D_80__last', 'D_81__last', 'D_81__max', 'D_81__mean', 'D_82__last', 'D_83__last', 'D_84__last', 'D_84__max', 'D_84__mean', 'D_86__last', 'D_87__last', 'D_88__last', 'D_89__last', 'D_89__max', 'D_89__mean', 'D_91__last', 'D_92__last', 'D_93__last', 'D_94__last', 'D_96__last', 'P_2__first', 'P_2__last', 'P_2__last2max', 'P_2__max', 'P_2__mean', 'P_2__min', 'P_3__last', 
'P_3__max', 'P_3__mean', 'P_3__min', 'P_4__last', 'P_4__max', 'P_4__mean', 'P_4__min', 'R_10__last', 'R_10__max', 'R_10__mean', 'R_10__mean__log', 'R_10__std', 'R_11__last', 'R_11__max', 'R_11__mean', 'R_12__last', 'R_12__max', 'R_12__mean', 'R_13__last', 'R_13__max', 'R_13__mean', 'R_13__mean2std', 'R_14__last', 'R_15__last', 'R_15__max', 'R_15__mean', 'R_16__last', 'R_16__max', 'R_16__mean', 'R_16__mean__log', 'R_17__last', 'R_17__max', 'R_18__last', 'R_18__max', 'R_19__last', 'R_19__min', 'R_1__last', 'R_1__last__log', 'R_1__max', 'R_1__mean', 'R_1__mean__log', 'R_1__min', 'R_1__range', 'R_1__std', 'R_20__last', 'R_20__max', 'R_20__mean', 'R_21__last', 'R_22__last', 'R_22__max', 'R_23__last', 'R_24__last', 'R_24__max', 'R_24__mean', 'R_25__last', 'R_25__max', 'R_26__last', 'R_26__mean2std', 'R_27__last', 'R_27__max', 'R_27__mean', 'R_27__min', 'R_28__last', 'R_28__last__log', 'R_2__last', 'R_2__last__log', 'R_2__max', 'R_2__mean', 'R_2__mean__log', 'R_2__min', 'R_2__range', 'R_2__std', 'R_3__last', 'R_3__last__log', 'R_3__max', 'R_3__mean', 'R_3__mean__log', 'R_3__min', 'R_4__last', 'R_4__max', 'R_4__mean', 'R_4__mean__log', 'R_4__min', 'R_5__last', 'R_5__max', 'R_5__mean', 'R_5__mean__log', 'R_5__range', 'R_6__last', 'R_6__last__log', 'R_6__max', 'R_6__mean', 'R_6__mean__log', 'R_7__last', 'R_7__max', 'R_7__mean', 'R_7__range', 'R_8__last', 'R_8__max', 'R_8__mean', 'R_8__mean__log', 'R_9__last', 'R_9__mean2std', 'S_11__last', 'S_11__mean', 'S_12__last', 'S_12__max', 'S_13__last', 'S_15__last', 'S_15__max', 'S_15__mean', 'S_16__last', 'S_17__last', 'S_17__min', 'S_18__last', 'S_19__last', 'S_20__last', 'S_20__max', 'S_22__last', 'S_22__max', 'S_22__mean', 'S_23__last', 'S_23__max', 'S_23__mean', 'S_24__last', 'S_25__last', 'S_25__last2max', 'S_25__mean', 'S_25__min', 'S_25__range', 'S_25__std', 'S_26__last', 'S_26__last__log', 'S_27__last', 'S_27__mean2std', 'S_3__last', 'S_3__max', 'S_3__mean', 'S_3__min', 'S_5__last', 'S_5__last__log', 'S_5__mean__log', 
'S_6__last', 'S_6__max', 'S_6__min', 'S_7__last', 'S_7__max', 'S_7__mean', 'S_7__min', 'S_8__last', 'S_8__mean', 'S_8__min', 'S_9__last', 'S_9__max']

len(feats)

551

In [6]:
%%time
# Alternative (Kaggle) location of the aggregated training file:
# train_file = r'/kaggle/input/amex-agg-data-rev2/agg_train_all_rev2_rev.parquet'
train_file = r'amex/agg_v3/agg_train_all_small.parquet'
# Read only the id, target and selected feature columns from the parquet file.
df = pd.read_parquet(train_file, columns=id_feats + [target_col] + feats, engine='pyarrow')


Wall time: 11.1 s


## Define the 1D-CNN model

In [7]:
# References for the 1D-CNN design:
# - Kaggle notebook: https://www.kaggle.com/code/scaomath/g2net-1d-cnn-gem-pool-pytorch-train-inference
# - Architecture paper: https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.120.141103
# - Kaggle MoA 2nd-place solution: https://github.com/baosenguo/Kaggle-MoA-2nd-Place-Solution

import torch
from torch import nn
import numpy as np


class CNN1d(nn.Module):
    """1-D convolutional network for flat feature vectors.

    A weight-normalised linear layer expands each input row to
    ``hidden_sizes[0]`` values. These are reshaped into a signal with
    ``channels[0]`` channels and ``hidden_sizes[0] // channels[0]`` steps,
    passed through three convolutional stages, max-pooled, flattened and
    mapped to ``output_size`` sigmoid outputs.

    Args:
        input_size: number of input features per row.
        output_size: number of outputs per row.
        hidden_sizes: sequence whose first element is the width of the first
            linear layer; it must be a multiple of ``channels[0]`` and give a
            reshaped signal of at least 4 steps.
        channels: three channel counts: input channels of the first
            convolution, channels of the middle convolutions, and output
            channels of the last convolution.
        dropouts: six dropout rates, used in order by the linear layer, the
            four convolutions and the output head.
        celu_alpha: ``alpha`` of the CELU activation after the linear layer.

    Raises:
        ValueError: if the sizes cannot produce a valid network (see the
            checks at the top of ``__init__``).
    """

    def __init__(
        self, 
        input_size, #number of input features
        output_size, #output dimension
        hidden_sizes = (4096,), #first layer linear output size
        channels = (256, 512, 512), #channel sizes
        dropouts = (0.1, 0.1, 0.1, 0.3, 0.2, 0.2), #dropout rates
        celu_alpha = 0.06, 
    ):
        super().__init__()

        hidden_size = hidden_sizes[0]

        # The linear output is reshaped to (channels[0], length), so it must
        # split evenly; otherwise forward() fails with an opaque reshape error.
        if hidden_size % channels[0] != 0:
            raise ValueError(
                f"hidden_sizes[0]={hidden_size} must be a multiple of "
                f"channels[0]={channels[0]}"
            )

        #the signal length after reshaping the linear output matrix
        #for example, if Linear layer output is 4096 and first 1d CNN input channel is 256, 
        #then the reshape size is 16
        linear_reshape_channel = hidden_size // channels[0]

        # The average pool halves the length and the max pool
        # (kernel 4, stride 2, padding 1) halves it again; fewer than 4 steps
        # would leave the max pool with no output positions.
        if linear_reshape_channel < 4:
            raise ValueError(
                f"hidden_sizes[0] // channels[0] must be at least 4, "
                f"got {linear_reshape_channel}"
            )

        # forward() multiplies the cnn2 output (channels[1]) with the cnn3
        # output (channels[2]) and flattens to channels[2] * length, which only
        # works when the counts match or the cnn2 side broadcasts.
        if channels[1] != channels[2] and channels[1] != 1:
            raise ValueError(
                f"channels[1]={channels[1]} and channels[2]={channels[2]} must "
                f"be equal (or channels[1] must be 1)"
            )

        avg_pool_output = linear_reshape_channel // 2
        max_pool_output = (linear_reshape_channel // 4) * channels[2]

        self.linear_reshape_channel = linear_reshape_channel
        self.channels = channels

        #transform n*m (m=input_size) matrix into n*hidden_sizes[0] matrix
        self.linear1 = nn.Sequential(
            nn.BatchNorm1d(input_size),
            nn.Dropout(dropouts[0]),
            nn.utils.weight_norm(nn.Linear(input_size, hidden_size)),
            nn.CELU(alpha=celu_alpha)
        )

        #1st layer of convolutional network; the adaptive pool halves the length
        self.cnn1 = nn.Sequential(
            nn.BatchNorm1d(channels[0]), 
            nn.Dropout(dropouts[1]),
            nn.utils.weight_norm(nn.Conv1d(in_channels = channels[0], 
                                           out_channels = channels[1], 
                                           kernel_size = 5, 
                                           stride=1, 
                                           padding=2, 
                                           bias=False), dim=None),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(output_size = avg_pool_output)

        )

        #2nd layer of convolutional network (length preserved)
        self.cnn2 = nn.Sequential(
            nn.BatchNorm1d(channels[1]), 
            nn.Dropout(dropouts[2]),
            nn.utils.weight_norm(nn.Conv1d(in_channels = channels[1], 
                                           out_channels = channels[1], 
                                           kernel_size = 3, 
                                           stride=1, 
                                           padding=1, 
                                           bias=True), dim=None),
            nn.ReLU(),

        )

        #3rd layer of convolutional network: two stacked convolutions (length preserved)
        self.cnn3 = nn.Sequential(
            nn.BatchNorm1d(channels[1]), 
            nn.Dropout(dropouts[3]),
            nn.utils.weight_norm(nn.Conv1d(in_channels = channels[1], 
                                           out_channels = channels[1], 
                                           kernel_size = 3, 
                                           stride=1, 
                                           padding=1,  
                                           bias=True), dim=None),
            nn.ReLU(),

            nn.BatchNorm1d(channels[1]), 
            nn.Dropout(dropouts[4]),
            nn.utils.weight_norm(nn.Conv1d(in_channels = channels[1], 
                                           out_channels = channels[2], 
                                           kernel_size = 5, 
                                           stride=1, 
                                           padding=2, 
                                           bias=True), dim=None),
            nn.ReLU(),

        )

        #output layer: max pool, flatten, then a linear map squashed by a sigmoid
        self.out = nn.Sequential(
            nn.MaxPool1d(kernel_size = 4, stride=2, padding=1),
            nn.Flatten(),
            nn.BatchNorm1d(max_pool_output),
            nn.Dropout(dropouts[5]),
            nn.utils.weight_norm(nn.Linear(max_pool_output, output_size)),
            nn.Sigmoid()
        )


    def forward(self, x):
        """Return sigmoid outputs of shape ``(batch, output_size)``.

        ``x`` is a 2-D tensor of shape ``(batch, input_size)``.
        """
        x = self.linear1(x)
        # (batch, hidden) -> (batch, channels[0], length)
        x = x.reshape(x.shape[0],  self.channels[0], self.linear_reshape_channel)

        x = self.cnn1(x)

        # NOTE(review): cnn2 and cnn3 both consume the cnn1 output (parallel
        # branches) and are combined by an elementwise product. If cnn3 was
        # meant to run on the cnn2 output instead, this needs changing --
        # TODO confirm intent.
        x_cnn2 = self.cnn2(x)
        x = self.cnn3(x)
        x = x_cnn2*x

        x = self.out(x)

        return x

In [8]:
import torch
from torch.utils.data import (Dataset, DataLoader)
  

class TS_Data(Dataset):
    """In-memory dataset pairing a 2-D feature matrix with a target vector.

    Args:
        X: 2-D array-like of shape ``(n_samples, n_features)``.
        y: 1-D array-like with one target per row of ``X``.

    Both inputs are converted to float32 tensors once, at construction.

    Raises:
        ValueError: if ``X`` and ``y`` do not have the same number of rows.
    """

    def __init__(self, X, y): 

        features = torch.FloatTensor(X)
        targets = torch.FloatTensor(y)

        # Fail early: a mismatch would otherwise only show up as an index
        # error once a batch reaches the missing rows.
        if features.shape[0] != targets.shape[0]:
            raise ValueError(
                f"X has {features.shape[0]} rows but y has {targets.shape[0]}"
            )

        self.features = features
        self.targets = targets

        # Sizes are read from the tensor so plain nested lists work as well
        # as numpy arrays.
        self.n_samples = features.shape[0]
        self.n_features = features.shape[1]

    def __len__(self):
        """Number of samples."""
        return self.n_samples

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        x = self.features[idx]
        y = self.targets[idx]

        return x, y
    

def load_data(X, y, batch_size, n_workers=0, shuffle=False):
    """Build a ``DataLoader`` over a ``TS_Data`` dataset made from ``X`` and ``y``.

    Args:
        X: feature matrix passed to ``TS_Data``.
        y: target vector passed to ``TS_Data``.
        batch_size: number of samples per batch.
        n_workers: forwarded to ``DataLoader`` as ``num_workers``.
        shuffle: forwarded to ``DataLoader`` as ``shuffle``.

    Returns:
        The ``DataLoader`` wrapping the new dataset.
    """
    return DataLoader(
        TS_Data(X, y),
        batch_size=batch_size,
        num_workers=n_workers,
        shuffle=shuffle,
    )

## Hyperopt search-space parameters

In [9]:
# Candidate learning rates: one evenly spaced grid per decade, joined end to end.
learn_rates = np.hstack([
    np.arange(0.00001, 0.0001, 0.00001),
    np.arange(0.0001, 0.001, 0.0001),
    np.arange(0.001, 0.01, 0.001),
    np.arange(0.01, 0.05, 0.01),
])
# Candidate widths of the first linear layer (powers of two, 256 .. 4096).
hidden_sizes = [2 ** exponent for exponent in range(8, 13)]
# Candidate dropout rates in steps of 0.001, rounded to remove float noise.
dropouts = np.arange(0.001, 0.501, 0.001).round(4)
# Candidate channel counts for the first convolution (powers of two, 16 .. 512).
channel_list = [2 ** exponent for exponent in range(4, 10)]

len(learn_rates), len(hidden_sizes), len(dropouts)

(31, 5, 500)

In [31]:
from hyperopt import hp
import numpy as np
# Hyperopt search space. Every entry is a categorical choice; the label passed
# to hp.choice is the same as the dictionary key that score() reads.
space = {
    'batch_size': hp.choice(
        'batch_size',
        [128 * multiplier for multiplier in (1, 2, 4, 8, 16, 20, 32, 40, 80, 100)],
    ),
    'num_epochs': hp.choice('num_epochs', range(5, 65, 5)),
    'learning_rate': hp.choice('learning_rate', learn_rates),
    'hidden_size1': hp.choice('hidden_size1', hidden_sizes),
    # dropout1 .. dropout6: one independently chosen rate per dropout layer.
    **{
        f'dropout{layer}': hp.choice(f'dropout{layer}', dropouts)
        for layer in range(1, 7)
    },
    'celu_alpha': hp.choice(
        'celu_alpha',
        [0.001, 0.005, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.08, 0.1, 0.2],
    ),
    'channel': hp.choice('channel', channel_list),
}

### Train

In [11]:
# @yunchonggan's fast metric implementation
# From https://www.kaggle.com/competitions/amex-default-prediction/discussion/328020
# https://www.kaggle.com/code/ambrosm/amex-lightgbm-quickstart
def amex_metric(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute the metric ``0.5 * (G + D)`` for binary targets.

    ``D`` is the share of positives found in the highest-scored rows that
    make up 4% of the total weight, and ``G`` is the normalized weighted Gini
    coefficient. Negatives carry weight 20 and positives weight 1.

    Args:
        y_true: array-like of 0/1 labels; flattened to 1-D.
        y_pred: array-like of scores, higher meaning more likely positive;
            flattened to 1-D.

    Returns:
        The metric value. It is NaN when ``y_true`` has no positives or no
        negatives, because the normalizers are then zero.

    Raises:
        ValueError: if the two inputs have different lengths.
    """
    # Accept lists, Series and column vectors, not only 1-D ndarrays.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError(
            f"y_true has {y_true.shape[0]} elements but y_pred has {y_pred.shape[0]}"
        )

    # count of positives and negatives
    n_pos = y_true.sum()
    n_neg = y_true.shape[0] - n_pos

    # sorting by descending prediction values
    indices = np.argsort(y_pred)[::-1]
    target = y_true[indices]

    # filter the top 4% by cumulative row weights
    weight = 20.0 - target * 19.0
    cum_norm_weight = (weight / weight.sum()).cumsum()
    four_pct_filter = cum_norm_weight <= 0.04

    # default rate captured at 4%
    d = target[four_pct_filter].sum() / n_pos

    # weighted gini coefficient
    lorentz = (target / n_pos).cumsum()
    gini = ((lorentz - cum_norm_weight) * weight).sum()

    # max weighted gini coefficient
    gini_max = 10 * n_neg * (1 - 19 / (n_pos + 20 * n_neg))

    # normalized weighted gini coefficient
    g = gini / gini_max

    return 0.5 * (g + d)


In [12]:
# Feature matrix: the columns listed in feats.
X = df[feats]
# Target kept as a one-column DataFrame; score() reads it as y_train['target'].
# NOTE(review): the column name is hard-coded here although target_col holds the same value.
y = df[['target']]

In [13]:
# Drop the name df (X and y were taken from it above) and run a garbage collection pass.
del df
gc.collect()

42

In [14]:
# 3-fold split without shuffling (contiguous index blocks).
# NOTE(review): despite the name "skf" this is a plain KFold, not the imported
# StratifiedKFold -- confirm that unstratified, unshuffled folds are intended.
skf = KFold(n_splits=3)

In [15]:
# Show the splitter configuration and the row positions assigned to each fold.
print(skf)

for train_index, test_index in skf.split(X, y):
    print("TRAIN:", train_index, "TEST:", test_index)
# The per-fold slicing itself (X.iloc[...] / y.iloc[...]) is done inside score().

KFold(n_splits=3, random_state=None, shuffle=False)
TRAIN: [152971 152972 152973 ... 458910 458911 458912] TEST: [     0      1      2 ... 152968 152969 152970]
TRAIN: [     0      1      2 ... 458910 458911 458912] TEST: [152971 152972 152973 ... 305939 305940 305941]
TRAIN: [     0      1      2 ... 305939 305940 305941] TEST: [305942 305943 305944 ... 458910 458911 458912]


In [16]:
# Use CUDA when PyTorch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [17]:
import numpy as np
import torch
from tqdm import tqdm
import torch.optim as optim
from torch.nn import CrossEntropyLoss, MSELoss, BCELoss

In [27]:
# One record per fully evaluated trial: the sampled parameters, the per-fold
# metric values and their mean.
loss_dict = []


def _train_model(model, train_loader, learning_rate, num_epochs):
    """Train ``model`` in place with RMSprop, BCE loss and a one-cycle LR schedule.

    The schedule peaks at ``learning_rate`` and is advanced once per batch.
    """
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
    # max_lr is tied to the sampled learning rate; with a fixed max_lr the
    # schedule would override the optimizer's lr and the hyperparameter would
    # have no effect on training.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer,
        pct_start=0.1,
        div_factor=1e3,
        max_lr=learning_rate,
        epochs=num_epochs,
        steps_per_epoch=len(train_loader),
    )
    criterion = BCELoss()

    for _ in range(num_epochs):
        model.train()
        for features, targets in train_loader:
            features = features.to(device)
            targets = targets.to(device)

            ### FORWARD AND BACK PROP
            out = model(features)
            # reshape(-1) keeps a 1-D tensor even for a batch of one, where
            # squeeze() would yield a 0-d tensor that no longer matches targets.
            batch_loss = criterion(out.reshape(-1), targets)
            optimizer.zero_grad()
            batch_loss.backward()

            ### UPDATE MODEL PARAMETERS
            optimizer.step()
            # Without this call the schedule never advances and the learning
            # rate stays at its initial value (max_lr / div_factor).
            scheduler.step()


def _predict(model, loader):
    """Return ``(y_true, y_pred)`` as 1-D numpy arrays for every row in ``loader``."""
    model.eval()
    preds = []
    trues = []
    with torch.no_grad():
        for features, targets in loader:
            outputs = model(features.to(device))
            preds.append(outputs.reshape(-1).cpu().numpy())
            trues.append(targets.reshape(-1).numpy())
    return np.concatenate(trues), np.concatenate(preds)


def score(params):
    """Hyperopt objective: cross-validated ``amex_metric`` of a ``CNN1d`` model.

    For every fold of ``skf`` a ``MinMaxScaler`` is fitted on the training
    part only, a fresh model is trained, and the metric is computed on the
    held-out part. The trial is also recorded in ``loss_dict``.

    Args:
        params: dict sampled from ``space`` with the keys ``learning_rate``,
            ``num_epochs``, ``batch_size``, ``hidden_size1``,
            ``dropout1`` .. ``dropout6``, ``celu_alpha`` and ``channel``.

    Returns:
        ``{'loss': -mean_metric, 'status': STATUS_OK}``; the metric is negated
        because hyperopt minimises. Parameter sets with
        ``hidden_size1 < 4 * channel`` are not trained and get a loss of 9999.
    """
    # Imported here so the function does not depend on a later cell's import.
    from hyperopt import STATUS_OK

    pl.seed_everything(1)

    # The reshaped signal needs at least 4 steps for the pooling layers.
    if params['hidden_size1']<4*params['channel']:
        print('invalid parameters')
        return {'loss': 9999, 'status': STATUS_OK}

    print(params)
    learning_rate = float(params['learning_rate'])
    num_epochs = params['num_epochs']
    batch_size = params['batch_size']
    h_sizes = [params['hidden_size1']]
    drop_list = [params[f'dropout{i}'] for i in range(1,7)]
    celu_alpha = params['celu_alpha']
    channels = [params['channel'], 2*params['channel'], 2*params['channel']]

    losses = []  # per-fold metric values (higher is better)

    for train_index, test_index in skf.split(X, y):

        #----start: data prep-------------------------------------
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit the scaler on the training part only so no held-out statistics
        # leak into training.
        minmax_scaler = MinMaxScaler()
        minmax_scaler.fit(X_train)

        # Training batches are shuffled; the order is reproducible because
        # the global seed is reset at the top of this function.
        train_loader = load_data(minmax_scaler.transform(X_train), y_train['target'].values, 
                                 batch_size=batch_size, n_workers=0, shuffle=True)

        test_loader = load_data(minmax_scaler.transform(X_test), y_test['target'].values, 
                                 batch_size=batch_size, n_workers=0, shuffle=False)
        #----end: data prep-------------------------------------

        model = CNN1d(input_size=X.shape[1], 
                      output_size=1, 
                      hidden_sizes = h_sizes, 
                      channels = channels, 
                      dropouts = drop_list, 
                      celu_alpha = celu_alpha)
        model = model.to(device)

        #------train models--------------------------
        _train_model(model, train_loader, learning_rate, num_epochs)

        #-----eval models-------------------------------
        y_trues, y_preds = _predict(model, test_loader)
        losses.append(amex_metric(y_trues, y_preds))

    loss = np.mean(losses)
    print(loss)
    loss_dict.append({'params': params, 'losses': losses, 'mean_loss': loss})

    return {'loss': -float(loss), 'status': STATUS_OK}

In [28]:
from hyperopt import hp
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, anneal, rand
from functools import partial
def optimize(space, evals, cores, trials, optimizer=tpe.suggest, random_state=1234, n_startup_jobs=10):
    """Minimise ``score`` over ``space`` with hyperopt and return the best point.

    Args:
        space: hyperopt search space.
        evals: maximum number of evaluations (``max_evals``).
        cores: accepted but not used by this function.
        trials: hyperopt ``Trials`` object that collects the results.
        optimizer: suggestion algorithm; it is called with ``n_startup_jobs``.
        random_state: accepted but not used by this function, so the search
            itself is not seeded here.
        n_startup_jobs: bound into ``optimizer`` as a keyword argument.

    Returns:
        The value returned by ``fmin``, which is also printed.
    """
    suggest_fn = partial(optimizer, n_startup_jobs=n_startup_jobs)
    best = fmin(
        fn=score,
        space=space,
        algo=suggest_fn,
        max_evals=evals,
        trials=trials,
    )
    print(best)
    return best

In [29]:
# Passed to optimize() as `cores`.
cores = 4
# Number of hyperopt evaluations (passed to optimize() as `evals`).
n=500
# NOTE(review): not referenced anywhere else in the visible notebook.
verbose = False
# Container in which hyperopt records every trial.
trials = Trials()

In [None]:
# Run the search: n evaluations of score() using the TPE suggestion algorithm
# with n_startup_jobs=10.
best_param = optimize(space,
                      evals = n,
                      optimizer=tpe.suggest,
                      cores = cores,
                      trials = trials, random_state=1234, 
                      n_startup_jobs=10)