In [1]:
import os
import gc
import random

import warnings
warnings.simplefilter('ignore')
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
import matplotlib.pyplot as plt

import bloscpack as bp

from tsfresh.feature_extraction import feature_calculators

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR

from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold, GroupKFold
from sklearn.metrics import f1_score

from NNs import *

In [2]:
# Run on the first GPU when one is visible; otherwise fall back to the CPU so
# the notebook still executes on a machine without CUDA.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# EPOCHS, LR and SPLITS are not referenced by any cell visible in this
# notebook; they are kept so that code elsewhere relying on them still works.
EPOCHS = 128
BATCHSIZE = 16     # batch size of the test DataLoader
SEED = 19550423
LR = 0.001
SPLITS = 5

# Seed every random number generator imported by this notebook, not only torch.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f0bc1ef0cb0>

In [3]:
FEATS_DIR = '../input/feats_srs'


def _list_feature_files(prefix, step_tag, wndw_tag, vers_tag):
    """Return the sorted names in FEATS_DIR that contain all four substrings."""
    required = (prefix, step_tag, wndw_tag, vers_tag)
    return sorted(f for f in os.listdir(FEATS_DIR) if all(tag in f for tag in required))


def _reorder_test_files(files):
    """Apply the hand-written test ordering to a sorted listing.

    Entries 0 and 11-19 of the listing come first, followed by entries 1-10.
    Raises IndexError when the listing has fewer than 20 entries.
    """
    return [files[k] for k in [0] + list(range(11, 20))] + files[1:11]


def _load_series(files):
    """Unpack every bloscpack file in `files` and concatenate along axis 0."""
    return np.concatenate(
        [bp.unpack_ndarray_from_file(os.path.join(FEATS_DIR, f)) for f in files],
        axis=0
    )


# NOTE(review): wndw_tag repeats the step tag ('s1000' / 's500') instead of a
# window tag such as 'w1000' / 'w500', so the window filter is redundant with
# the step filter. Behaviour is kept as-is; confirm against the file names.
step_tag = 's1000'
wndw_tag = 's1000'
vers_tag = 'final'

trn_fs_w1000 = _list_feature_files('trn_srs_dat', step_tag, wndw_tag, vers_tag)
tst_fs_w1000 = _reorder_test_files(_list_feature_files('tst_srs_dat', step_tag, wndw_tag, vers_tag))

step_tag = 's500'
wndw_tag = 's500'
vers_tag = 'final'

trn_fs_w500 = _list_feature_files('trn_srs_dat', step_tag, wndw_tag, vers_tag)
tst_fs_w500 = _reorder_test_files(_list_feature_files('tst_srs_dat', step_tag, wndw_tag, vers_tag))

# Same load order as before: w500 train/test first, then w1000 train/test.
series_trn_w500 = _load_series(trn_fs_w500)
series_tst_w500 = _load_series(tst_fs_w500)
series_trn_w1000 = _load_series(trn_fs_w1000)
series_tst_w1000 = _load_series(tst_fs_w1000)

In [4]:
# Load the four target-encoded feature arrays (train/test for the s500_w500 and
# s1000_w1000 files) in a single pass; the unpacking order mirrors the tuple
# of file names below.
_encoded_dir = '../input/feats_srs'
_encoded_files = (
    'trn_srs_dat_all_s500_w500_feat_target_encoded.bp',
    'tst_srs_dat_all_s500_w500_feat_target_encoded.bp',
    'trn_srs_dat_all_s1000_w1000_feat_target_encoded.bp',
    'tst_srs_dat_all_s1000_w1000_feat_target_encoded.bp',
)
trn_fs_w500_tn, tst_fs_w500_tn, trn_fs_w1000_tn, tst_fs_w1000_tn = [
    bp.unpack_ndarray_from_file(os.path.join(_encoded_dir, fname))
    for fname in _encoded_files
]

In [5]:
# Standardise every channel (last axis) of the test series with the mean and
# standard deviation of the matching training channel. Only the test arrays
# are written to, and they are modified in place, so running this cell twice
# standardises them twice.
for trn_series, tst_series in (
    (series_trn_w500, series_tst_w500),
    (series_trn_w1000, series_tst_w1000),
):
    n_channels = trn_series.shape[-1]
    for ch in range(n_channels):
        trn_channel = trn_series[:, :, ch]
        mu = trn_channel.mean()
        sigma = trn_channel.std()
        tst_series[:, :, ch] = (tst_series[:, :, ch] - mu) / sigma

        print('progress: {:02d} / {:02d}; '.format(ch + 1, n_channels), end='\r')

progress: 30 / 30; 

In [6]:
# Standardise channel 0 of each target-encoded test array with the statistics
# of channel 0 of the matching training array. The test arrays are modified in
# place; no other channel is touched.
for trn_encoded, tst_encoded in (
    (trn_fs_w500_tn, tst_fs_w500_tn),
    (trn_fs_w1000_tn, tst_fs_w1000_tn),
):
    reference = trn_encoded[:, :, 0]
    center, scale = reference.mean(), reference.std()
    tst_encoded[:, :, 0] = (tst_encoded[:, :, 0] - center) / scale

In [7]:
# Checkpoints to ensemble: every file in ./saved_models/ whose name ends in
# '.pth', in sorted order. A suffix test is used so that stray files which
# merely contain '.pth' (e.g. 'x.pth.bak') are not picked up.
# To restrict the ensemble further, add a condition such as `'encoded' in f`.
mdls = sorted(f for f in os.listdir('./saved_models/') if f.endswith('.pth'))

In [8]:
class Waveset(Dataset):
    """Dataset over an indexable array of samples with optional labels.

    Args:
        data: indexable container; each item must support ``astype``.
        labels: optional container indexed like ``data``; when omitted the
            dataset yields samples only.
    """

    def __init__(self, data, labels=None):
        self.data = data
        self.labels = labels

    def __len__(self):
        """Number of samples, i.e. ``len(data)``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as float32.

        When labels were supplied the result is a ``(sample, label)`` tuple
        with the label cast to ``int``.
        """
        sample = self.data[idx].astype(np.float32)
        if self.labels is None:
            return sample
        return (sample, self.labels[idx].astype(int))

In [9]:
# Read the submission template. 'time' is read as a string so its text is
# written back unchanged; 'open_channels' uses pandas' nullable Int64 dtype.
submission = pd.read_csv(
    '../input/sample_submission.csv',
    dtype={'time': str, 'open_channels': 'Int64'},
)

In [10]:
# Running sum of softmax outputs over all checkpoints: one row per submission
# row, 11 columns (the width of the softmax rows the models produce).
submission_pred = torch.zeros(size=(submission.shape[0], 11))

shape0 = series_tst_w500.shape[-1]
shape1 = series_tst_w1000.shape[-1]
shape2 = tst_fs_w500_tn.shape[-1]
shape3 = tst_fs_w1000_tn.shape[-1]

# Architecture tag -> classifier class. The first tag found in the checkpoint
# file name wins, in this order.
ARCH_BY_TAG = [
    ('pure_RNN', RNN_Classifier),
    ('wave_RNN', WaveRNN_Classifier),
    ('wave_net', Wave_Classifier),
    ('wave_TRSFM_shallow', WaveTRSFM_Classifier_shallow),
    ('RNN_TRSFM_shallow', RnnTRSFM_Classifier),
]

# (feature-set tag, window tag) -> (test array, size of its last axis).
TEST_INPUT_BY_TAGS = {
    ('final_feats', 'w500'): (series_tst_w500, shape0),
    ('final_feats', 'w1000'): (series_tst_w1000, shape1),
    ('new_encoded_feats', 'w500'): (tst_fs_w500_tn, shape2),
    ('new_encoded_feats', 'w1000'): (tst_fs_w1000_tn, shape3),
}


def _first_tag_in(name, tags):
    """Return the first entry of `tags` that is a substring of `name`, else None."""
    return next((tag for tag in tags if tag in name), None)


def _resolve_checkpoint(mdl_name):
    """Map a checkpoint file name to ``(model class, test array, input size)``.

    Raises:
        ValueError: when the name lacks a known architecture, feature-set or
            window tag. Failing loudly avoids silently reusing the model or
            dataset of the previous checkpoint, or stopping with a partial sum.
    """
    arch_tag = _first_tag_in(mdl_name, [tag for tag, _ in ARCH_BY_TAG])
    feat_tag = _first_tag_in(mdl_name, ('final_feats', 'new_encoded_feats'))
    window_tag = _first_tag_in(mdl_name, ('w500', 'w1000'))
    if None in (arch_tag, feat_tag, window_tag):
        raise ValueError(
            'cannot infer architecture / feature set / window from checkpoint name: {}'.format(mdl_name)
        )
    tst_array, n_feats = TEST_INPUT_BY_TAGS[(feat_tag, window_tag)]
    return dict(ARCH_BY_TAG)[arch_tag], tst_array, n_feats


# Validate every checkpoint name before starting any (expensive) inference.
resolved_checkpoints = [(name,) + _resolve_checkpoint(name) for name in mdls]

for mdl_name, mdl_class, tst_array, n_feats in resolved_checkpoints:
    print('-------- model {} --------'.format(mdl_name))
    mdl = mdl_class(n_feats).to(DEVICE)
    waveset_tst = Waveset(tst_array)
    loader_tst = DataLoader(waveset_tst, BATCHSIZE, shuffle=False, num_workers=2, pin_memory=True)

    # map_location lets a checkpoint saved on one device load onto DEVICE.
    mdl_weight = torch.load(os.path.join('./saved_models/', mdl_name), map_location=DEVICE)
    print('model validation loss: {:.3f}; validation f1: {:.3f};'.format(mdl_weight['loss'], mdl_weight['f1']))
    mdl.load_state_dict(mdl_weight['model'])
    mdl.eval()
    with torch.no_grad():
        tst_fold_prd = []
        for tst_batch_dat in loader_tst:
            tst_batch_prd = mdl(tst_batch_dat.to(DEVICE))
            # Flatten all leading axes so each row holds the scores of one sample.
            tst_batch_prd = tst_batch_prd.view(-1, tst_batch_prd.size(-1)).cpu()
            tst_fold_prd.append(F.softmax(tst_batch_prd, dim=1))

        fold_result = torch.cat(tst_fold_prd, 0)

    # Guard against silent broadcasting or misaligned rows when accumulating.
    if fold_result.shape != submission_pred.shape:
        raise ValueError(
            'prediction shape {} of {} does not match accumulator shape {}'.format(
                tuple(fold_result.shape), mdl_name, tuple(submission_pred.shape)
            )
        )
    submission_pred += fold_result
        
    

-------- model RNN_TRSFM_shallow_model_new_encoded_feats_s500_w500_fold000_checkpoint.pth --------
model validation loss: 0.083; validation f1: 0.939;
-------- model RNN_TRSFM_shallow_model_new_encoded_feats_s500_w500_fold001_checkpoint.pth --------
model validation loss: 0.084; validation f1: 0.939;
-------- model RNN_TRSFM_shallow_model_new_encoded_feats_s500_w500_fold002_checkpoint.pth --------
model validation loss: 0.082; validation f1: 0.940;
-------- model RNN_TRSFM_shallow_model_new_encoded_feats_s500_w500_fold003_checkpoint.pth --------
model validation loss: 0.082; validation f1: 0.940;
-------- model RNN_TRSFM_shallow_model_new_encoded_feats_s500_w500_fold004_checkpoint.pth --------
model validation loss: 0.082; validation f1: 0.939;
-------- model RNN_TRSFM_shallow_model_new_encoded_feats_s500_w500_fold005_checkpoint.pth --------
model validation loss: 0.082; validation f1: 0.940;
-------- model pure_RNN_model_final_feats_ss1000_ww1000_fold000_checkpoint.pth --------
model 

In [12]:
# Every model contributed one softmax row (summing to 1) per sample, so each
# row total equals the number of models accumulated. Dividing by that total
# yields the ensemble mean without hard-coding the model count, and re-running
# this cell leaves already-normalised rows unchanged.
submission_pred = submission_pred / submission_pred.sum(dim=1, keepdim=True)

In [13]:
# Sanity check: sum of each row across the class axis.
torch.sum(submission_pred, dim=1)

tensor([1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000])

In [26]:
# Entries of the prediction matrix above 0.95.
confident_mask = submission_pred > .95
submission_pred[confident_mask]

tensor([0.9854, 0.9615, 0.9978, 0.9919, 0.9833, 0.9510, 0.9961, 0.9884, 0.9971,
        0.9912, 0.9576, 0.9514, 0.9545, 0.9614, 0.9638, 0.9911, 0.9859, 0.9904,
        0.9769])

In [14]:
# Count of entries strictly between 0.1 and 0.9.
uncertain_mask = (submission_pred > .1) & (submission_pred < .9)
submission_pred[uncertain_mask].shape

torch.Size([186048])

In [14]:
# Softmax across the class axis of the accumulated prediction matrix.
# NOTE(review): if the averaging cell ran first, the input already holds
# probabilities rather than raw scores — confirm this is intended.
submission_pred_softmax = F.softmax(submission_pred, dim=1)

In [17]:
# Count of re-softmaxed entries strictly between 0.1 and 0.9.
uncertain_softmax_mask = (submission_pred_softmax > .1) & (submission_pred_softmax < .9)
submission_pred_softmax[uncertain_softmax_mask].shape

torch.Size([2318])

In [15]:
# Persist the prediction matrix as a NumPy array (np.save appends '.npy').
# Note that this stores submission_pred, not submission_pred_softmax.
np.save(file='../input/nn_test_predictions_softmax', arr=submission_pred.numpy())

In [50]:
# Predicted class = column with the highest accumulated probability. Convert
# the tensor to a NumPy array explicitly instead of relying on pandas to
# coerce a torch tensor into a column.
submission['open_channels'] = submission_pred.argmax(dim=1).numpy()
# NOTE(review): the file name says '66mdls' regardless of how many checkpoints
# were actually ensembled — confirm or rename before submitting.
submission.to_csv("../submissions/sub_nn_models_bigbag_66mdls.csv", index=False)