In [1]:
import numpy as np
import pandas as pd
import os
import random
import gc
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from tqdm.notebook import tqdm
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.callbacks import Callback

import scipy.signal as signal
from scipy.fft import fft, fftfreq
from scipy.signal import blackman
from scipy.signal import hilbert, chirp

from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import RobustScaler, normalize, LabelEncoder
from sklearn.model_selection import train_test_split, GroupKFold, KFold

from IPython.display import display

# Single seed shared by every random number generator used in this notebook.
set_seed=2021
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here presumably only affects subprocesses spawned later — confirm.
os.environ['PYTHONHASHSEED'] = str(set_seed)
random.seed(set_seed)
np.random.seed(set_seed)
tf.random.set_seed(set_seed)

# Run-control switches read by the training cell:
# ONE_FOLD_PLOT            - plot the loss curves after each trained fold.
# ONE_FOLD_ONLY            - stop after the first cross-validation fold.
# COMPUTE_LSTM_IMPORTANCE  - run the permutation feature-importance pass per fold.
ONE_FOLD_PLOT=True
ONE_FOLD_ONLY=True
COMPUTE_LSTM_IMPORTANCE=False

print(tf.__version__)

In [2]:
%%time

# Read the competition tables from the Kaggle input directory.
train = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

# Sorted distinct pressure values seen in training. The minimum, the maximum and
# the gap between the two smallest values are reused at the end of the notebook
# to snap predictions onto this grid.
all_pressure = np.sort(train['pressure'].unique())
PRESSURE_MIN, PRESSURE_MAX = all_pressure[0], all_pressure[-1]
PRESSURE_STEP = all_pressure[1] - all_pressure[0]

## Feature engineering (FE)
1. Reference EDA notebook: https://www.kaggle.com/marutama/eda-about-time-step-and-u-out/notebook

In [3]:
# breath_id values to exclude from training, grouped by the reason they were
# flagged (presumably taken from the EDA notebook referenced above — confirm).
broken_one_list = [3178, 16315, 18117, 24127, 28942, 39045, 46324, 54129, 55244, 72104, 76037, 87776, 104001, 119689, 120878]
broken_two_list = [36175, 38415, 44245, 55851, 74766, 109693, 111439]
u_out1_outlier = [44245, 118114, 120878]
u_in_last_outlier = [54086]

# Stack the four groups into one array; some ids appear in more than one group.
total_outlier_list = np.array(
    broken_one_list + broken_two_list + u_out1_outlier + u_in_last_outlier
)
print(len(total_outlier_list))
# Remove duplicate ids.
total_outlier_list = list(set(total_outlier_list))
print(len(total_outlier_list))
# Number of rows this removes, assuming 80 rows per breath.
print(len(total_outlier_list)*80)

In [4]:
print('original train shape: ', train.shape)
# Remove every row that belongs to an outlier breath.
# One vectorised membership test replaces the previous per-id loop, which
# rescanned the whole frame once for each outlier id. The frame is still
# modified in place and its index labels are left untouched.
outlier_rows = train['breath_id'].isin(total_outlier_list)
train.drop(index=train.index[outlier_rows], inplace=True)

print('preprocessed train shape: ', train.shape)

In [5]:
%%time

def add_features(df):
    """Return a copy of ``df`` extended with engineered per-timestep features.

    The frame must contain ``breath_id``, ``time_step``, ``u_in``, ``u_out``,
    ``R`` and ``C``. Rows are assumed to be grouped by breath and ordered in
    time within each breath (TODO confirm for inputs other than the
    competition CSVs).

    The caller's frame is not modified. In the returned frame the string
    columns ``R``, ``C``, ``R__C``, ``u_in_first_label`` and
    ``u_in_last_label`` are replaced by one-hot indicator columns
    (``pd.get_dummies``), so the set of indicator columns depends on the
    values present in ``df``.
    """
    # Work on a copy so the caller never sees a half-built frame or leftover
    # temporary columns.
    df = df.copy()

    # Time since the previous row. The diff runs over the whole frame, so the
    # very first row is NaN and rows where time_step goes backwards (presumably
    # the first row of each breath) are negative; both are clamped to 0.
    # Plain assignment is used instead of `df[col].fillna(..., inplace=True)`:
    # the chained in-place form does not update the frame under pandas
    # Copy-on-Write.
    time_delta = df['time_step'].diff().fillna(0)
    df['time_delta'] = time_delta.mask(time_delta < 0, 0)
    df['delta'] = df['time_delta'] * df['u_in']
    df['area'] = df.groupby('breath_id')['delta'].cumsum()

    df['u_in_cumsum'] = df['u_in'].groupby(df['breath_id']).cumsum()
    print('1 step compelte')

    # Running mean of u_in inside the breath: cumulative sum / 1-based position.
    position_in_breath = df.groupby('breath_id').cumcount() + 1
    df['u_in_cummean'] = df['u_in_cumsum'] / position_in_breath
    print('2 step compelte')

    # Step-to-step change of u_in (0 on the first row of every breath).
    df['u_in_diff'] = df['u_in'].groupby(df['breath_id']).diff().fillna(0)

    # diff_vib: per-breath sum of the absolute changes in sign(u_in_diff),
    # broadcast to every row of the breath.
    diff_sign = np.sign(df['u_in_diff'])
    sign_change = diff_sign.groupby(df['breath_id']).diff().fillna(0).abs()
    df['diff_vib'] = sign_change.groupby(df['breath_id']).transform('sum')
    print('3 step compelte')

    # Ratio between the largest cumulative u_in reached while u_out == 0 (resp.
    # u_out == 1) and the largest cumulative u_in over the whole breath.
    cumsum_max = df.groupby('breath_id')['u_in_cumsum'].transform('max')
    for flag in (0, 1):
        phase_cumsum = (
            df.loc[df['u_out'] == flag, 'u_in']
            .groupby(df['breath_id'])
            .cumsum()
            .reindex(df.index)   # rows of the other phase become NaN ...
            .fillna(0)           # ... and count as 0, as before
        )
        phase_max = phase_cumsum.groupby(df['breath_id']).transform('max')
        df[f'u_in_cumsum_ratio_{flag}'] = phase_max / cumsum_max
    print('4 step compelte')

    # Lagged (past) and lead ("back", future) values inside the breath.
    # Column creation order is kept identical to the hand-written version.
    for k in (1, 2, 3, 4):
        df[f'u_in_lag{k}'] = df.groupby('breath_id')['u_in'].shift(k)
        df[f'u_out_lag{k}'] = df.groupby('breath_id')['u_out'].shift(k)
        df[f'u_in_lag_back{k}'] = df.groupby('breath_id')['u_in'].shift(-k)
        df[f'u_out_lag_back{k}'] = df.groupby('breath_id')['u_out'].shift(-k)
    # Shifts that fall outside the breath (and any NaN ratio above) become 0.
    df = df.fillna(0)
    print('5 step compelte')

    df['breath_id__u_in__max'] = df.groupby(['breath_id'])['u_in'].transform('max')
    df['breath_id__u_out__max'] = df.groupby(['breath_id'])['u_out'].transform('max')

    df['u_in_diff1'] = df['u_in'] - df['u_in_lag1']
    df['u_out_diff1'] = df['u_out'] - df['u_out_lag1']
    df['u_in_diff2'] = df['u_in'] - df['u_in_lag2']
    df['u_out_diff2'] = df['u_out'] - df['u_out_lag2']

    df['breath_id__u_in__diffmax'] = df.groupby(['breath_id'])['u_in'].transform('max') - df['u_in']
    df['breath_id__u_in__diffmean'] = df.groupby(['breath_id'])['u_in'].transform('mean') - df['u_in']

    df['u_in_diff3'] = df['u_in'] - df['u_in_lag3']
    df['u_out_diff3'] = df['u_out'] - df['u_out_lag3']
    df['u_in_diff4'] = df['u_in'] - df['u_in_lag4']
    df['u_out_diff4'] = df['u_out'] - df['u_out_lag4']
    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']
    print('6 step compelte')

    # u_in on the LAST row of every breath, broadcast to all of its rows.
    # (The previous code took every 80th row starting at label 79, which is the
    # same row only when each breath has exactly 80 contiguous rows.)
    last_u_in = df.groupby(['breath_id'])['u_in'].transform('last')

    # main_mode: 1.0 for breaths whose final u_in lies strictly in (4.8, 5.1).
    df['main_mode'] = ((last_u_in > 4.8) & (last_u_in < 5.1)).astype('float64')

    df['u_in_first_label'] = 0
    df['u_in_last_label'] = 0
    df['breath_id_u_in_first'] = df.groupby(['breath_id'])['u_in'].transform('first')
    df['breath_id_u_in_last'] = last_u_in

    # Bucket the first u_in of the breath.
    # NOTE(review): the strict comparisons leave first values of exactly
    # 20, 50 or 80 in bucket 0 — confirm that is intended.
    df.loc[(df['breath_id_u_in_first'] == 0.0), 'u_in_first_label'] = 1
    df.loc[(df['breath_id_u_in_first'] > 0.0) & (df['breath_id_u_in_first'] < 20), 'u_in_first_label'] = 2
    df.loc[(df['breath_id_u_in_first'] > 20) & (df['breath_id_u_in_first'] < 50), 'u_in_first_label'] = 3
    df.loc[(df['breath_id_u_in_first'] > 50) & (df['breath_id_u_in_first'] < 80), 'u_in_first_label'] = 4
    df.loc[(df['breath_id_u_in_first'] > 80) & (df['breath_id_u_in_first'] < 100), 'u_in_first_label'] = 5
    df.loc[(df['breath_id_u_in_first'] == 100.0), 'u_in_first_label'] = 6

    # Bucket the last u_in of the breath; values of 5.1 or more stay in bucket 0.
    df.loc[(df['breath_id_u_in_last'] >= 0.0) & (df['breath_id_u_in_last'] < 0.75), 'u_in_last_label'] = 1
    df.loc[(df['breath_id_u_in_last'] >= 0.75) & (df['breath_id_u_in_last'] < 1.75), 'u_in_last_label'] = 2
    df.loc[(df['breath_id_u_in_last'] >= 1.75) & (df['breath_id_u_in_last'] < 2.1), 'u_in_last_label'] = 3
    df.loc[(df['breath_id_u_in_last'] >= 2.1) & (df['breath_id_u_in_last'] < 3.0), 'u_in_last_label'] = 4
    df.loc[(df['breath_id_u_in_last'] >= 3.0) & (df['breath_id_u_in_last'] < 4.8), 'u_in_last_label'] = 5
    df.loc[(df['breath_id_u_in_last'] >= 4.8) & (df['breath_id_u_in_last'] < 5.1), 'u_in_last_label'] = 6
    print('7 step compelte')

    # Whole-breath statistics of u_in.
    df['u_in_mean'] = df.groupby('breath_id')['u_in'].transform('mean')
    df['u_in_median'] = df.groupby('breath_id')['u_in'].transform('median')
    df['u_in_min'] = df.groupby('breath_id')['u_in'].transform('min')
    df['u_in_max'] = df.groupby('breath_id')['u_in'].transform('max')

    # Trailing 4-step window statistics, then expanding statistics that need at
    # least two observations. reset_index drops the breath_id level so the
    # result aligns with df's own index; undefined values become 0.
    for stat in ('median', 'max', 'std'):
        window = df.groupby('breath_id')['u_in'].rolling(window=4, min_periods=1)
        df[f'rolling_4_{stat}'] = getattr(window, stat)().reset_index(level=0, drop=True).fillna(0)
    for stat in ('median', 'max', 'std'):
        growing = df.groupby('breath_id')['u_in'].expanding(2)
        df[f'expand_{stat}'] = getattr(growing, stat)().reset_index(level=0, drop=True).fillna(0)

    # Turn the categorical columns into strings so get_dummies one-hot encodes
    # them; R__C is the combination of the two lung attributes.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df['R'] + '__' + df['C']
    df['u_in_last_label'] = df['u_in_last_label'].astype(str)
    df['u_in_first_label'] = df['u_in_first_label'].astype(str)

    df = pd.get_dummies(df)
    print('8 step compelte')

    return df

# Run the same feature pipeline on both splits.
# NOTE(review): add_features one-hot encodes each frame separately, so train and
# test only end up with the same columns when both contain the same category
# values — verify before scaling.
train = add_features(train)
test = add_features(test)

In [None]:
%%time
def labelencoding(df, column):
    """Return integer codes for ``df[column]`` from a LabelEncoder fitted on that same column."""
    encoder = LabelEncoder()
    return encoder.fit(df[column]).transform(df[column])

df_list = [train, test]
column_list = ['R', 'C', 'R_C', 'u_in_last_label', 'u_in_first_label']

# Add an int8 label-encoded copy (<name>_le) of every listed column.
# add_features() in this notebook already one-hot encodes R, C and the label
# columns with pd.get_dummies, and it names the combined column 'R__C', so some
# or all of the names above may be absent. Missing columns are reported and
# skipped instead of stopping the cell with a KeyError.
for df in df_list:
    for column in column_list:
        if column not in df.columns:
            print(f"label encoding skipped: column '{column}' not found")
            continue
        df[column+'_le'] = labelencoding(df, column).astype('int8')

## Reduce memory usage

In [6]:
def reduce_mem_usage(df: pd.DataFrame, verbose: bool = True) -> pd.DataFrame:
    """Downcast numeric columns of ``df`` in place and return the same frame.

    Integer columns (int16/int32/int64) are cast to the narrowest of
    int8/int16/int32/int64 whose limits lie strictly outside the column's
    min and max. Float columns (float16/float32/float64) become float32 when
    their range fits strictly inside float32, otherwise float64. Columns of any
    other dtype are left alone.

    When ``verbose`` is true, the final memory footprint and the percentage
    saved are printed.
    """
    numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
    bytes_per_mb = 1024 ** 2
    start_mem = df.memory_usage().sum() / bytes_per_mb

    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min, c_max = df[col].min(), df[col].max()

        if str(col_type).startswith("int"):
            # Try the integer widths from narrowest to widest; the first one
            # that strictly contains [c_min, c_max] wins. If none does, the
            # column keeps its dtype.
            for candidate in (np.int8, np.int16, np.int32, np.int64):
                limits = np.iinfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            # float16 is deliberately not considered.
            limits = np.finfo(np.float32)
            fits_float32 = c_min > limits.min and c_max < limits.max
            df[col] = df[col].astype(np.float32 if fits_float32 else np.float64)

    end_mem = df.memory_usage().sum() / bytes_per_mb
    if verbose:
        saved_pct = 100 * (start_mem - end_mem) / start_mem
        print("Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)".format(end_mem, saved_pct))
    return df

# Downcast both feature tables; reduce_mem_usage modifies the frame it is given
# and returns that same object.
train = reduce_mem_usage(train)
test = reduce_mem_usage(test)

## Normal

In [7]:
%time
targets = train[['pressure']].to_numpy().reshape(-1, 80)
#u_outs = train[['u_out']].to_numpy().reshape(-1, 80)

# -- base --
train.drop(['pressure', 'id', 'breath_id'], axis=1, inplace=True)
test = test.drop(['id', 'breath_id'], axis=1)

#train.drop(['R', 'C', 'R_C', 'u_in_last_label'], axis=1, inplace=True)
#test = test.drop(['R', 'C', 'R_C', 'u_in_last_label'], axis=1)

#train.drop(['R', 'C', 'R_C', 'u_in_last_label', 'u_in_first_label'], axis=1, inplace=True)
#test = test.drop(['R', 'C', 'R_C', 'u_in_last_label', 'u_in_first_label'], axis=1)
# ----------

# -- additional --
#train.drop(['time_delta', 'time_gap_shift2', 'u_in_first_label_le'], axis=1, inplace=True)
#test = test.drop(['time_delta', 'time_gap_shift2', 'u_in_first_label_le'], axis=1)
# ----------------

COLS = list(train.columns)

RS = RobustScaler()
train = RS.fit_transform(train)
test = RS.transform(test)

train = train.reshape(-1, 80, train.shape[-1])
test = test.reshape(-1, 80, train.shape[-1])

print('targets shape: ', np.shape(targets))
print('train shape: ', np.shape(train))
print('test shape: ', np.shape(test))

gc.collect()

## Custom Loss

In [None]:
# u_out_index must be set to a correct value!
def ventilation_mae_loss(y_true, y_pred, input_tensor, u_out_index=2):
    """Mean absolute error weighted by ``1 - u_out`` read from the model input.

    ``input_tensor[:, :, u_out_index]`` is taken as the u_out channel and given
    a trailing axis of size 1 before being turned into the weight.

    NOTE(review): both sums run over the last axis. If y_true / y_pred have
    shape (batch, steps, 1), that axis has length 1 and every step with weight
    0 evaluates 0/0 — confirm the intended shapes before using this loss.
    NOTE(review): the weight only acts as a 0/1 mask if the u_out channel is
    still 0/1 when it reaches the model — confirm, since inputs are scaled.
    """
    u_out = input_tensor[:, :, u_out_index]
    weight = 1 - tf.expand_dims(u_out, axis=2)
    weighted_error = weight * K.abs(y_true - y_pred)
    normalised = K.sum(weighted_error, axis=-1) / K.sum(weight, axis=-1)
    return K.mean(normalised)


def GBVPP_loss(y_true, y_pred, cols = 80):
    """Per-sample absolute error weighted by ``1 - u_out``.

    ``y_true`` carries two things side by side along axis 1: the first ``cols``
    entries are the targets and the remaining entries are used as u_out.
    Returns one value per sample: weighted absolute error summed over the last
    axis and divided by the sum of the weights.
    """
    target, u_out = y_true[:, :cols ], y_true[:, cols: ]
    weight = 1 - u_out
    weighted_error = weight * tf.abs(target - y_pred)
    return tf.reduce_sum(weighted_error, axis=-1) / tf.reduce_sum(weight, axis=-1)

In [8]:
def plot_hist(hist):
    """Plot the training and validation loss curves stored in ``hist.history``."""
    for curve in ("loss", "val_loss"):
        plt.plot(hist.history[curve])
    plt.title("model performance")
    plt.ylabel("mean_absolute_error")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

## Base Model 

In [None]:
# Layer widths for the stacked model defined below: four recurrent layers, one
# dense layer and the single-unit output.
# hidden_base is an alternative set of widths; only hidden_ver3 is read by the
# base_model defined in this cell.
hidden_base = [1024, 512, 256, 128, 128, 1]
hidden_ver3 = [800, 600, 400, 200, 50, 1]

def base_model(input_shape):
    """Four stacked bidirectional LSTMs followed by a SELU dense layer and a linear output.

    Layer widths come from ``hidden_ver3``. Every recurrent layer returns the
    full sequence, so the model emits one value per time step.
    NOTE: later cells define ``base_model`` again; the last definition executed
    is the one in effect.
    """
    # (width, input dropout, layer name) for each recurrent layer, in order.
    recurrent_specs = (
        (hidden_ver3[0], 0.0, 'LSTM_1'),
        (hidden_ver3[1], 0.1, 'LSTM_2'),
        (hidden_ver3[2], 0.1, 'LSTM_3'),
        (hidden_ver3[3], 0.0, 'LSTM_4'),
    )

    input_1 = keras.layers.Input(shape=input_shape, name='input')

    sequence = input_1
    for width, rate, layer_name in recurrent_specs:
        cell = keras.layers.LSTM(width,
                                 return_sequences=True,
                                 dropout=rate,
                                 stateful=False,
                                 name=layer_name)
        sequence = keras.layers.Bidirectional(cell)(sequence)

    hidden = keras.layers.Dense(hidden_ver3[4], activation='selu', name='Dense_1')(sequence)
    out = keras.layers.Dense(hidden_ver3[5], activation='linear', name='out')(hidden)

    return keras.Model(inputs=input_1, outputs=out)

## New Model

In [None]:
def base_model(input_shape):
    """Five-layer bidirectional LSTM stack with a parallel, multiplicatively gated GRU path.

    The GRU path starts from the second LSTM output; before each further GRU
    its input is the element-wise product of the matching LSTM output and the
    previous GRU output, batch-normalised. The last LSTM output and all four
    GRU outputs are concatenated and mapped to one value per time step.
    """
    layers = keras.layers

    def bi_lstm(tensor, units, dropout):
        return layers.Bidirectional(
            layers.LSTM(units=units, dropout=dropout, return_sequences=True))(tensor)

    def bi_gru(tensor, units):
        return layers.Bidirectional(
            layers.GRU(units=units, return_sequences=True))(tensor)

    def gate(lstm_out, gru_out):
        product = layers.Multiply()([lstm_out, gru_out])
        return layers.BatchNormalization()(product)

    x_input = layers.Input(shape=input_shape, name='input')

    # LSTM backbone.
    x1 = bi_lstm(x_input, 768, 0.0)
    x2 = bi_lstm(x1, 512, 0.1)
    x3 = bi_lstm(x2, 384, 0.1)
    x4 = bi_lstm(x3, 256, 0.1)
    x5 = bi_lstm(x4, 128, 0.0)

    # GRU side path; widths match the LSTM output they are multiplied with.
    z2 = bi_gru(x2, 384)
    z3 = bi_gru(gate(x3, z2), 256)
    z4 = bi_gru(gate(x4, z3), 128)
    z5 = bi_gru(gate(x5, z4), 64)

    merged = layers.Concatenate(axis=2)([x5, z2, z3, z4, z5])
    hidden = layers.Dense(units=128, activation='selu')(merged)
    x_output = layers.Dense(units=1)(hidden)

    return keras.Model(inputs=x_input, outputs=x_output, name='DNN_Model')

In [None]:
def base_model(input_shape):
    """Six-layer bidirectional LSTM stack with a shorter gated GRU side path.

    The GRU path branches off the third LSTM output. Its later inputs are the
    batch-normalised element-wise products of an LSTM output (x5, then x6)
    with the previous GRU output. The final LSTM and GRU outputs are
    concatenated and mapped to one value per time step.
    """
    layers = keras.layers

    def bi_lstm(tensor, units, dropout):
        return layers.Bidirectional(
            layers.LSTM(units=units, dropout=dropout, return_sequences=True))(tensor)

    def bi_gru(tensor, units):
        return layers.Bidirectional(
            layers.GRU(units=units, return_sequences=True))(tensor)

    def gate(lstm_out, gru_out):
        product = layers.Multiply()([lstm_out, gru_out])
        return layers.BatchNormalization()(product)

    x_input = layers.Input(shape=input_shape, name='input')

    # LSTM backbone: (units, dropout) per layer, applied in order.
    backbone = []
    tensor = x_input
    for units, dropout in ((768, 0.0), (640, 0.1), (512, 0.1),
                           (384, 0.1), (256, 0.1), (128, 0.0)):
        tensor = bi_lstm(tensor, units, dropout)
        backbone.append(tensor)
    x3, x5, x6 = backbone[2], backbone[4], backbone[5]

    # GRU side path.
    z1 = bi_gru(x3, 256)
    z3 = bi_gru(gate(x5, z1), 128)
    z4 = bi_gru(gate(x6, z3), 64)

    merged = layers.Concatenate(axis=2)([x6, z4])
    hidden = layers.Dense(units=128, activation='selu')(merged)
    x_output = layers.Dense(units=1)(hidden)

    return keras.Model(inputs=x_input, outputs=x_output, name='DNN_Model')

In [None]:
def base_model(input_shape):
    """Four stacked bidirectional LSTMs, a SELU dense layer and a one-unit output.

    Every recurrent layer returns the full sequence, so the model emits one
    value per time step.
    """
    x_input = keras.layers.Input(shape=input_shape, name='input')

    x1 = keras.layers.Bidirectional(
        keras.layers.LSTM(units=448,
                          dropout=0.0,
                          return_sequences=True))(x_input)

    x2 = keras.layers.Bidirectional(
        keras.layers.LSTM(units=320,
                          dropout=0.1,
                          return_sequences=True))(x1)

    x3 = keras.layers.Bidirectional(
        keras.layers.LSTM(units=192,
                          dropout=0.1,
                          return_sequences=True))(x2)

    x4 = keras.layers.Bidirectional(
        keras.layers.LSTM(units=64,
                          dropout=0.0,
                          return_sequences=True))(x3)

    # The dense head reads the last LSTM output. The previous code referenced an
    # undefined name `x41` here, which raised NameError when the model was built.
    x = keras.layers.Dense(units=128, activation='selu')(x4)

    x_output = keras.layers.Dense(units=1)(x)

    model = keras.Model(inputs=x_input, outputs=x_output,
                  name='DNN_Model')
    return model

In [9]:
# Units of the four recurrent layers built by LSTM_block / GRU_block, in order.
layer_unit = [448, 320, 192, 64]
# Constant added to every entry of layer_unit when the layers are created.
add_unit = 0

def LSTM_block(x_input):
    """Apply four stacked bidirectional LSTM layers to ``x_input`` and return the last output.

    Layer i uses ``layer_unit[i] + add_unit`` units; the input dropout rates
    are 0.0, 0.2, 0.1 and 0.0. All layers return full sequences.
    """
    dropout_rates = (0.0, 0.2, 0.1, 0.0)

    sequence = x_input
    for depth, rate in enumerate(dropout_rates):
        recurrent = keras.layers.LSTM(units=layer_unit[depth]+add_unit,
                                      dropout=rate,
                                      return_sequences=True)
        sequence = keras.layers.Bidirectional(recurrent)(sequence)

    return sequence


def GRU_block(x_input):
    """Apply four stacked bidirectional GRU layers to ``x_input`` and return the last output.

    Layer i uses ``layer_unit[i] + add_unit`` units; the input dropout rates
    are 0.0, 0.2, 0.1 and 0.0. All layers return full sequences.
    """
    dropout_rates = (0.0, 0.2, 0.1, 0.0)

    sequence = x_input
    for depth, rate in enumerate(dropout_rates):
        recurrent = keras.layers.GRU(units=layer_unit[depth]+add_unit,
                                     dropout=rate,
                                     return_sequences=True)
        sequence = keras.layers.Bidirectional(recurrent)(sequence)

    return sequence

In [10]:
# Units of the three recurrent layers built by LSTM_block2 / GRU_block2, in order.
layer_unit2 = [320, 192, 64]
# Constant added to every entry of layer_unit2 when the layers are created.
add_unit2 = 0

def LSTM_block2(x_input):
    """Apply three stacked bidirectional LSTM layers to ``x_input`` and return the last output.

    Layer i uses ``layer_unit2[i] + add_unit2`` units; the input dropout rates
    are 0.0, 0.1 and 0.0. All layers return full sequences.
    """
    dropout_rates = (0.0, 0.1, 0.0)

    sequence = x_input
    for depth, rate in enumerate(dropout_rates):
        recurrent = keras.layers.LSTM(units=layer_unit2[depth]+add_unit2,
                                      dropout=rate,
                                      return_sequences=True)
        sequence = keras.layers.Bidirectional(recurrent)(sequence)

    return sequence


def GRU_block2(x_input):
    """Apply three stacked bidirectional GRU layers to ``x_input`` and return the last output.

    Layer i uses ``layer_unit2[i] + add_unit2`` units; the input dropout rates
    are 0.0, 0.1 and 0.0. All layers return full sequences.
    """
    dropout_rates = (0.0, 0.1, 0.0)

    sequence = x_input
    for depth, rate in enumerate(dropout_rates):
        recurrent = keras.layers.GRU(units=layer_unit2[depth]+add_unit2,
                                     dropout=rate,
                                     return_sequences=True)
        sequence = keras.layers.Bidirectional(recurrent)(sequence)

    return sequence

In [11]:
def base_model(input_shape):
    """Two-stage model that runs an LSTM stack and a GRU stack side by side and fuses them.

    Stage 1 feeds the input to ``LSTM_block`` and ``GRU_block``; stage 2 feeds
    the fused stage-1 output to ``LSTM_block2`` and ``GRU_block2``. Each fusion
    concatenates the element-wise product and the element-wise sum of the two
    branch outputs and batch-normalises the result. A final bidirectional LSTM,
    a SELU dense layer and a one-unit dense layer give one value per time step.
    """
    layers = keras.layers

    def fuse(lstm_branch, gru_branch):
        product = layers.Multiply()([lstm_branch, gru_branch])
        total = layers.Add()([lstm_branch, gru_branch])
        joined = layers.Concatenate(axis=2)([product, total])
        return layers.BatchNormalization()(joined)

    x_input = layers.Input(shape=input_shape, name='input')

    stage_1 = fuse(LSTM_block(x_input), GRU_block(x_input))
    stage_2 = fuse(LSTM_block2(stage_1), GRU_block2(stage_1))

    head = layers.Bidirectional(
        layers.LSTM(units=128, dropout=0.0, return_sequences=True))(stage_2)
    head = layers.Dense(units=128, activation='selu')(head)
    x_output = layers.Dense(units=1)(head)

    return keras.Model(inputs=x_input, outputs=x_output, name='DNN_Model')

In [12]:
# Build one model outside the training loop just to inspect its layers.
# train is the 3-D array produced by the scaling cell, so train.shape[-2:] is
# (time steps, features).
model = base_model(train.shape[-2:])
model.summary()

## TPU

In [None]:
%%time

# Training hyper-parameters: maximum epochs per fold, batch size and number of
# cross-validation folds.
EPOCH=200
BATCH_SIZE=1024
NUM_FOLDS=10

# detect and init the TPU
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

# instantiate a distribution strategy
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

with tpu_strategy.scope():
    # Folds are drawn over the first axis of `train`, i.e. whole breaths.
    kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=2021)
    # Per-fold results collected for the summary and ensembling cells below.
    test_preds = []
    history_loss = [] 
    history_val_loss = []
    
    for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
        print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
        X_train, X_valid = train[train_idx], train[test_idx]
        y_train, y_valid = targets[train_idx], targets[test_idx]
        #u_out_train, u_out_valid = u_outs[train_idx], u_outs[test_idx]    
        
        # Per-time-step loss weights: 1 everywhere except where feature
        # U_OUT_IDX equals 0, which gets weight 0.
        # NOTE(review): assumes feature index 2 is u_out — confirm against COLS.
        # NOTE(review): X_train has already been through RobustScaler, so a
        # value of 0 here is not necessarily a raw u_out of 0; verify which
        # breathing phase this actually masks out.
        U_OUT_IDX = 2
        y_weight = np.ones_like(y_train)
        u_out_values = X_train[:, :, U_OUT_IDX]
        y_weight[u_out_values==0] = 0
    
        # A fresh model per fold, created inside the TPU strategy scope.
        model = base_model(train.shape[-2:])
        
        #model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss=GBVPP_loss)
        #model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='mae')
        # NOTE(review): sample_weight_mode may not be accepted by newer Keras
        # versions — confirm against the installed TensorFlow release.
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3), loss='mae',
                  sample_weight_mode="temporal")
        
        # Halve the learning rate after 10 epochs without val_loss improvement;
        # stop after 20 such epochs and restore the best weights.
        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, verbose=1)
        es = EarlyStopping(monitor="val_loss", patience=20, verbose=1, mode="min", restore_best_weights=True)
    
        # Best-so-far full model for this fold; fold numbering in the file name
        # starts at 0.
        checkpoint_filepath = f"folds{fold}.hdf5"
        sv = keras.callbacks.ModelCheckpoint(
            checkpoint_filepath, monitor='val_loss', verbose=1, save_best_only=True,
            save_weights_only=False, mode='auto', save_freq='epoch',
            options=None
        )
        
        # NOTE(review): sample weights are passed for the training data only;
        # validation_data carries no weights, so val_loss (which drives the
        # callbacks and the CV score) is an unweighted MAE over all time steps.
        history = model.fit(
            #X_train, np.append(y_train, u_out_train, axis=1), 
            X_train, y_train, 
            #validation_data=(X_valid, np.append(y_valid, u_out_valid, axis=1)),
            validation_data=(X_valid, y_valid),
            sample_weight=y_weight.reshape((-1, 80, 1)),
            epochs=EPOCH, 
            batch_size=BATCH_SIZE, 
            callbacks=[lr, es, sv]
        ) 
        
        history_loss.append(history.history['loss'])
        history_val_loss.append(history.history['val_loss'])
        
        # Flatten the test predictions into one 1-D vector.
        test_preds.append(model.predict(test).squeeze().reshape(-1, 1).squeeze())
        
        if ONE_FOLD_PLOT:
            plot_hist(history)
        
        # Optional permutation importance: shuffle one feature at a time across
        # the validation breaths and record the resulting validation MAE.
        if COMPUTE_LSTM_IMPORTANCE:
            results = []
            print(' Computing LSTM feature importance...')
            
            # COMPUTE BASELINE (NO SHUFFLE)
            oof_preds = model.predict(X_valid, verbose=0).squeeze()
            baseline_mae = np.mean(np.abs( oof_preds-y_valid ))
            results.append({'feature':'BASELINE','mae':baseline_mae})           

            for k in tqdm(range(len(COLS))):
                
                # SHUFFLE FEATURE K
                save_col = X_valid[:,:,k].copy()
                np.random.shuffle(X_valid[:,:,k])
                        
                # COMPUTE OOF MAE WITH FEATURE K SHUFFLED
                oof_preds = model.predict(X_valid, verbose=0).squeeze() 
                # NOTE(review): this rebinds the module-level name `mae`
                # imported from sklearn.metrics at the top of the notebook.
                mae = np.mean(np.abs( oof_preds-y_valid ))
                results.append({'feature':COLS[k],'mae':mae})
                # Restore the original values before permuting the next feature.
                X_valid[:,:,k] = save_col
         
            # DISPLAY LSTM FEATURE IMPORTANCE
            print()
            # NOTE(review): `df` is a notebook-global name and is overwritten here.
            df = pd.DataFrame(results)
            df = df.sort_values('mae')
            plt.figure(figsize=(10,20))
            plt.barh(np.arange(len(COLS)+1),df.mae)
            plt.yticks(np.arange(len(COLS)+1),df.feature.values)
            plt.title('LSTM Feature Importance',size=16)
            plt.ylim((-1,len(COLS)+1))
            plt.plot([baseline_mae,baseline_mae],[-1,len(COLS)+1], '--', color='orange',
                     label=f'Baseline OOF\nMAE={baseline_mae:.3f}')
            plt.xlabel(f'Fold {fold+1} OOF MAE with feature permuted',size=14)
            plt.ylabel('Feature',size=14)
            plt.legend()
            plt.show()
                               
            # SAVE LSTM FEATURE IMPORTANCE
            df = df.sort_values('mae',ascending=False)
            df.to_csv(f'lstm_feature_importance_fold_{fold+1}.csv',index=False)
            
        # Release the fold's arrays and model before the next iteration.
        del X_train, X_valid, y_train, y_valid, model#, u_out_train, u_out_valid
        gc.collect()
        
        # ONLY DO ONE FOLD
        if ONE_FOLD_ONLY: break

In [None]:
# When training stopped after the first fold, the cells below must treat the run
# as a single-fold run.
if ONE_FOLD_ONLY:
    NUM_FOLDS = 1
# Suffix for the submission file names, derived from the fold count instead of a
# hard-coded 10 so it stays correct if NUM_FOLDS is changed.
postfix = f'_fold_{NUM_FOLDS}'

In [None]:
# Best (lowest) validation loss reached in every fold that was actually trained.
# Iterating over the recorded histories avoids depending on NUM_FOLDS having
# been adjusted by an earlier cell.
val_loss = [np.min(fold_history) for fold_history in history_val_loss]
for best_loss in val_loss:
    print(best_loss)

print('----------------------------------')
cv_score = np.round(np.mean(val_loss), 4)
# Report the real number of folds rather than a fixed "10".
print(f'{len(val_loss)} folds mean cv score is ', cv_score)

In [None]:
# ENSEMBLE FOLDS WITH MEAN
# Divide by the number of prediction vectors actually collected. Dividing by
# NUM_FOLDS gave predictions scaled down by the fold count whenever fewer folds
# were trained than NUM_FOLDS said.
submission["pressure"] = sum(test_preds)/len(test_preds)
submission.to_csv('submission_mean_cv:'+str(cv_score)+postfix+'.csv', index=False)

# ENSEMBLE FOLDS WITH MEDIAN
# The median is left in submission["pressure"] for the rounding cell that follows.
submission["pressure"] = np.median(np.vstack(test_preds),axis=0)
submission.to_csv('submission_median_cv:'+str(cv_score)+postfix+'.csv', index=False)

In [None]:
# ENSEMBLE FOLDS WITH MEDIAN AND ROUND PREDICTIONS
# Starts from whatever is currently in submission["pressure"] (the median
# ensemble when the previous cell has just run).
# Snap each value to the nearest multiple of PRESSURE_STEP above PRESSURE_MIN,
# then clip to the range observed in training.
grid_index = np.round((submission["pressure"] - PRESSURE_MIN) / PRESSURE_STEP)
snapped = grid_index * PRESSURE_STEP + PRESSURE_MIN
submission["pressure"] = np.clip(snapped, PRESSURE_MIN, PRESSURE_MAX)
submission.to_csv('submission_median_round_cv:'+str(cv_score)+postfix+'.csv', index=False)

In [None]:
# List the files in the current working directory.
!ls

In [None]:
os.chdir(r'../working')
from IPython.display import FileLink
# Checkpoints are written as folds<k>.hdf5 with k starting at 0. Link the last
# one that exists: fold 0 when only one fold was trained, otherwise the final
# fold. The previous hard-coded 'folds9.hdf5' pointed at a missing file for
# single-fold runs.
last_fold = 0 if ONE_FOLD_ONLY else NUM_FOLDS - 1
FileLink(f'folds{last_fold}.hdf5')