In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import *
from tensorflow.keras import *
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.model_selection import train_test_split, GroupKFold, KFold

from IPython.display import display
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the three competition CSVs from the input directory:
#   df         - training rows (passed to feat_engine and later split into features/targets)
#   df_test    - test rows (same feature pipeline, no target column is read from it)
#   submission - sample submission frame whose "pressure" column is filled in at the end
df = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
df_test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

In [None]:
def feat_engine(df):
    """Build the engineered feature table used by the GRU model.

    The input must contain the columns ``breath_id``, ``time_step``, ``u_in``,
    ``u_out``, ``R`` and ``C``; any other columns are passed through (after
    ``fillna(0)`` / one-hot encoding of non-numeric ones).

    Features added, all computed within each ``breath_id`` group:
      * ``area``: cumulative sum of ``time_step * u_in``
      * ``u_in_cumsum``: cumulative sum of ``u_in``
      * ``u_in``/``u_out`` lags and leads of 1..4 rows (missing neighbours -> 0)
      * per-breath max of ``u_in`` / ``u_out``
      * differences between the current value and each lag
      * distance of ``u_in`` from the per-breath max and mean
      * ``cross`` (``u_in * u_out``) and ``cross2`` (``time_step * u_out``)
      * one-hot encodings of ``R``, ``C`` and the combined ``R__C``

    Args:
        df: pandas DataFrame of raw rows. It is NOT modified.

    Returns:
        A new DataFrame with the original columns plus the features above;
        ``R``, ``C`` and ``R__C`` are replaced by their dummy columns.
    """
    # Work on a copy so the caller's frame does not silently gain feature
    # columns (column assignment below would otherwise mutate it in place).
    df = df.copy()

    by_breath = df.groupby('breath_id')

    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()

    df['u_in_cumsum'] = by_breath['u_in'].cumsum()

    # Lags (shift > 0) and leads (shift < 0); column order matches the
    # hand-written version: in/out lag, then in/out lead, for each step.
    for step in range(1, 5):
        df[f'u_in_lag{step}'] = by_breath['u_in'].shift(step)
        df[f'u_out_lag{step}'] = by_breath['u_out'].shift(step)
        df[f'u_in_lag_back{step}'] = by_breath['u_in'].shift(-step)
        df[f'u_out_lag_back{step}'] = by_breath['u_out'].shift(-step)

    # Rows near a breath boundary have no neighbour to shift from; use 0 there.
    df = df.fillna(0)

    # fillna returned a new frame, so the groupby has to be rebuilt on it.
    by_breath = df.groupby('breath_id')
    u_in_max = by_breath['u_in'].transform('max')
    df['breath_id__u_in__max'] = u_in_max
    df['breath_id__u_out__max'] = by_breath['u_out'].transform('max')

    def add_lag_diffs(steps):
        # Current value minus the value `step` rows earlier in the same breath.
        for step in steps:
            df[f'u_in_diff{step}'] = df['u_in'] - df[f'u_in_lag{step}']
            df[f'u_out_diff{step}'] = df['u_out'] - df[f'u_out_lag{step}']

    add_lag_diffs((1, 2))

    df['breath_id__u_in__diffmax'] = u_in_max - df['u_in']
    df['breath_id__u_in__diffmean'] = by_breath['u_in'].transform('mean') - df['u_in']

    # Kept after the breath-level stats so the output column order is unchanged.
    add_lag_diffs((3, 4))

    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']

    # Treat R and C as categorical: stringify so get_dummies one-hot encodes
    # them (and their combination) instead of leaving them numeric.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df['R'] + '__' + df['C']
    return pd.get_dummies(df)

# Run the feature pipeline on the training rows; the result still contains
# 'pressure', 'id' and 'breath_id', which are split off / dropped in the next cell.
train_data = feat_engine(df)


In [None]:
# Pull the label out as one row of 80 values per sequence.
# NOTE(review): the fixed 80 assumes every breath contributes exactly 80
# consecutive rows - confirm against the dataset.
targets = train_data['pressure'].to_numpy().reshape(-1, 80)

# Keep only model inputs: drop the label and the identifier columns.
train_data = train_data.drop(columns=['pressure', 'id', 'breath_id'])

# Fit the scaler on the training features (it is reused for the test set),
# then fold the flat rows into (sequences, 80 steps, features).
scale = RobustScaler()
train_data = scale.fit_transform(train_data).reshape(-1, 80, train_data.shape[-1])

In [None]:
# Same feature pipeline as for training, minus the identifier columns
# (no 'pressure' column is dropped here).
test = feat_engine(df_test).drop(columns=['id', 'breath_id'])

# Reuse the scaler fitted on the training features, then reshape to
# (sequences, 80 steps, features) with the training feature count.
test = scale.transform(test).reshape(-1, 80, train_data.shape[-1])

In [None]:
# Sanity check: after the reshape above this is (n_sequences, 80, n_features).
train_data.shape

In [None]:
def get_hardware_strategy():
    """Pick a ``tf.distribute`` strategy based on whether a TPU can be resolved.

    Returns:
        tuple: ``(tpu, strategy)``.
            * ``tpu`` is the ``TPUClusterResolver`` instance, or ``None`` when
              resolving it raised ``ValueError``.
            * ``strategy`` is a TPU strategy when a TPU was resolved, otherwise
              TensorFlow's default strategy (``tf.distribute.get_strategy()``).

    Side effects (TPU path only): connects to the TPU cluster, initializes the
    TPU system and enables XLA JIT via ``tf.config.optimizer.set_jit(True)``.
    """
    try:
        # TPU detection. No parameters necessary if TPU_NAME environment variable is
        # set: this is always the case on Kaggle.
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        tpu = None

    if not tpu:
        # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
        return tpu, tf.distribute.get_strategy()

    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)

    # `tf.distribute.experimental.TPUStrategy` is deprecated in favour of
    # `tf.distribute.TPUStrategy`; prefer the stable class and fall back to the
    # experimental one only on TensorFlow builds that do not have it yet.
    strategy_cls = getattr(tf.distribute, 'TPUStrategy', None)
    if strategy_cls is None:
        strategy_cls = tf.distribute.experimental.TPUStrategy
    strategy = strategy_cls(tpu)

    tf.config.optimizer.set_jit(True)
    return tpu, strategy

# Resolve the hardware once; `strategy.scope()` wraps the training loop further down.
tpu, strategy = get_hardware_strategy()

In [None]:
def create_gru_model(gru_units=(1471, 792, 682, 419),
                     dropout_rate=0.13359276043323767,
                     activation='selu',
                     input_shape=None):
    """Build and compile the stacked bidirectional GRU regressor.

    Architecture: one ``Bidirectional(GRU(..., return_sequences=True))`` layer
    per entry of ``gru_units``, followed by dropout, a dense layer with
    ``gru_units[-1]`` units and a final ``Dense(1)`` applied at every time step.
    The model is compiled with the Adam optimizer, MAE loss and an MAE metric.

    Calling it with no arguments reproduces the original fixed configuration.

    Args:
        gru_units: Units of each stacked GRU layer, first to last. Must be
            non-empty.
        dropout_rate: Dropout rate applied after the last GRU layer.
        activation: Activation of the hidden dense layer.
        input_shape: ``(time_steps, features)`` of one sequence. Defaults to the
            last two dimensions of the notebook-level ``train_data`` array.

    Returns:
        A compiled ``keras.Model``.

    Raises:
        ValueError: If ``gru_units`` is empty.
    """
    if not gru_units:
        raise ValueError("gru_units must contain at least one layer size")
    if input_shape is None:
        # Same shape the original derived from the global training tensor.
        input_shape = train_data.shape[-2:]

    # `keras.layers.Input` rather than `tf.keras.layers.Input`: the rest of this
    # function already goes through the `keras` alias, so the block does not
    # depend on a separate `tf` name being bound.
    inputs = keras.layers.Input(shape=tuple(input_shape))

    # Local tensor is deliberately NOT called `GRU`: that name is also the layer
    # class pulled in by the star import from tensorflow.keras.layers.
    x = inputs
    for units in gru_units:
        x = Bidirectional(keras.layers.GRU(units, return_sequences=True))(x)

    x = Dropout(dropout_rate)(x)
    x = Dense(gru_units[-1], activation=activation)(x)
    outputs = Dense(1)(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer="adam", loss="mae", metrics=["mae"])

    return model

In [None]:
# `model` is only bound later, inside the training loop, so on a fresh kernel
# the bare `model.summary()` raised NameError. Build a throwaway instance
# instead so this cell works under Restart & Run All.
create_gru_model().summary()

In [None]:
# Two hyperparameter sets kept for reference, one per output line; the first
# one holds the values hard-coded in create_gru_model().
print(
    {'GRU_units_L1': 1471, 'GRU_units_L2': 792, 'GRU_units_L3': 682, 'GRU_units_L4': 419, 'GRU_dropout': 0.13359276043323767, 'activation': 'selu'},
    {'GRU_units_L1': 1364, 'GRU_units_L2': 822, 'GRU_units_L3': 799, 'GRU_units_L4': 696, 'GRU_dropout': 0.03266155304659389, 'activation': 'selu'},
    sep='\n',
)

In [None]:
# Training configuration.
EPOCH = 350        # upper bound; EarlyStopping normally ends a fold sooner
BATCH_SIZE = 512
NUM_FOLDS = 5

# K-fold training: one freshly built model per fold, validated on the held-out
# fold, with that fold's test-set predictions collected in `test_preds`.
with strategy.scope():
    kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=714)
    test_preds = []
    # train_data is indexed per sequence (first axis), so folds split whole sequences.
    for fold, (train_idx, valid_idx) in enumerate(kf.split(train_data, targets)):
        print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
        X_train, X_valid = train_data[train_idx], train_data[valid_idx]
        y_train, y_valid = targets[train_idx], targets[valid_idx]

        # create_gru_model() already compiles the model (adam optimizer, MAE
        # loss), so no second compile() is needed here.
        model = create_gru_model()

        # Halve the learning rate after 10 epochs without val_loss improvement.
        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, verbose=1)
        # Keep the best-val_loss version of this fold's model on disk.
        filepath = f"GRU_model2_{fold}.hdf5"
        check_point = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        # Give up on the fold after 30 epochs without val_loss improvement.
        early_stop = EarlyStopping(monitor='val_loss', patience=30, verbose=1, restore_best_weights=True, mode='min')
        call_backs = [check_point, early_stop, lr]

        model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=EPOCH, batch_size=BATCH_SIZE, callbacks=call_backs)

        # Depending on the Keras version, `restore_best_weights` only takes
        # effect when EarlyStopping actually stops the run; a fold that reaches
        # EPOCH would then predict with last-epoch weights. Reload the best
        # checkpoint explicitly so every fold predicts with its best weights.
        if os.path.exists(filepath):
            model.load_weights(filepath)

        # Flatten per-step predictions to one value per test row, in row order.
        fold_pred = model.predict(test, batch_size=BATCH_SIZE).reshape(-1)
        test_preds.append(fold_pred)

In [None]:
# Average the per-fold test predictions. The divisor is the number of fold
# predictions actually collected (not the NUM_FOLDS constant), so the mean
# stays correct if the training cell was stopped before finishing all folds.
if not test_preds:
    raise RuntimeError("test_preds is empty - run the training cell before building the submission")
submission["pressure"] = np.mean(test_preds, axis=0)
submission.to_csv('submission.csv', index=False)

In [None]:
# Display the submission frame to eyeball the predicted "pressure" column.
submission

In [None]:
# Write without the DataFrame index: the default (index=True) adds an extra
# unnamed first column, and since /kaggle/working/ is the current directory
# this call would overwrite the index-free submission.csv written earlier.
submission.to_csv('/kaggle/working/submission.csv', index=False)

In [None]:
# Write the submission to the current directory without the index column
# (same call as in the cell that computes the fold average).
submission.to_csv('submission.csv', index=False)

In [None]:
# IPython shell escape: list the files in the current working directory.
!ls