In [1]:
import pandas as pd
import numpy  as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from tensorflow import keras
import tensorflow as tf

In [2]:
# Load the three input CSVs from the working directory in one pass:
# training rows, test rows, and the sample submission file.
train_data, test_data, submission = (
    pd.read_csv(csv_path)
    for csv_path in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [3]:
# Add per-breath summary features of `u_in` to both frames (in place).
# The column creation order is kept stable because later cells turn the frame
# into a positional feature array.
for df in (train_data, test_data):
    # Every feature below is derived from u_in grouped by breath_id, so build
    # the grouped view once instead of re-grouping for each column.
    u_in_by_breath = df.groupby('breath_id')['u_in']

    # Lag by two steps within each breath. The leading NaNs are back-filled on
    # the flat column; `.bfill()` replaces the deprecated
    # `fillna(method="backfill")` and gives the same result.
    # NOTE(review): the fill is not group-aware, so it relies on every breath
    # having at least 3 rows; otherwise values would be taken from the next
    # breath. Confirm against the data.
    df['u_in_lag'] = u_in_by_breath.shift(2).bfill()
    df['u_in_cumsum'] = u_in_by_breath.cumsum()
    df['last_value_u_in'] = u_in_by_breath.transform('last')
    df['u_in_mean'] = u_in_by_breath.transform('mean')
    df['u_in_median'] = u_in_by_breath.transform('median')
    df['first_value_u_in'] = u_in_by_breath.transform('first')
    df['u_in_min'] = u_in_by_breath.transform('min')
    df['u_in_max'] = u_in_by_breath.transform('max')
    # Range of u_in within the breath.
    df['u_in_delta'] = df['u_in_max'] - df['u_in_min']

In [4]:
# Preview the first rows of the training frame, including the u_in_* columns added above.
train_data.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure,u_in_lag,u_in_cumsum,last_value_u_in,u_in_mean,u_in_median,first_value_u_in,u_in_min,u_in_max,u_in_delta
0,1,1,20,50,0.0,0.083334,0,5.837492,0.083334,0.083334,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036
1,2,1,20,50,0.033652,18.383041,0,5.907794,0.083334,18.466375,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036
2,3,1,20,50,0.067514,22.509278,0,7.876254,0.083334,40.975653,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036
3,4,1,20,50,0.101542,22.808822,0,11.742872,18.383041,63.784476,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036
4,5,1,20,50,0.135756,25.35585,0,12.234987,22.509278,89.140326,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036


In [5]:
# Preview the first rows of the test frame; it gets the same engineered columns as the training frame.
test_data.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,u_in_lag,u_in_cumsum,last_value_u_in,u_in_mean,u_in_median,first_value_u_in,u_in_min,u_in_max,u_in_delta
0,1,0,5,20,0.0,0.0,0,0.0,0.0,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219
1,2,0,5,20,0.031904,7.515046,0,0.0,7.515046,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219
2,3,0,5,20,0.063827,14.651675,0,0.0,22.166721,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219
3,4,0,5,20,0.095751,21.23061,0,7.515046,43.397331,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219
4,5,0,5,20,0.127644,26.320956,0,14.651675,69.718287,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219


In [6]:
# Extract the regression target before removing it from the feature frame.
# reshape(-1, 80) lays the pressure column out as rows of 80 consecutive values.
# NOTE(review): this assumes each breath occupies exactly 80 contiguous rows - confirm.
targets = train_data['pressure'].to_numpy().reshape(-1, 80)

# drop the unwanted features
# Both frames are rebound to a new frame the same way (no `inplace=True`), so
# train and test are handled consistently and end up with the same columns.
train_data = train_data.drop(columns=['pressure', 'id', 'breath_id', 'u_out'])
test_data = test_data.drop(columns=['id', 'breath_id', 'u_out'])

In [7]:
# Check the training frame after the drop: pressure, id, breath_id and u_out should be gone.
train_data.head()

Unnamed: 0,R,C,time_step,u_in,u_in_lag,u_in_cumsum,last_value_u_in,u_in_mean,u_in_median,first_value_u_in,u_in_min,u_in_max,u_in_delta
0,20,50,0.0,0.083334,0.083334,0.083334,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036
1,20,50,0.033652,18.383041,0.083334,18.466375,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036
2,20,50,0.067514,22.509278,0.083334,40.975653,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036
3,20,50,0.101542,22.808822,18.383041,63.784476,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036
4,20,50,0.135756,25.35585,22.509278,89.140326,4.987079,10.146007,4.922568,0.083334,0.0,28.313036,28.313036


In [8]:
# Check the test frame after the drop: id, breath_id and u_out should be gone.
test_data.head()

Unnamed: 0,R,C,time_step,u_in,u_in_lag,u_in_cumsum,last_value_u_in,u_in_mean,u_in_median,first_value_u_in,u_in_min,u_in_max,u_in_delta
0,5,20,0.0,0.0,0.0,0.0,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219
1,5,20,0.031904,7.515046,0.0,7.515046,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219
2,5,20,0.063827,14.651675,0.0,22.166721,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219
3,5,20,0.095751,21.23061,7.515046,43.397331,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219
4,5,20,0.127644,26.320956,14.651675,69.718287,4.973375,9.327338,4.683875,0.0,0.0,37.542219,37.542219


In [9]:
# Shape of the target array; the second axis is 80 because of the reshape(-1, 80) above.
targets.shape
# Uncomment to inspect the first row of targets:
#targets[0]

(75450, 80)

In [10]:
from sklearn.preprocessing import RobustScaler

# Learn the scaling statistics from the training features only, then apply
# the same fitted scaler to both train and test. Both names are rebound to
# the transformed outputs.
RS = RobustScaler()
RS.fit(train_data)
train_data = RS.transform(train_data)
test_data = RS.transform(test_data)

In [11]:
# Cross-validation / training settings used by the training cell.
n_splits = 5
n_epochs = 50

# Number of feature columns, taken from the last axis of the scaled array.
n_features = train_data.shape[-1]

# Regroup the flat rows into blocks of 80 rows x n_features columns.
train_data, test_data = (
    flat_rows.reshape(-1, 80, n_features) for flat_rows in (train_data, test_data)
)

In [12]:
# Display the number of feature columns per row.
n_features

13

In [13]:
# Shape after the reshape above: (blocks, 80, n_features).
train_data.shape

(75450, 80, 13)

In [14]:
# The first two axes should match train_data so features and targets line up block by block.
targets.shape

(75450, 80)

In [None]:
from tcn import TCN, tcn_full_summary

# K-fold training of a TCN regressor. For each fold: train on the other
# folds, validate on the held-out fold, save the model, and store flattened
# predictions for the test set in `test_preds` (one 1-D array per fold).

batch_size = 1024
# Horizon, in epochs, over which the learning rate decays by the full factor
# of 1e-5. The original cell used the same 200-epoch factor.
lr_decay_epochs = 200

kf = KFold(n_splits=n_splits, shuffle=False)
test_preds = []

for fold, (train_idx, valid_idx) in enumerate(kf.split(train_data, targets)):
    print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
    X_train, X_valid = train_data[train_idx], train_data[valid_idx]
    y_train, y_valid = targets[train_idx], targets[valid_idx]

    # ExponentialDecay is evaluated per optimizer step (batch), so decay_steps
    # is expressed in batches and derived from the size of *this fold's
    # training set* (previously it was computed from len(test_data)).
    steps_per_epoch = int(np.ceil(len(X_train) / batch_size))
    scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
        1e-3, lr_decay_epochs * steps_per_epoch, 1e-5)

    model = keras.models.Sequential([
        TCN(input_shape=(80, n_features), nb_filters=256, return_sequences=True, dilations=[1, 2, 4, 8, 16, 32]),
        keras.layers.Dense(1)
    ])

    # The schedule is attached to the optimizer itself. Passing it to the
    # LearningRateScheduler callback is a misuse: that callback calls its
    # function once per epoch with the epoch index, so a step-based schedule
    # would barely decay (or be rejected, depending on the Keras version).
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=scheduler),
                  loss="mae",
                  metrics=[keras.metrics.MeanAbsoluteError()])

    # `history` is kept at module level so later cells can inspect the last fold.
    history = model.fit(X_train, y_train,
                        validation_data=(X_valid, y_valid),
                        epochs=n_epochs,
                        batch_size=batch_size)

    model.save(f'Fold{fold+1} weights')
    # Flatten the predictions to one value per input row.
    test_preds.append(model.predict(test_data).reshape(-1))

--------------- > Fold 1 < ---------------
Epoch 1/50