# Training TCN (Temporal Convolutional Network) model

In addition to convolution and dropout, TCNs use dilation to obtain larger receptive fields by introducing a fixed step size between every two adjacent convolutional filter taps. TCNs also employ residual connections so that each convolutional layer learns a modification to the identity mapping rather than the entire transformation, which helps stabilize deeper and wider models. Within each residual TCN block, layer normalization is applied to the output of the convolutional layer and dropout is added for regularization.

<center><img src="https://i.ibb.co/prTGdnT/TCN-arquitecture.png" alt="TCN architecture" width="400"/></center>
<center>TCN model. Source: <a href="https://arxiv.org/pdf/2012.15330.pdf">Paper(arxiv)</a>.</center>

### Import libraries

In [1]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow_addons.layers import WeightNormalization as weight_norm
import pickle
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, FunctionTransformer, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import roc_auc_score, recall_score
from sklearn.model_selection import train_test_split
import numpy as np
import optuna 

2022-12-06 21:07:17.568729: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


### Load Dataset

In [11]:
# Load the pickled (features, labels) pair.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# point this at a dataset file that comes from a trusted source.
with open('../data/dataset.pkl', 'rb') as f:
    features, labels = pickle.load(f)
# Hold out 20% of the rows as the test set ...
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
# ... then take 25% of the remaining 80% (20% of all rows) as the validation
# set, which leaves 60% for training. The fixed seed keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

### Defining the model

In [2]:
class TCN_block(keras.Model):
    """Residual temporal-convolution block.

    Applies a weight-normalized, causally padded, dilated ``Conv1D`` with ReLU
    activation, then dropout, and finally adds the block input back onto the
    result (residual connection).

    Args:
        filters: Number of convolution filters. The output is added to the
            input (``x + inputs``), so this must equal the number of input
            channels.
        kernel_size: Width of the convolution kernel.
        dilation: Dilation rate of the convolution.
        dropout: Dropout rate applied to the convolution output.
    """

    def __init__(self, filters, kernel_size, dilation, dropout=0.2):
        super().__init__()
        self.conv = weight_norm(
            layers.Conv1D(filters, kernel_size, padding="causal",
                          activation='relu', dilation_rate=dilation))
        self.dropout = layers.Dropout(dropout)

    def call(self, inputs, training=False):
        """Run the block.

        Args:
            inputs: Input tensor fed to the ``Conv1D`` layer.
            training: Whether the block runs in training mode; forwarded to
                the dropout layer.

        Returns:
            The convolution output (after dropout) plus ``inputs``.
        """
        x = self.conv(inputs)
        # Hand the flag to the Dropout layer instead of branching on it in
        # Python: Dropout is already the identity at inference time, and this
        # also works when `training` is not a plain Python bool.
        x = self.dropout(x, training=training)
        return x + inputs

In [3]:
class TCN_Model(keras.Model):
    """Binary classifier: two residual TCN blocks followed by a dense head.

    The head is ``Flatten`` -> ``Dense(dense_layer_size)`` ->
    ``Dense(max(1, dense_layer_size // 10))`` ->
    ``Dense(max(1, dense_layer_size // 50))`` -> ``Dense(1, sigmoid)``.

    Args:
        n_features: Number of input channels; also used as the filter count
            of both TCN blocks so their residual additions are shape-valid.
        dense_layer_size: Width of the first dense layer (expected >= 1).
        kernel_size: Convolution kernel width of both TCN blocks.
        dilation: Dilation rate of both TCN blocks.
            NOTE(review): both blocks share the same dilation; TCNs commonly
            grow it per block - confirm this is intended.
        dropout: Dropout rate of both TCN blocks.
    """

    def __init__(self, n_features, dense_layer_size, kernel_size, dilation, dropout):
        super().__init__()
        self.tcn1 = TCN_block(n_features, kernel_size, dilation, dropout=dropout)
        self.tcn2 = TCN_block(n_features, kernel_size, dilation, dropout=dropout)
        self.flatten = layers.Flatten()
        self.dense1 = layers.Dense(dense_layer_size, activation='relu')
        # Shrink the head by factors of 10 and 50, but never below one unit.
        self.dense2 = layers.Dense(max(1, dense_layer_size // 10), activation='relu')
        self.dense3 = layers.Dense(max(1, dense_layer_size // 50), activation='relu')
        # Single sigmoid unit: the output is a probability for the positive class.
        self.dense4 = layers.Dense(1, activation='sigmoid')

    def call(self, x, training=False):
        """Compute the sigmoid output for a batch.

        Args:
            x: Input batch. Assumed to be (batch, steps, n_features), as
                required by the Conv1D layers inside the TCN blocks.
            training: Whether to run in training mode; forwarded explicitly
                to the TCN blocks so their dropout is active only in training.

        Returns:
            Tensor with one sigmoid activation per sample.
        """
        x = self.tcn1(x, training=training)
        x = self.tcn2(x, training=training)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        return self.dense4(x)

### Define the Preprocessor

In [20]:
## Preprocessor proc
def outlier_handler(df):
    """Replace IQR outliers in every column with that column's median.

    A value counts as an outlier when it lies more than 1.5 * IQR below the
    first quartile or above the third quartile of its own column. Quartiles
    and medians are computed from ``df`` itself on every call.

    Args:
        df: pandas object providing ``quantile`` and ``median`` (a DataFrame
            of numeric columns in this notebook).

    Returns:
        NumPy array shaped like ``df`` in which outliers are replaced by the
        column median and every other entry is kept as-is.
    """
    first_quartile, third_quartile = df.quantile(0.25), df.quantile(0.75)
    whisker = 1.5 * (third_quartile - first_quartile)
    too_low = df < first_quartile - whisker
    too_high = df > third_quartile + whisker
    # One combined mask replaces the nested np.where of the two bounds.
    return np.where(too_high | too_low, df.median(), df)

# Column groups are picked by dtype from the training split.
numeric_features = X_train.select_dtypes(['int64', 'float64']).columns
cat_cols = X_train.select_dtypes('category').columns

# Numeric branch: replace IQR outliers with the median, then fill the
# remaining missing values with the median as well.
numeric_transformer = Pipeline(steps=[
    ('Outlier_handler', FunctionTransformer(outlier_handler)),
    ('Imputer', SimpleImputer(strategy='median')),
])

# Categorical branch: missing values become their own 'Missing' level, then
# the column is one-hot encoded (binary columns collapse to a single column;
# categories unseen during fit are ignored at transform time).
categorical_transformer = Pipeline(steps=[
    ('Imputer', SimpleImputer(strategy='constant', fill_value='Missing')),
    ('Binary_encoder', OneHotEncoder(sparse=False, drop='if_binary', handle_unknown='ignore')),
])

# Route each column group through its own branch.
preprocessor = ColumnTransformer(transformers=[
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, cat_cols),
])

# Full preprocessing: per-group cleaning followed by standardization of
# every resulting column.
proc = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("scaler", StandardScaler()),
])

### Obtaining the processed dataset

In [21]:
def process(X, y, proc_fit_transform):
    """Transform the features and shape both arrays for the model.

    Args:
        X: Raw features, passed unchanged to ``proc_fit_transform``.
        y: Labels exposing ``to_numpy()`` (e.g. a pandas Series).
        proc_fit_transform: Callable turning ``X`` into a 2-D array of shape
            (n_samples, n_features), e.g. ``proc.fit_transform`` or
            ``proc.transform``.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape (n_samples, 1, n_features),
        i.e. a single step per sample, and ``y`` has shape (n_samples, 1).
    """
    transformed = proc_fit_transform(X)
    n_samples, n_features = transformed.shape[0], transformed.shape[1]
    sequences = transformed.reshape([n_samples, 1, n_features])
    targets = y.to_numpy().reshape([n_samples, 1])
    return sequences, targets

# Fit the preprocessing pipeline on the training split only ...
X_train_proc, y_train_proc= process(X_train, y_train, proc.fit_transform)
# ... and reuse the fitted pipeline (transform only) for the test and
# validation splits. The training line must therefore run first.
X_test_proc, y_test_proc= process(X_test, y_test, proc.transform)
X_val_proc, y_val_proc= process(X_val, y_val, proc.transform)



### Hyperparameter tuning with Optuna

In [None]:
def objective(trial, epochs=100):
    """Optuna objective: train a TCN_Model and return its validation recall.

    Samples ``dense_layer_size``, ``kernel_size``, ``dilation`` and
    ``dropout`` from the trial, trains a fresh model on the processed
    training split and scores it on the processed validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        epochs: Number of training epochs per trial.
            NOTE(review): the original code read an undefined global
            ``epochs``; the default of 100 mirrors the final training cell -
            confirm the value that was actually used for tuning.

    Returns:
        Recall on the validation split, with predictions rounded at 0.5.
        NOTE(review): recall alone is maximal for a model that predicts the
        positive class for every sample - confirm this is the intended target.
    """
    # Drop Keras state left behind by earlier trials so that building many
    # models in one process does not keep accumulating memory.
    keras.backend.clear_session()

    # `step` is passed by keyword; newer Optuna releases make it keyword-only.
    dense_layer_size = trial.suggest_int('dense_layer_size', 50, 10000, step=50)
    kernel_size = trial.suggest_int('kernel_size', 1, 10)
    dilation = trial.suggest_int('dilation', 1, 10)
    dropout = trial.suggest_float('dropout', 0., 1.)

    model = TCN_Model(X_train_proc.shape[2], dense_layer_size, kernel_size, dilation, dropout)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=[keras.metrics.AUC(),
                           keras.metrics.Recall()])
    model.fit(X_train_proc, y_train_proc, batch_size=10000, epochs=epochs, verbose=0)
    y_pred = model.predict(X_val_proc)

    return recall_score(y_val_proc, y_pred.round())

# Maximize the value returned by `objective` (validation recall).
study = optuna.create_study(direction='maximize')
# Stop after 300 trials or 12 hours (timeout is given in seconds),
# whichever comes first.
study.optimize(objective, n_trials=300, timeout=60*60*12)

### Results of the hyperparameter tuning

In [17]:
# Hyperparameters of the trial with the best objective value.
study.best_params

{'dense_layer_size': 2850,
 'kernel_size': 9,
 'dilation': 7,
 'dropout': 0.06119379214886582}

### Training the best model

In [None]:
# Best hyperparameters copied from the tuning results above, hard-coded so
# this cell can be run without repeating the search (dropout is rounded
# from 0.0612 to 0.06).
dense_layer_size = 2850
kernel_size = 9
dilation = 7
dropout = 0.06

# The number of input channels is taken from the last axis of the processed
# training data.
model = TCN_Model(X_train_proc.shape[2], dense_layer_size, kernel_size, dilation, dropout)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
              loss=keras.losses.BinaryCrossentropy(),
              metrics=[keras.metrics.AUC(),
                       keras.metrics.Recall()])
# Train for 100 epochs, evaluating on the validation split after each epoch;
# the returned History object is kept in `hist`.
hist = model.fit(X_train_proc, y_train_proc, batch_size=10000, epochs=100, 
          validation_data=(X_val_proc, y_val_proc), verbose=0)

### Make prediction

In [24]:
# Score the held-out test split: ROC AUC uses the raw sigmoid outputs,
# recall uses the outputs rounded to 0/1 (a 0.5 threshold).
y_pred = model.predict(X_test_proc)
print(f'ROC_AUC: {roc_auc_score(y_test, y_pred):.4f}, Recall: {recall_score(y_test, y_pred.round()):.4f}')

ROC_AUC: 0.5425, Recall: 0.2087
