# Applications d'algo Deep Learning (NN) adaptés aux Time Series

Il existe plusieurs types de modèles adaptés aux Time Series. Leur particularité est de ne pas traiter les données comme de simples événements indépendants, mais de conserver une « mémoire » des événements précédents pour mieux analyser un instant T.

Récemment, les modèles de type Transformer avec attention, qui ont connu de gros succès en NLP, ont été adaptés aux Time Series avec des résultats qui dépassent ceux des autres types de modèles (GRU, LSTM, ...). L'avantage de ces modèles est que, grâce au mécanisme d'attention et contrairement aux RNN dont le fenêtrage temporel est fixé sur une période donnée, ils peuvent détecter des patterns sur du très long terme.


#### First of all, set the random seeds in order to have comparable results

In [1]:
from numpy.random import seed
seed(1)
import tensorflow as tf
tf.random.set_seed(2)

## Input parameters

To be reviewed: adapt before 1st launch

In [2]:
modelName = 'NN_TS_TFTS_CUSTO_KDD_TRANSFORMER_01'

In [3]:
pathModelWeights = 'weights/' + modelName + '_WEIGHTS.h5'
pathModel = 'model/' + modelName + '_MODEL.h5'

## Reproduction du modèle utilisé lors du challenge KDD de 2022

3e place du concours. 

Sur la base du code GIT : https://github.com/LongxingTan/KDDCup2022-WPF

On va :
1/ Reproduire, dans un premier temps, le traitement réalisé ici avec le dataset original (Wind Power)
2/ Adapter le traitement à notre timeseries

In [4]:
# pip install psycopg2-binary

In [5]:
import time
import numpy as np
import random
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
import os.path
import joblib
import itertools
import functools

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
# pip install attention

In [8]:
from sklearn.model_selection import train_test_split, ShuffleSplit
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Convolution1D, MaxPooling1D, Flatten
from tensorflow.keras.layers import LSTM, GRU, TimeDistributed, Conv1D, ConvLSTM2D, BatchNormalization
from attention import Attention
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import Input, Model, layers
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint

from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold


In [9]:
#pip install tfts

In [10]:
import tfts
from tfts import AutoModel, AutoConfig, KerasTrainer
from tfts.models.seq2seq import Seq2seq
from tfts.models.wavenet import WaveNet
from tfts.models.transformer import Transformer
from tfts.models.bert import Bert


## Classes KDD 

In [11]:
class DataReader(object):
    """Serve (history, horizon) windows cut out of a 2D table of time series.

    For a valid row index ``i`` the reader returns the ``train_sequence_length``
    rows before ``i`` as model inputs and the ``predict_sequence_length`` rows
    starting at ``i`` as teacher/target values.

    Index labels are used directly as row positions into ``data.values``, so
    ``data`` is expected to carry a default ``0..n-1`` index.
    """

    def __init__(
        self,
        data,
        train_sequence_length=10*24*6,
        predict_sequence_length=48 * 6,
        idx=None,
        target_aggs=1,
        target_column_idx=(-2, -1),
        feature_column_idx_short=(0,),
        feature_column_idx_long=(-2, -1),
    ):
        """Store the window geometry and work out which row indexes are usable.

        Args:
            data: pandas DataFrame holding one row per (turbine, timestep).
                Column 0 is read as the turbine id. A ``'TurbID'`` column is
                required when ``idx`` is None.
            train_sequence_length: number of history rows fed to the model.
            predict_sequence_length: number of future rows to predict.
            idx: explicit list of usable row indexes. When None, every row is
                kept except the first ``train_sequence_length`` and the last
                ``predict_sequence_length - 1`` rows of each turbine.
            target_aggs: stored as-is; not used by the reader itself.
            target_column_idx: positions of the target columns. The first
                entry is also served alone as the ``'teacher'`` signal.
            feature_column_idx_short: positions of the columns read over the
                history window only.
            feature_column_idx_long: positions of the columns read over the
                history window plus the prediction horizon.
        """
        self.train_sequence_length = train_sequence_length
        self.predict_sequence_length = predict_sequence_length
        self.target_aggs = target_aggs
        # Copy into fresh lists: no default object is shared between instances
        # and numpy receives a plain list for column (fancy) indexing.
        self.target_column_idx = list(target_column_idx)
        self.feature_column_idx_short = list(feature_column_idx_short)
        self.feature_column_idx_long = list(feature_column_idx_long)

        if idx is None:
            per_turbine = data.groupby(['TurbID'])
            # A set gives O(1) membership tests; the former list made this
            # filter quadratic in the number of rows.
            drop_idx = set(per_turbine.tail(predict_sequence_length - 1).index.tolist())
            drop_idx.update(per_turbine.head(train_sequence_length).index.tolist())
            self.idx = [i for i in data.index.to_list() if i not in drop_idx]
        else:
            self.idx = idx
        self.data = data.values

    def __len__(self):
        """Return the number of usable window positions."""
        return len(self.idx)

    def __getitem__(self, idx):
        """Return ``({'inputs': (turbine_id, raw, raw_long), 'teacher': teacher}, target)``.

        ``raw`` covers rows ``[idx - train_sequence_length, idx)``, ``raw_long``
        covers ``[idx - train_sequence_length, idx + predict_sequence_length)``,
        and ``teacher`` / ``target`` cover ``[idx, idx + predict_sequence_length)``.
        """
        train_begin_idx = idx - self.train_sequence_length
        test_end_idx = idx + self.predict_sequence_length
        turbine_id = self.data[idx, 0]
        raw = self.data[train_begin_idx:idx, self.feature_column_idx_short]
        raw_long = self.data[train_begin_idx:test_end_idx, self.feature_column_idx_long]

        # Select the first target column with a one-element list so the result
        # keeps a trailing axis of size 1. The former ``[c:c+1]`` slice was
        # empty when ``c == -1``.
        teacher = self.data[idx:test_end_idx, [self.target_column_idx[0]]]
        target = self.data[idx:test_end_idx, self.target_column_idx]

        return {'inputs': (turbine_id, raw, raw_long), 'teacher': teacher}, target

    def iter(self):
        """Yield the sample of every usable index, in ``self.idx`` order."""
        for i in self.idx:
            yield self[i]

In [12]:
class DataLoader(object):
    """Callable factory turning a ``DataReader`` into a batched ``tf.data.Dataset``."""

    def __init__(self, data_reader, short_feature_size=10, long_feature_size=2, target_column_size=2):
        """Remember the reader and the tensor widths used to declare static shapes.

        Args:
            data_reader: object exposing ``train_sequence_length``,
                ``predict_sequence_length``, ``target_aggs`` and an ``iter``
                generator method.
            short_feature_size: number of columns of the history-only features.
            long_feature_size: number of columns of the history + horizon features.
            target_column_size: number of columns of the target tensor.
        """
        # Feature/target widths.
        self.short_feature_size = short_feature_size
        self.long_feature_size = long_feature_size
        self.target_column_size = target_column_size
        # Window geometry is taken over from the reader.
        self.data_reader = data_reader
        self.train_sequence_length = data_reader.train_sequence_length
        self.predict_sequence_length = data_reader.predict_sequence_length
        self.target_aggs = data_reader.target_aggs

    def __call__(self, batch_size, shuffle=False, drop_remainder=False):
        """Build the dataset: generator -> optional shuffle -> batch -> prefetch."""
        history_steps = self.train_sequence_length
        total_steps = history_steps + self.predict_sequence_length
        horizon_steps = self.predict_sequence_length // self.target_aggs

        # Per-sample signature of what ``data_reader.iter`` yields.
        features_spec = {
            'inputs': (
                tf.TensorSpec(shape=(), dtype=tf.int32),
                tf.TensorSpec(shape=(history_steps, self.short_feature_size), dtype=tf.float32),
                tf.TensorSpec(shape=(total_steps, self.long_feature_size), dtype=tf.float32),
            ),
            'teacher': tf.TensorSpec(shape=(horizon_steps, 1), dtype=tf.float32),
        }
        target_spec = tf.TensorSpec(shape=(horizon_steps, self.target_column_size), dtype=tf.float32)

        batches = tf.data.Dataset.from_generator(
            self.data_reader.iter,
            output_signature=(features_spec, target_spec),
        )
        if shuffle:
            batches = batches.shuffle(buffer_size=1000)
        batches = batches.batch(batch_size, drop_remainder=drop_remainder)
        return batches.prefetch(tf.data.experimental.AUTOTUNE)

## Préparation des données

#### nn_train.build_data

In [13]:
data = pd.read_csv('KDD/raw/wtbdata_245days.csv')

In [14]:
data.head()

Unnamed: 0,TurbID,Day,Tmstamp,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv
0,1,1,00:00,,,,,,,,,,
1,1,1,00:10,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66
2,1,1,00:20,6.27,-2.18,30.6,41.63,20.91,1.0,1.0,1.0,-0.24,509.76
3,1,1,00:30,6.42,-0.73,30.52,41.52,20.91,1.0,1.0,1.0,-0.26,542.53
4,1,1,00:40,6.25,0.89,30.49,41.38,20.91,1.0,1.0,1.0,-0.23,509.36


In [15]:
print(data.shape, np.max(data['Day']))

(4727520, 13) 245


In [16]:
data['start_time'] = data['Day'].astype(str) + ' ' + data['Tmstamp'].astype(str) # Flag of the first of 288

In [17]:
# Split by day number: days 1..180 are used for training, days 231..245 for validation.
# Days 181..230 belong to neither split; the trailing comments keep the alternative ranges.
train_days = range(1, 181)  # range(1, 231)
valid_days = range(231, 246)  # range(231, 246)

In [18]:
valid_df_raw = data.loc[data['Day'].isin(valid_days)]  #  dataset part reserved after training for validation metrics

In [19]:
valid_df_raw.shape

(289440, 14)

In [20]:
# Fill missing values per turbine: carry the previous value forward first, then
# back-fill what is still missing at the very start of each turbine's series.
# transform() returns a frame indexed like `data`, so the assignment stays
# row-aligned (groupby.apply may return a group-keyed MultiIndex instead,
# depending on the pandas version).
data[['Wspd', 'Wdir', 'Patv']] = data.groupby('TurbID')[['Wspd', 'Wdir', 'Patv']].transform(
    lambda col: col.ffill().bfill()
)

In [21]:
data.head()

Unnamed: 0,TurbID,Day,Tmstamp,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv,start_time
0,1,1,00:00,6.17,-3.99,,,,,,,,494.66,1 00:00
1,1,1,00:10,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66,1 00:10
2,1,1,00:20,6.27,-2.18,30.6,41.63,20.91,1.0,1.0,1.0,-0.24,509.76,1 00:20
3,1,1,00:30,6.42,-0.73,30.52,41.52,20.91,1.0,1.0,1.0,-0.26,542.53,1 00:30
4,1,1,00:40,6.25,0.89,30.49,41.38,20.91,1.0,1.0,1.0,-0.23,509.36,1 00:40


In [22]:
# Derive calendar features from the 'HH:MM' timestamp and the day counter.
_tmstamp = pd.to_datetime(data['Tmstamp'], format='%H:%M')  # parse once, reuse for both fields
data['Hour'] = _tmstamp.dt.hour
data['Minute'] = _tmstamp.dt.minute
# Index of the 10-minute slot within the day (0..143 with 10-minute timestamps).
data['MinuteofDay'] = data['Hour'] * 6 + data['Minute'] / 10
# Position inside a 7-day cycle, 0..6 ('Day' starts at 1). The former `Day // 7`
# was a week counter that grows with the day number, which does not match the
# later `day_of_week / 6 - 0.5` normalisation that assumes values in 0..6.
data['DayofWeek'] = (data['Day'] - 1) % 7

In [23]:
data['is_valid'] = 1  # 1 means valid

In [24]:
# Min-max scale the selected input features.
# The scaler is fitted on the training days only, so that the min/max of the
# validation period do not leak into training; the same transform is then
# applied to every row (validation values may fall slightly outside [0, 1]).
scaler = MinMaxScaler()
scaler.fit(data.loc[data['Day'].isin(train_days), ['Wspd', 'Wdir', 'Prtv']])
data[['Wspd', 'Wdir', 'Prtv']] = scaler.transform(data[['Wspd', 'Wdir', 'Prtv']])

In [25]:
data.head(2)

Unnamed: 0,TurbID,Day,Tmstamp,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv,start_time,Hour,Minute,MinuteofDay,DayofWeek,is_valid
0,1,1,00:00,0.23469,0.571311,,,,,,,,494.66,1 00:00,0,0,0.0,0,1
1,1,1,00:10,0.23469,0.571311,30.73,41.8,25.92,1.0,1.0,1.0,0.562729,494.66,1 00:10,0,10,1.0,0,1


#### Create rolling indicators, with different lags and different aggregate functions

In [26]:
roll_column='Wspd'
period=6
lags=[6, 144]
id_col='TurbID'
agg_funs=['mean', 'max']
feature_cols=[]

In [27]:
# One engineered column per (lag, aggregation) pair, in the same order as the
# nested loops "for lag / for agg_fun".
for lag, agg_fun in itertools.product(lags, agg_funs):
    feature_col = roll_column + '_lag{}_roll{}_{}'.format(lag, period, agg_fun)
    feature_cols.append(feature_col)

    def _shifted_roll(series, _lag=lag, _agg=agg_fun):
        # Shift by lag + 1 rows, then aggregate over a window of `period` rows.
        return series.shift(_lag + 1).rolling(period).agg(_agg)

    if id_col is None:
        # Single series: roll over the whole column.
        data[feature_col] = _shifted_roll(data[roll_column])
    else:
        # Several series: roll inside each group so windows never cross ids.
        data[feature_col] = data.groupby(id_col)[roll_column].transform(_shifted_roll)

In [28]:
feature_column_short = ['Wspd', 'Wdir', 'Etmp', 'Itmp', 'Ndir', 'Pab1', 'Pab2', 'Pab3', 'Prtv', 'Patv']
feature_column_long = ['DayofWeek', 'Hour', 'MinuteofDay']
target_column = ['Patv', 'is_valid'] # Patv is Wind Power average -> value to predict

In [29]:
feature_cols

['Wspd_lag6_roll6_mean',
 'Wspd_lag6_roll6_max',
 'Wspd_lag144_roll6_mean',
 'Wspd_lag144_roll6_max']

In [30]:
feature_column_short += feature_cols

In [31]:
data = data.reset_index(drop=True)

#### get column index (target, short, long)

In [32]:
target_column_idx = [data.columns.get_loc(c) for c in target_column]
feature_column_idx_short = [data.columns.get_loc(c) for c in feature_column_short]
feature_column_idx_long = [data.columns.get_loc(c) for c in feature_column_long]

In [33]:
# Window configuration for the sample index computation and the data readers.
day_columns='Day' 
mode='train'
# 2 * 24 * 6 = 288 steps: with one row every 10 minutes (6 per hour) this is 2 days.
train_sequence_length = 2 * 24 * 6  # history window length, in rows
predict_sequence_length = 2 * 24 * 6  # forecast horizon length, in rows
strides= 1
max_lags=288

In [34]:
def func(data):
    """Return the index labels of the last ``predict_sequence_length - 1`` rows of *data*.

    These rows do not have enough following rows to fill a prediction window.
    """
    trailing_rows = data.tail(predict_sequence_length - 1)
    return trailing_rows.index.tolist()


def func2(data):
    """Return the index labels of the first ``max(train_sequence_length, max_lags) + 1`` rows.

    These rows do not have enough preceding rows for the history window / lags.
    """
    warmup_rows = max(train_sequence_length, max_lags) + 1
    return data.head(warmup_rows).index.tolist()

In [35]:
cpu_count = os.cpu_count()

#### Calculate IDX (TRAIN) to be removed 

=> Rows that do not have enough previous values (history window / rolling features) or enough following values (prediction window)

In [36]:
# Get all index matching with Day = training days
all_idx = data.loc[data[day_columns].isin(train_days)].index.tolist()

In [37]:
data_grouped = data.groupby(['TurbID'])

In [38]:
data_grouped.head()

Unnamed: 0,TurbID,Day,Tmstamp,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,...,start_time,Hour,Minute,MinuteofDay,DayofWeek,is_valid,Wspd_lag6_roll6_mean,Wspd_lag6_roll6_max,Wspd_lag144_roll6_mean,Wspd_lag144_roll6_max
0,1,1,00:00,0.234690,0.571311,,,,,,...,1 00:00,0,0,0.0,0,1,,,,
1,1,1,00:10,0.234690,0.571311,30.73,41.80,25.92,1.00,1.00,...,1 00:10,0,10,1.0,0,1,,,,
2,1,1,00:20,0.238494,0.571653,30.60,41.63,20.91,1.00,1.00,...,1 00:20,0,20,2.0,0,1,,,,
3,1,1,00:30,0.244199,0.571927,30.52,41.52,20.91,1.00,1.00,...,1 00:30,0,30,3.0,0,1,,,,
4,1,1,00:40,0.237733,0.572232,30.49,41.38,20.91,1.00,1.00,...,1 00:40,0,40,4.0,0,1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4692240,134,1,00:00,0.184481,0.570116,,,,,,...,1 00:00,0,0,0.0,0,1,,,,
4692241,134,1,00:10,0.184481,0.570116,30.83,30.59,7.30,0.23,0.23,...,1 00:10,0,10,1.0,0,1,,,,
4692242,134,1,00:20,0.164701,0.571508,30.79,30.58,-4.94,0.03,0.03,...,1 00:20,0,20,2.0,0,1,,,,
4692243,134,1,00:30,0.161658,0.571608,30.77,30.50,-4.95,0.04,0.04,...,1 00:30,0,30,3.0,0,1,,,,


In [39]:
# Collect, turbine by turbine, the last rows that lack enough following values
# to fill the prediction window, then flatten into one list of indexes.
tail_idx_per_turbine = joblib.Parallel(cpu_count)(
    joblib.delayed(func)(turbine_rows) for _turbine_id, turbine_rows in data_grouped
)
dropidx = list(itertools.chain.from_iterable(tail_idx_per_turbine))

In [40]:
# Collect, turbine by turbine, the first rows that lack enough preceding values
# (history window / lagged rolling features) and add them to the drop list.
head_idx_per_turbine = joblib.Parallel(cpu_count)(
    joblib.delayed(func2)(turbine_rows) for _turbine_id, turbine_rows in data_grouped
)
dropidx2 = list(itertools.chain.from_iterable(head_idx_per_turbine))
dropidx.extend(dropidx2)

In [41]:
# Training indexes = rows of the training days minus every row flagged for removal.
train_idx = sorted(set(all_idx).difference(dropidx))

#### Reader object used to store and give access to Training data reading

In [42]:
BATCH_SIZE=32

In [43]:
target_aggs = 1

In [44]:
train_data_reader = DataReader(
        data, 
        train_sequence_length, 
        predict_sequence_length, 
        idx=train_idx, 
        target_aggs=target_aggs, 
        target_column_idx=target_column_idx, 
        feature_column_idx_short=feature_column_idx_short, 
        feature_column_idx_long=feature_column_idx_long)

In [45]:
train_data_loader = DataLoader(
        train_data_reader, 
        len(feature_column_short), 
        len(feature_column_long))(batch_size=1024, shuffle=True, drop_remainder=True)

#### Calculate IDX (VALID) to be removed 

=> Rows that do not have enough following values to fill the prediction window (only the end of each turbine's series is dropped here)

In [46]:
# Get all index matching with Day = valid days
all_idx = data.loc[data[day_columns].isin(valid_days)].index.tolist()

In [47]:
data_grouped = data.groupby(['TurbID'])

In [48]:
# remoove last indexes from data (not enough next val to match with predicted size expected)
dropidx = joblib.Parallel(cpu_count)(joblib.delayed(func)(group) for name, group in data_grouped)
dropidx = list(itertools.chain(*dropidx))

In [49]:
val_idx = sorted(list(set(all_idx) - set(dropidx)))

In [50]:
valid_data_reader = DataReader(
    data, 
    train_sequence_length, 
    predict_sequence_length, 
    idx=val_idx, 
    target_aggs=target_aggs, 
    target_column_idx=target_column_idx, 
    feature_column_idx_short=feature_column_idx_short, 
    feature_column_idx_long=feature_column_idx_long)

In [51]:
# Batched tf.data pipeline over the validation windows.
# NOTE(review): shuffle=True and drop_remainder=True are the training settings;
# for validation they reorder the samples and drop the last incomplete batch —
# confirm this is intended.
valid_data_loader = DataLoader(
    valid_data_reader, 
    len(feature_column_short), 
    len(feature_column_long))(batch_size=1024, shuffle=True, drop_remainder=True)

In [52]:
valid_df = data.iloc[val_idx]

In [53]:
print(len(train_data_reader), len(valid_data_reader), len(valid_df)) 

3434554 250982 250982


## Prepare Model

In [54]:
def build_model(use_model, train_sequence_length, predict_sequence_length=288, target_aggs=1, short_feature_nums=10, long_feature_nums=1):
    """Assemble the Keras model from three feature inputs plus a teacher input.

    NOTE: this definition is immediately overridden by the identical
    ``build_model`` defined in the following cell.
    NOTE(review): ``KDD`` and ``cfg`` are not defined in the visible part of
    this notebook — confirm where they come from before calling this function.

    Args:
        use_model: model name forwarded to ``build_tfts_model``.
        train_sequence_length: number of history steps of the inputs.
        predict_sequence_length: number of future steps to predict.
        target_aggs: divisor applied to ``predict_sequence_length`` for the
            teacher input and the model output length.
        short_feature_nums: width of the history-only feature input.
        long_feature_nums: width of the history + horizon feature input.

    Returns:
        A ``tf.keras.Model`` taking ``{'inputs': ..., 'teacher': ...}``.
    """
    inputs = (
        Input([1]),
        Input([train_sequence_length, short_feature_nums]),  # raw feature numbers
        Input([train_sequence_length+predict_sequence_length, long_feature_nums])  # long feature
        )
    teacher_inputs = Input([predict_sequence_length//target_aggs, 1])

    ts_inputs = KDD(train_sequence_length, predict_sequence_length)(inputs)
    outputs = build_tfts_model(
        use_model=use_model, 
        predict_sequence_length=predict_sequence_length//target_aggs, 
        custom_model_params=cfg.custom_model_params)(ts_inputs, teacher_inputs)

    model = tf.keras.Model(inputs={'inputs':inputs, 'teacher': teacher_inputs}, outputs=outputs)
    return model

In [55]:
def build_model(use_model, train_sequence_length, predict_sequence_length=288, target_aggs=1, short_feature_nums=10, long_feature_nums=1, custom_model_params=None):
    """Assemble the Keras model from three feature inputs plus a teacher input.

    NOTE(review): ``KDD`` is not defined in the visible part of this notebook —
    confirm where it comes from before calling this function.

    Args:
        use_model: model name forwarded to ``build_tfts_model``.
        train_sequence_length: number of history steps of the inputs.
        predict_sequence_length: number of future steps to predict.
        target_aggs: divisor applied to ``predict_sequence_length`` for the
            teacher input and the model output length.
        short_feature_nums: width of the history-only feature input.
        long_feature_nums: width of the history + horizon feature input.
        custom_model_params: hyper-parameter dict forwarded to the tfts model.
            When None, ``cfg.custom_model_params`` is used as before, which
            requires a global ``cfg`` object to exist.

    Returns:
        A ``tf.keras.Model`` taking ``{'inputs': ..., 'teacher': ...}``.
    """
    if custom_model_params is None:
        # Backward-compatible fallback to the original global configuration.
        custom_model_params = cfg.custom_model_params

    inputs = (
        Input([1]),
        Input([train_sequence_length, short_feature_nums]),  # history-only features
        Input([train_sequence_length+predict_sequence_length, long_feature_nums])  # history + horizon features
        )
    teacher_inputs = Input([predict_sequence_length//target_aggs, 1])

    ts_inputs = KDD(train_sequence_length, predict_sequence_length)(inputs)
    outputs = build_tfts_model(
        use_model=use_model,
        predict_sequence_length=predict_sequence_length//target_aggs,
        custom_model_params=custom_model_params)(ts_inputs, teacher_inputs)

    model = tf.keras.Model(inputs={'inputs':inputs, 'teacher': teacher_inputs}, outputs=outputs)
    return model

In [56]:
use_model = 'bert' # Bidirectional Encoder Representations from Transformers

In [57]:
short_feature_nums=len(feature_column_short)
long_feature_nums=len(feature_column_long)

#### Define Model Format Inputs

=> TODO: describe how each part of the inputs is used

In [58]:
# Model inputs, in the same order as the 'inputs' tuple yielded by the DataReader:
# (turbine id, short features, long features).
inputs = (
        Input([1]),  # turbine id
        Input([train_sequence_length, short_feature_nums]),  # short features over the history window
        Input([train_sequence_length+predict_sequence_length, long_feature_nums])  # long features over history + forecast horizon
        )

In [59]:
teacher_inputs = Input([predict_sequence_length//target_aggs, 1])

In [60]:
#### ts_inputs = KDD(train_sequence_length, predict_sequence_length)(inputs)

In [61]:
# REF = KDD object
_, raw, raw_long  = inputs  # feature is here

In [62]:
raw

<KerasTensor: shape=(None, 288, 14) dtype=float32 (created by layer 'input_2')>

In [63]:
raw_long

<KerasTensor: shape=(None, 576, 3) dtype=float32 (created by layer 'input_3')>

In [64]:
# Split the 14 short features along the last axis: the first 10 columns are the
# original measurements, the remaining 4 are the engineered rolling features.
raw, manual = tf.split(raw, [10, tf.shape(raw)[-1]-10], axis=-1)

In [65]:
raw  # Original features

<KerasTensor: shape=(None, 288, 10) dtype=float32 (created by layer 'tf.split')>

In [66]:
manual # Feature engineering

<KerasTensor: shape=(None, 288, 4) dtype=float32 (created by layer 'tf.split')>

#### Split Raw features (10) in 10 different layers (explicit for wind_speed, wind dir, active power)

In [67]:
# One tensor per raw column, in feature_column_short order:
# Wspd and Wdir come first, Patv (active power) is the 10th column.
wind_speed, wind_dir, _, _, _, _, _, _, _, active_power = tf.split(raw, 10, axis=-1)

In [68]:
wind_speed

<KerasTensor: shape=(None, 288, 1) dtype=float32 (created by layer 'tf.split_1')>

In [69]:
wind_dir

<KerasTensor: shape=(None, 288, 1) dtype=float32 (created by layer 'tf.split_1')>

In [70]:
active_power

<KerasTensor: shape=(None, 288, 1) dtype=float32 (created by layer 'tf.split_1')>

In [71]:
manual = tf.where(tf.math.is_nan(manual), tf.zeros_like(manual), manual)

In [72]:
manual

<KerasTensor: shape=(None, 288, 4) dtype=float32 (created by layer 'tf.where')>

#### Split long features (3) into 3 different layers (explicit for day_of_week, hour_feature, minute_of_day)

In [73]:
day_of_week, hour_feature, minute_of_day = tf.split(raw_long, 3, axis=-1)

In [74]:
day_of_week

<KerasTensor: shape=(None, 576, 1) dtype=float32 (created by layer 'tf.split_2')>

In [75]:
# Rescale the calendar features so that their expected range maps to [-0.5, 0.5].
hour_feature = hour_feature / 23 - 0.5       # hour of day 0..23 -> [-0.5, 0.5]
minute_of_day = minute_of_day / 143 - 0.5    # 10-minute slot of the day 0..143 (24 h x 6 slots) -> [-0.5, 0.5]
day_of_week = day_of_week / 6 - 0.5          # maps 0..6 -> [-0.5, 0.5]; NOTE(review): confirm the 'DayofWeek' column really stays within 0..6

# Remark YLE -> This linear encoding hides the cyclic nature of time (23h and 00h are close in reality but far apart here)

In [76]:
hour_feature

<KerasTensor: shape=(None, 576, 1) dtype=float32 (created by layer 'tf.math.subtract_1')>

#### Create decode layers with only Predict Sequence length

In [77]:
# Cut each calendar feature along the time axis into its history part (discarded)
# and its forecast-horizon part (kept as decoder input).
_history_and_horizon = [train_sequence_length, predict_sequence_length]
_, decoder_hour_feature = tf.split(hour_feature, _history_and_horizon, axis=1)
_, decoder_minute_feature = tf.split(minute_of_day, _history_and_horizon, axis=1)
_, decoder_day_feature = tf.split(day_of_week, _history_and_horizon, axis=1)

In [78]:
decoder_hour_feature

<KerasTensor: shape=(None, 288, 1) dtype=float32 (created by layer 'tf.split_3')>

In [79]:
encoder_features = tf.concat([wind_speed, wind_dir], axis=-1)

In [80]:
# Encoder features: wind speed and wind direction over the 288-step history window
# (2 days x 24 h x 6 ten-minute samples per hour)
encoder_features

<KerasTensor: shape=(None, 288, 2) dtype=float32 (created by layer 'tf.concat')>

In [81]:
# Decoder features: the 3 calendar features (hour, 10-minute slot, day) over the
# 288 future timesteps to predict (2 days x 24 h x 6 ten-minute samples per hour)
decoder_features = tf.concat([decoder_hour_feature, decoder_minute_feature, decoder_day_feature], axis=-1)

In [82]:
decoder_features = tf.cast(decoder_features, tf.float32)

#### All inputs layers have been created :

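- Encoder : Wind speed & wind direction features over the 288-timestep history window (last past values)
- Decoder : Day, Hour, Minute features over the 288-timestep forecast window (future timesteps to predict)
- active_power : past wind power (Patv) values over the 288-timestep history window; the future values to predict are supplied separately, as the target yielded by the data loader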

In [83]:
print('Feature shape Encoder : ', encoder_features.shape)
print('Feature shape Decoder : ', decoder_features.shape)
print('Target shape : ', active_power.shape)

Feature shape Encoder :  (None, 288, 2)
Feature shape Decoder :  (None, 288, 3)
Target shape :  (None, 288, 1)


In [84]:
ts_inputs = active_power, encoder_features, decoder_features

#### Design Model

In [85]:
# Hyper-parameters handed to the tfts model constructor through its
# `custom_model_params` argument (see build_tfts_model).
custom_model_params = {
    'n_encoder_layers': 1,
    'use_token_embedding': False,
    'attention_hidden_sizes': 32*1,
    'num_heads': 1,
    'attention_dropout': 0.,
    'ffn_hidden_sizes': 32,
    'ffn_filter_sizes': 32,  # should be the same as attention_hidden_sizes
    'ffn_dropout': 0.,
    'layer_postprocess_dropout': 0.,
    'skip_connect': False
}

In [86]:
def build_tfts_model(use_model, predict_sequence_length, custom_model_params=None):
    """Instantiate the tfts model selected by name.

    Args:
        use_model: case-insensitive model name; one of "seq2seq", "wavenet",
            "transformer" or "bert".
        predict_sequence_length: forwarded to the model constructor.
        custom_model_params: forwarded to the model constructor.

    Returns:
        The constructed model object.

    Raises:
        ValueError: if ``use_model`` is not one of the supported names.
    """
    model_key = use_model.lower()
    if model_key not in ("seq2seq", "wavenet", "transformer", "bert"):
        raise ValueError("unsupported use_model of {} yet".format(use_model))

    # Name -> model class lookup, built after the name has been validated.
    model_classes = {
        "seq2seq": Seq2seq,
        "wavenet": WaveNet,
        "transformer": Transformer,
        "bert": Bert,
    }
    return model_classes[model_key](
        predict_sequence_length=predict_sequence_length,
        custom_model_params=custom_model_params,
    )

In [87]:
ts_inputs

(<KerasTensor: shape=(None, 288, 1) dtype=float32 (created by layer 'tf.split_1')>,
 <KerasTensor: shape=(None, 288, 2) dtype=float32 (created by layer 'tf.concat')>,
 <KerasTensor: shape=(None, 288, 3) dtype=float32 (created by layer 'tf.cast')>)

In [88]:
teacher_inputs

<KerasTensor: shape=(None, 288, 1) dtype=float32 (created by layer 'input_4')>

In [89]:
outputs = build_tfts_model(
        use_model=use_model, 
        predict_sequence_length=predict_sequence_length//target_aggs, 
        custom_model_params=custom_model_params)(ts_inputs, teacher_inputs)

In [90]:
outputs

<KerasTensor: shape=(None, 288, 1) dtype=float32 (created by layer 'tf.expand_dims')>

#### Custom loss function (masked Root Mean Square Error) used for the Gradient Descent

In [91]:
model = tf.keras.Model(inputs={'inputs':inputs, 'teacher': teacher_inputs}, outputs=outputs)

In [92]:
def custom_loss(y_true, y_pred):
    """Masked root-mean-square error.

    ``y_true`` carries two channels on its last axis: the target value and a
    mask. Both the target and the prediction are multiplied by the mask before
    the squared error is averaged over all elements; a small epsilon keeps the
    square root away from zero.
    """
    target, validity = tf.split(y_true, 2, axis=-1)
    validity = tf.cast(validity, dtype=tf.float32)
    squared_error = tf.square(target * validity - y_pred * validity)
    return tf.math.sqrt(tf.reduce_mean(squared_error) + 1e-9)

In [93]:
optimizer = tf.keras.optimizers.Adam(learning_rate = 5e-3)
loss_fn = custom_loss  

In [94]:
inputs

(<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'input_1')>,
 <KerasTensor: shape=(None, 288, 14) dtype=float32 (created by layer 'input_2')>,
 <KerasTensor: shape=(None, 576, 3) dtype=float32 (created by layer 'input_3')>)

In [95]:
trainer = KerasTrainer(model, loss_fn=loss_fn, optimizer=optimizer, strategy=None)

In [96]:
fit_params = {
        'n_epochs': 3,
        'batch_size': 1024,
        'learning_rate': 5e-3,
        'verbose': 1,
        'checkpoint': ModelCheckpoint(
            'checkpoint/nn_{}.h5'.format(use_model), 
            monitor='val_loss', 
            save_weights_only=True, 
            save_best_only=False, 
            verbose=1),      
    }

In [97]:
raise Exception('coucou')

Exception: coucou

In [98]:
train_data_loader.get_single_element()

<PrefetchDataset element_spec=({'inputs': (TensorSpec(shape=(1024,), dtype=tf.int32, name=None), TensorSpec(shape=(1024, 288, 14), dtype=tf.float32, name=None), TensorSpec(shape=(1024, 576, 3), dtype=tf.float32, name=None)), 'teacher': TensorSpec(shape=(1024, 288, 1), dtype=tf.float32, name=None)}, TensorSpec(shape=(1024, 288, 2), dtype=tf.float32, name=None))>

In [None]:
trainer.train(train_data_loader, valid_dataset=valid_data_loader, **fit_params) 

In [106]:
train_data_loader

<PrefetchDataset element_spec=({'inputs': (TensorSpec(shape=(1024,), dtype=tf.int32, name=None), TensorSpec(shape=(1024, 288, 14), dtype=tf.float32, name=None), TensorSpec(shape=(1024, 576, 3), dtype=tf.float32, name=None)), 'teacher': TensorSpec(shape=(1024, 288, 1), dtype=tf.float32, name=None)}, TensorSpec(shape=(1024, 288, 2), dtype=tf.float32, name=None))>

In [104]:
data[0]

TypeError: 'TakeDataset' object is not subscriptable

In [111]:
dataset_train = valid_data_loader[1]

TypeError: 'PrefetchDataset' object is not subscriptable

In [112]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 288, 14)]    0           []                               
                                                                                                  
 tf.compat.v1.shape (TFOpLambda  (3,)                0           ['input_2[0][0]']                
 )                                                                                                
                                                                                                  
 tf.__operators__.getitem (Slic  ()                  0           ['tf.compat.v1.shape[0][0]']     
 ingOpLambda)                                                                                     
                                                                                              