In [None]:
import tensorflow as tf
import tensorflow_transform as tft
import tensorflow_addons as tfa

In [None]:
# Paths of the CSV files that load_dataset reads for training and validation.
training_file_path = "##########"
validation_file_path = "##########"

In [None]:
from functools import partial

# Setting defaults
# Features read from the CSV and fed to the model, in file column order.
SELECT_COLUMNS = [
    'day_sin', 'day_cos', 'year_sin', 'year_cos',
    'air_pressure_ashore', 'air_pressure_afloat', 'diff_air_pressure',
    'precipitation', 'is_rainny', 'temperature', 'humidity',
    'wind_vector_x', "wind_vector_y",
    'hours_of_daylight', 'global_solar_radiation', 'weather', 'cloud_cover',
]

# Full CSV header: the selected features followed by two trailing columns
# that are not part of SELECT_COLUMNS.
CSV_COLUMNS = SELECT_COLUMNS + ['temp_mean', 'temp_var']

# One independent float default ([0.0]) per selected column.
DEFAULTS = [[0.0] for _ in SELECT_COLUMNS]

# Loading dataset
def load_dataset(filename, batch_size, mode):
    """Build a batched, windowed seq2seq ``tf.data`` pipeline from CSV data.

    Rows are read in file order (no shuffling at the CSV level) and grouped
    into sliding windows of 48 consecutive rows with a shift of 1. The first
    24 rows of a window provide the encoder features; the ``temperature``
    column of the last 24 rows is the sequence to predict.

    Args:
        filename: File pattern handed to ``make_csv_dataset``.
        batch_size: Number of windows per batch. A final partial batch is
            dropped.
        mode: ``"train"`` or ``"eval"`` produce elements of the form
            ``((encoder_features, decoder_input), decoder_target)`` where
            ``decoder_input`` is the target shifted right by one step with a
            leading zero. Only ``"train"`` is shuffled. Any other value
            produces ``({"encoder_inputs": encoder_features,
            "decoder_inputs": {"start_inputs": zeros(1)}}, decoder_target)``.

    Returns:
        A ``tf.data.Dataset`` yielding batches of the elements described
        above.
    """
    WINDOW_SIZE = 48
    ENCODER_STEPS = 24
    SHUFFLE_BUFFER = 1000
    CATEGORICAL_FEATURES = ["is_rainny", "weather", "cloud_cover"]
    # Position of the target column inside a packed row (9 for the current
    # SELECT_COLUMNS).
    TARGET_INDEX = SELECT_COLUMNS.index("temperature")

    def pack(features):
        """Flatten one CSV record (dict of shape-(1,) tensors) into a row vector."""
        # Stack explicitly in SELECT_COLUMNS order so that column positions
        # always agree with the index-based slicing done further down.
        packed_features = tf.stack(
            [features[name] for name in SELECT_COLUMNS], axis=1
        )
        return tf.reshape(packed_features, [-1])

    def encoder_features(rows):
        """Split the encoder rows into a per-feature dict, ids cast to int64."""
        features = {}
        for position, name in enumerate(SELECT_COLUMNS):
            column = rows[:, position]
            if name in CATEGORICAL_FEATURES:
                column = tf.cast(column, tf.int64)
            features[name] = column
        return features

    def to_training_example(window):
        """Turn one 48-row window into ((encoder, decoder_input), target)."""
        target = window[ENCODER_STEPS:, TARGET_INDEX]
        # Teacher-forcing input: the target shifted right by one step, with a
        # zero in the first position.
        shifted_target = tf.concat((tf.zeros((1,)), target[:-1]), axis=0)
        return (encoder_features(window[:ENCODER_STEPS]), shifted_target), target

    def to_inference_example(window):
        """Turn one 48-row window into the inference input dict and target."""
        inputs = {
            "encoder_inputs": encoder_features(window[:ENCODER_STEPS]),
            "decoder_inputs": {"start_inputs": tf.zeros(shape=(1,))},
        }
        return inputs, window[ENCODER_STEPS:, TARGET_INDEX]

    # Window processing
    def windowed_dataset(dataset, batch_size, mode):
        dataset = dataset.map(pack)
        dataset = dataset.window(size=WINDOW_SIZE, shift=1, drop_remainder=True)
        dataset = dataset.flat_map(lambda window: window.batch(WINDOW_SIZE))

        if mode == "train" or mode == "eval":
            # Every part of an example is derived from the same window inside
            # a single map call, so inputs and targets can never get out of
            # step with each other.
            dataset = dataset.map(to_training_example).cache()

            if mode == "train":
                # The result of shuffle() must be assigned; calling it for its
                # side effect does nothing. Shuffling after cache() yields a
                # new order on every epoch.
                dataset = dataset.shuffle(SHUFFLE_BUFFER)
        else:
            dataset = dataset.map(to_inference_example).cache()

        return dataset.batch(batch_size, drop_remainder=True).prefetch(1)

    dataset = tf.data.experimental.make_csv_dataset(
            file_pattern=filename,
            column_names=CSV_COLUMNS,
            column_defaults=DEFAULTS,
            select_columns=SELECT_COLUMNS,
            batch_size=1,
            shuffle=False,
            num_epochs=1)

    dataset = windowed_dataset(dataset, batch_size, mode)

    return dataset

In [None]:
# Training batches hold 256 windows, validation batches 128.
train_dataset = load_dataset(
    filename=training_file_path, batch_size=256, mode="train"
)
valid_dataset = load_dataset(
    filename=validation_file_path, batch_size=128, mode="eval"
)

In [None]:
# Features treated as integer category ids when building feature columns and
# Keras inputs; all remaining SELECT_COLUMNS entries are treated as numeric.
CATEGORICAL_FEATURES = ["is_rainny", "weather", "cloud_cover"]

In [None]:
def to_sparse_tensor(dense):
    """Convert a dense tensor into a ``tf.SparseTensor``.

    Every element different from the sentinel value -100.0 is kept; elements
    equal to the sentinel are left out of the sparse result. The dense shape
    of the result equals the shape of ``dense``.
    """
    # sequence_numeric_column default is float32
    sentinel = tf.constant(-100.0, dtype=tf.dtypes.float32)

    kept_indices = tf.where(tf.not_equal(dense, sentinel))
    kept_values = tf.gather_nd(dense, kept_indices)
    dense_shape = tf.shape(dense, out_type=tf.dtypes.int64)

    return tf.SparseTensor(kept_indices, kept_values, dense_shape)

In [None]:
# Directory of the tf.Transform artefacts; the wrapper is used below to look
# up vocabulary sizes by feature name.
TRANSFORM_ARTEFACTS_DIR = "##########"
tf_transform_output = tft.TFTransformOutput(TRANSFORM_ARTEFACTS_DIR)

In [None]:
# One sequence feature column per selected feature, in SELECT_COLUMNS order.
sequence_feature_columns = []

for feature_name in SELECT_COLUMNS:
    if feature_name not in CATEGORICAL_FEATURES:
        # Numeric feature: to_sparse_tensor is applied as the normalizer.
        column = tf.feature_column.sequence_numeric_column(
            feature_name, normalizer_fn=to_sparse_tensor
        )
    else:
        # Categorical feature: identity ids, one-hot encoded, with the bucket
        # count taken from the transform output's vocabulary size.
        vocabulary_size = tf_transform_output.vocabulary_size_by_name(feature_name)
        identity_column = tf.feature_column.sequence_categorical_column_with_identity(
            feature_name, num_buckets=vocabulary_size
        )
        column = tf.feature_column.indicator_column(identity_column)

    sequence_feature_columns.append(column)

In [None]:
# Same feature list as the one used when loading the dataset.
SELECT_COLUMNS = [
    'day_sin', 'day_cos', 'year_sin', 'year_cos', 'air_pressure_ashore', 'air_pressure_afloat',
    'diff_air_pressure', 'precipitation', 'is_rainny', 'temperature', 'humidity', 'wind_vector_x', "wind_vector_y",
    'hours_of_daylight', 'global_solar_radiation', 'weather', 'cloud_cover'
]

# One Keras input per feature with shape (24, 1): int64 for the categorical
# features, float32 for everything else.
encoder_input_layers = {
    colname: tf.keras.layers.Input(name=colname, shape=(24, 1), dtype=tf.int64)
    if colname in CATEGORICAL_FEATURES else tf.keras.layers.Input(name=colname, shape=(24, 1), dtype=tf.float32) for colname in SELECT_COLUMNS
}

# Combine the per-feature inputs into a single sequence tensor through the
# sequence feature columns.
sequence_input, sequence_length = tf.keras.experimental.SequenceFeatures(sequence_feature_columns)(encoder_input_layers)

# Encoder: two stacked LSTMs with dropout in between. The second LSTM's final
# hidden and cell states initialise the decoder.
encoder_lstm = tf.keras.layers.LSTM(256, return_sequences=True, name="encoder_lstm1")(sequence_input)
encoder_dropout = tf.keras.layers.Dropout(0.2, name="encoder_dropout")(encoder_lstm)
encoder_output, state_h, state_c = tf.keras.layers.LSTM(256, return_state=True, name="encoder_lstm2")(encoder_dropout)
encoder_state = [state_h, state_c]

# Sampler
# Keep the leading dimension dynamic in next_inputs_fn: a fixed (1, 1) target
# shape only fits a single sampled output, whereas (-1, 1) fits any number of
# them.
sampler = tfa.seq2seq.sampler.ScheduledOutputTrainingSampler(
    sampling_probability=0.,
    next_inputs_fn=lambda outputs: tf.reshape(outputs, shape=(-1, 1))
)
# Swap the constant probability for a tf.Variable (initially 0.).
sampler.sampling_probability = tf.Variable(0.)

# Decoder
decoder_input = tf.keras.layers.Input(shape=(24, 1), name="decoder_input")

decoder_cell = tf.keras.layers.LSTMCell(256, name="decoder_lstm")
output_layer = tf.keras.layers.Dense(1, name="decoder_output")

decoder = tfa.seq2seq.basic_decoder.BasicDecoder(decoder_cell, sampler, output_layer=output_layer)

# NOTE(review): sequence_length has a single entry although a batch holds many
# sequences - confirm the sampler treats this as intended.
decoder_output, _, _ = decoder(decoder_input, initial_state=encoder_state, sequence_length=[24])

final_output = decoder_output.rnn_output

# Creating model
model = tf.keras.Model(inputs=[encoder_input_layers, decoder_input], outputs=[final_output])

optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss="mse", optimizer=optimizer)

In [None]:
# Print the layer-by-layer summary of the training model.
model.summary()

In [None]:
# Train for a single epoch, validating on the held-out dataset.
history = model.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=1,
)

In [None]:
# Inference sampler: each decoder output is passed through unchanged as the
# sample, and the end function never reports a finished sequence.
inference_sampler = tfa.seq2seq.sampler.InferenceSampler(
    sample_fn=lambda outputs: outputs,
    sample_shape=[1],
    sample_dtype=tf.float32,
    end_fn=lambda sample_ids: False,
)

# Inference decoder: reuses the decoder cell and output layer of the training
# model and is limited to 24 decoding iterations.
inference_decoder = tfa.seq2seq.basic_decoder.BasicDecoder(
    decoder_cell,
    inference_sampler,
    output_layer=output_layer,
    maximum_iterations=24,
)

# Single start value per example that seeds the first decoding step.
start_inputs = tf.keras.layers.Input(shape=(1,), name="start_inputs", dtype=tf.float32)

decoder_output = inference_decoder(start_inputs, initial_state=encoder_state)[0]
final_output = decoder_output.rnn_output

# Creating inference model
inference_inputs = {
    "encoder_inputs": encoder_input_layers,
    "decoder_inputs": {"start_inputs": start_inputs},
}
inference_model = tf.keras.Model(inputs=inference_inputs, outputs=[final_output])

In [None]:
# Persist the inference model under the "model" path.
inference_model.save("model")

In [None]:
# Same feature list as the one used when loading the dataset.
SELECT_COLUMNS = [
    'day_sin', 'day_cos', 'year_sin', 'year_cos', 'air_pressure_ashore', 'air_pressure_afloat',
    'diff_air_pressure', 'precipitation', 'is_rainny', 'temperature', 'humidity', 'wind_vector_x', "wind_vector_y",
    'hours_of_daylight', 'global_solar_radiation', 'weather', 'cloud_cover'
]

# One Keras input per feature with shape (24, 1): int64 for the categorical
# features, float32 for everything else.
encoder_input_layers = {
    colname: tf.keras.layers.Input(name=colname, shape=(24, 1), dtype=tf.int64)
    if colname in CATEGORICAL_FEATURES else tf.keras.layers.Input(name=colname, shape=(24, 1), dtype=tf.float32) for colname in SELECT_COLUMNS
}

# Combine the per-feature inputs into a single sequence tensor through the
# sequence feature columns.
sequence_input, sequence_length = tf.keras.experimental.SequenceFeatures(sequence_feature_columns)(encoder_input_layers)

# Encoder: the second LSTM returns the full output sequence (used as the
# attention memory) as well as its final hidden and cell states.
encoder_lstm = tf.keras.layers.LSTM(256, return_sequences=True, name="encoder_lstm1")(sequence_input)
encoder_dropout = tf.keras.layers.Dropout(0.2, name="encoder_dropout")(encoder_lstm)
encoder_output, state_h, state_c = tf.keras.layers.LSTM(256, return_state=True, return_sequences=True, name="encoder_lstm2")(encoder_dropout)
encoder_state = [state_h, state_c]

# Sampler
# Keep the leading dimension dynamic in next_inputs_fn: a fixed (1, 1) target
# shape only fits a single sampled output, whereas (-1, 1) fits any number of
# them.
sampler = tfa.seq2seq.sampler.ScheduledOutputTrainingSampler(
    sampling_probability=0.,
    next_inputs_fn=lambda outputs: tf.reshape(outputs, shape=(-1, 1))
)
# Swap the constant probability for a tf.Variable (initially 0.).
sampler.sampling_probability = tf.Variable(0.)

# Decoder
decoder_input = tf.keras.layers.Input(shape=[24, 1], name="decoder_input")

# Luong attention over the encoder output sequence.
attention_mechanism = tfa.seq2seq.LuongAttention(256, encoder_output)

decoder_cell = tf.keras.layers.LSTMCell(256, name="decoder_lstm")
decoder_cell = tfa.seq2seq.AttentionWrapper(
    decoder_cell,
    attention_mechanism
)

output_layer = tf.keras.layers.Dense(1, name="decoder_output")

decoder = tfa.seq2seq.basic_decoder.BasicDecoder(decoder_cell, sampler, output_layer=output_layer)

# Start from the wrapper's initial state for the current batch size, then
# replace its cell state with the encoder's final states.
decoder_initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=tf.shape(decoder_input)[0])
decoder_initial_state = decoder_initial_state.clone(cell_state=encoder_state)

# NOTE(review): sequence_length has a single entry although a batch holds many
# sequences - confirm the sampler treats this as intended.
decoder_output, _, _ = decoder(decoder_input, initial_state=decoder_initial_state, sequence_length=[24])

final_output = decoder_output.rnn_output

# Creating model
model = tf.keras.Model(inputs=[encoder_input_layers, decoder_input], outputs=[final_output])

optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss="mse", optimizer=optimizer)