<a href="https://colab.research.google.com/github/vhrique/ELF/blob/main/Water_Transformer_Backup_BRACIS_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
import keras_hub.layers as khub

from sklearn import metrics

from tqdm.notebook import tqdm

import matplotlib.pyplot as plt

In [None]:
training_data = pd.read_csv(
    'https://www.dropbox.com/s/z32q8nks8iqkiuv/waterDataTraining.csv?dl=1',
    index_col=0)
testing_data = pd.read_csv(
    'https://www.dropbox.com/s/3ptrkyisyks2us3/waterDataTesting.csv?dl=1',
    index_col=0)

In [None]:
Xtrain = training_data.iloc[:,1:-1]
Ytrain = np.array(training_data['EVENT'])

Xtest = testing_data.iloc[:,1:-1]
Ytest = np.array(testing_data['EVENT'])

In [None]:
Xtrain = Xtrain.fillna(method='ffill')
Xtest = Xtest.fillna(method='ffill')

  Xtrain = Xtrain.fillna(method='ffill')
  Xtest = Xtest.fillna(method='ffill')


In [None]:
window_trend = 24 * 60

Xtrain_trend = Xtrain.rolling(window_trend, min_periods=1).mean()
Xtest_trend = Xtest.rolling(window_trend, min_periods=1).mean()

Xtrain = Xtrain - Xtrain_trend
Xtest = Xtest - Xtest_trend

In [None]:
val = round(Xtrain.shape[0]/2)

Xval = Xtrain[val:]
Yval = Ytrain[val:]

Xtrain = Xtrain[:val]
Ytrain = Ytrain[:val]

In [None]:
means = Xtrain.mean()
stds = Xtrain.std()

Xtrain = (Xtrain - means)/stds
Xval = (Xval - means)/stds
Xtest = (Xtest - means)/stds

In [None]:
window = 30

Xtrain_ext = pd.concat([pd.DataFrame([Xtrain.iloc[0]]*(window-1)), Xtrain])
Xval_ext = pd.concat([pd.DataFrame([Xval.iloc[0]]*(window-1)), Xval])
Xtest_ext = pd.concat([pd.DataFrame([Xtest.iloc[0]]*(window-1)), Xtest])

Xtrain_3d = np.zeros((Xtrain.shape[0], window, Xtrain.shape[1]))
Xval_3d = np.zeros((Xval.shape[0], window, Xval.shape[1]))
Xtest_3d = np.zeros((Xtest.shape[0], window, Xtest.shape[1]))

for i in tqdm(range(Xtrain.shape[0])):
    data = Xtrain_ext.iloc[i:i+window,:]
    # Xtrain_3d[i] = (data - data.mean()) / data.std()
    Xtrain_3d[i] = (data - data.mean())
    # Xtrain_3d[i] = Xtrain_ext.iloc[i:i+window,:]
for i in tqdm(range(Xval.shape[0])):
    data = Xval_ext.iloc[i:i+window,:]
    # Xval_3d[i] = (data - data.mean()) / data.std()
    Xval_3d[i] = (data - data.mean())
    # Xval_3d[i] = Xval_ext.iloc[i:i+window,:]
for i in tqdm(range(Xtest.shape[0])):
    data = Xtest_ext.iloc[i:i+window,:]
    # Xtest_3d[i] = (data - data.mean()) / data.std()
    Xtest_3d[i] = (data - data.mean())
    # Xtest_3d[i] = Xtest_ext.iloc[i:i+window,:]

  0%|          | 0/69783 [00:00<?, ?it/s]

  0%|          | 0/69783 [00:00<?, ?it/s]

  0%|          | 0/139566 [00:00<?, ?it/s]

In [None]:
n_features = Xtrain_3d.shape[2]

Xtrain_3d_split = [Xtrain_3d[:, :, i].reshape(Xtrain_3d.shape[0], Xtrain_3d.shape[1], 1) for i in range(n_features)]
Xval_3d_split = [Xval_3d[:, :, i].reshape(Xval_3d.shape[0], Xval_3d.shape[1], 1) for i in range(n_features)]
Xtest_3d_split = [Xtest_3d[:, :, i].reshape(Xtest_3d.shape[0], Xtest_3d.shape[1], 1) for i in range(n_features)]

In [None]:
epochs = 100
batch = 32

In [40]:
n_samples, n_window, n_features = Xtrain_3d.shape
multiplier = 10

model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(n_window, n_features)))
model.add(tf.keras.layers.Dense(n_features*multiplier,activation='linear'))
model.add(khub.PositionEmbedding(sequence_length=n_window))
model.add(khub.TransformerEncoder(intermediate_dim=n_features*multiplier, num_heads=4, dropout=0.0, activation='relu'))
# model.add(tf.keras.layers.Dropout(0.1))
model.add(khub.TransformerEncoder(intermediate_dim=n_features*multiplier, num_heads=4, dropout=0.0, activation='relu'))
# model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.GlobalAveragePooling1D())

model.add(tf.keras.layers.Dense(n_features*multiplier,activation='relu'))
# model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.Dense(n_features*multiplier,activation='relu'))
# model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.Dense(n_features*multiplier,activation='relu'))
# model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.Dense(1,activation='sigmoid'))

# opt = tf.keras.optimizers.Adam(learning_rate=0.00001)
model.compile(loss="binary_crossentropy", optimizer='adam')

indices_train_true = np.nonzero(Ytrain)[0]
indices_train_false = np.nonzero(~Ytrain)[0]
indices_val_true = np.nonzero(Yval)[0]
indices_val_false = np.nonzero(~Yval)[0]

for _ in tqdm(range(epochs)):

  indices_train = np.concatenate((
      indices_train_true,
      np.random.choice(indices_train_false,
                       len(indices_train_true),
                       replace=False)))

  indices_val = np.concatenate((
      indices_val_true,
      np.random.choice(indices_val_false,
                       len(indices_val_true),
                       replace=False)))

  model.fit(Xtrain_3d[indices_train], Ytrain[indices_train],
            validation_data=(Xval_3d[indices_val], Yval[indices_val]),
            batch_size=batch, verbose=0)

y_pred = model.predict(Xtest_3d)
'F1:', np.max([metrics.f1_score(Ytest, y_pred>(th/100)) for th in range(101)])

  0%|          | 0/100 [00:00<?, ?it/s]



[1m4362/4362[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step


('F1:', np.float64(0.0328270904542091))

In [39]:
model.summary()

In [None]:
n_samples, n_window, n_features = Xtrain_3d.shape
n_filters = 20

visible = list()
cnn = list()
for i in range(n_features):
  visible.append(tf.keras.layers.Input(shape=(n_window,1)))
  cnn.append(tf.keras.layers.Conv1D(filters=n_filters, kernel_size=kernel_size, activation='relu')(visible[i]))
  # cnn[i] = tf.keras.layers.Dropout(0.1)(cnn[i])
  cnn[i] = tf.keras.layers.Conv1D(filters=n_filters, kernel_size=kernel_size, activation='relu')(cnn[i])
  # cnn[i] = tf.keras.layers.Dropout(0.1)(cnn[i])
  cnn[i] = tf.keras.layers.GlobalMaxPooling1D()(cnn[i])

dense = tf.keras.layers.concatenate(cnn)
dense = tf.keras.layers.Dense(n_features*n_filters,activation='relu')(dense)
# dense = tf.keras.layers.Dropout(0.1)(dense)
dense = tf.keras.layers.Dense(n_features*n_filters,activation='relu')(dense)
# dense = tf.keras.layers.Dropout(0.1)(dense)
dense = tf.keras.layers.Dense(n_features*n_filters,activation='relu')(dense)
# dense = tf.keras.layers.Dropout(0.1)(dense)
output = tf.keras.layers.Dense(1,activation='sigmoid')(dense)

model = tf.keras.Model(inputs=visible, outputs=output)

model.compile(loss="binary_crossentropy", optimizer='adam')

indices_train_true = np.nonzero(Ytrain)[0]
indices_train_false = np.nonzero(~Ytrain)[0]
indices_val_true = np.nonzero(Yval)[0]
indices_val_false = np.nonzero(~Yval)[0]
for _ in tqdm(range(epochs)):

  indices_train = np.concatenate((
      indices_train_true,
      np.random.choice(indices_train_false,
                       len(indices_train_true),replace=False)))

  indices_val = np.concatenate((
      indices_val_true,
      np.random.choice(indices_val_false,
                       len(indices_val_true),replace=False)))

  Xtrain_under = []
  Xval_under = []
  for f in range(n_features):
    Xtrain_under.append(Xtrain_3d_split[f][indices_train])
    Xval_under.append(Xval_3d_split[f][indices_val])

  model.fit(Xtrain_under, Ytrain[indices_train], batch_size=batch,
            validation_data = (Xval_under, Yval[indices_val]),
            verbose=0)

y_pred = model.predict(Xtest_3d_split)
'F1:', np.max([metrics.f1_score(Ytest, y_pred>(th/100)) for th in range(101)])

  0%|          | 0/100 [00:00<?, ?it/s]



('F1:', 0.6333885666942835)

In [None]:
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_37 (InputLayer)       [(None, 30, 1)]              0         []                            
                                                                                                  
 input_38 (InputLayer)       [(None, 30, 1)]              0         []                            
                                                                                                  
 input_39 (InputLayer)       [(None, 30, 1)]              0         []                            
                                                                                                  
 input_40 (InputLayer)       [(None, 30, 1)]              0         []                            
                                                                                            

In [None]:
n_samples, n_window, n_features = Xtrain_3d.shape

visible = list()
shared_conv1 = tf.keras.layers.Conv1D(filters=n_filters, kernel_size=kernel_size, activation='relu')
shared_conv2 = tf.keras.layers.Conv1D(filters=n_filters, kernel_size=kernel_size, activation='relu')
cnn = list()
for i in range(n_features):
  visible.append(tf.keras.layers.Input(shape=(n_window,1)))
  cnn.append(shared_conv1(visible[i]))
  # cnn[i] = tf.keras.layers.Dropout(0.1)(cnn[i])
  cnn[i] = shared_conv2(cnn[i])
  # cnn[i] = tf.keras.layers.Dropout(0.1)(cnn[i])
  cnn[i] = tf.keras.layers.GlobalMaxPooling1D()(cnn[i])

dense = tf.keras.layers.concatenate(cnn)
dense = tf.keras.layers.Dense(n_features*n_filters,activation='relu')(dense)
# dense = tf.keras.layers.Dropout(0.1)(dense)
dense = tf.keras.layers.Dense(n_features*n_filters,activation='relu')(dense)
# dense = tf.keras.layers.Dropout(0.1)(dense)
dense = tf.keras.layers.Dense(n_features*n_filters,activation='relu')(dense)
# dense = tf.keras.layers.Dropout(0.1)(dense)
output = tf.keras.layers.Dense(1,activation='sigmoid')(dense)

model = tf.keras.Model(inputs=visible, outputs=output)

model.compile(loss="binary_crossentropy", optimizer='adam')

indices_train_true = np.nonzero(Ytrain)[0]
indices_train_false = np.nonzero(~Ytrain)[0]
indices_val_true = np.nonzero(Yval)[0]
indices_val_false = np.nonzero(~Yval)[0]
for _ in tqdm(range(epochs)):

  indices_train = np.concatenate((
      indices_train_true,
      np.random.choice(indices_train_false,
                       len(indices_train_true),replace=False)))

  indices_val = np.concatenate((
      indices_val_true,
      np.random.choice(indices_val_false,
                       len(indices_val_true),replace=False)))

  Xtrain_under = []
  Xval_under = []
  for f in range(n_features):
    Xtrain_under.append(Xtrain_3d_split[f][indices_train])
    Xval_under.append(Xval_3d_split[f][indices_val])

  model.fit(Xtrain_under, Ytrain[indices_train], batch_size=batch,
            validation_data = (Xval_under, Yval[indices_val]),
            verbose=0)

y_pred = model.predict(Xtest_3d_split)
'F1:', np.max([metrics.f1_score(Ytest, y_pred>(th/100)) for th in range(101)])

  0%|          | 0/100 [00:00<?, ?it/s]



('F1:', 0.5261578044596913)

In [None]:
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_28 (InputLayer)       [(None, 30, 1)]              0         []                            
                                                                                                  
 input_29 (InputLayer)       [(None, 30, 1)]              0         []                            
                                                                                                  
 input_30 (InputLayer)       [(None, 30, 1)]              0         []                            
                                                                                                  
 input_31 (InputLayer)       [(None, 30, 1)]              0         []                            
                                                                                            

In [None]:
# np.savetxt('y_cnn.csv',y_pred >= 0.5, fmt='%d')