In [2]:
import pandas as pd
import keras


In [None]:
# Mount Google Drive inside the Colab runtime (Colab-only API).
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [58]:
# Load the dataset from the Colab path, using the file's second column as the
# index, then discard the two columns that are not used in this notebook.
# The python engine combined with on_bad_lines='skip' drops malformed rows
# without raising.
# Local alternative: pd.read_csv('../data/data_phdosado.csv', index_col=1)
df = pd.read_csv(
    '/content/data_phdosado.csv',
    index_col=1,
    engine='python',
    on_bad_lines='skip',
).drop(columns=['ID', 'Real_RD_PV_BaumeCal'])

In [59]:
# Count the missing values in each column.
df.isna().sum()

Unnamed: 0,0
Real_RD_PV_pHDosado,89
Real_RD_MV_ValvulaCalpHDosado,97
Real_RD_PV_VazaoDosado,64
Real_RD_ST_LimpezaPHmetro,99
Real_RD_PV_PressaoLinhaCal,88
Real_RD_MV_ValvulaCalpHDosado-2,97
Real_RD_MV_PressaoLinhaCal,51
Real_RD_PV_NivelTqCal,92
Real_RD_PV_NivelTqDosado,61


In [60]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how='any')

In [61]:
# Show the names of the remaining columns as a plain list.
df.columns.tolist()

['Real_RD_PV_pHDosado',
 'Real_RD_MV_ValvulaCalpHDosado',
 'Real_RD_PV_VazaoDosado',
 'Real_RD_ST_LimpezaPHmetro',
 'Real_RD_PV_PressaoLinhaCal',
 'Real_RD_MV_ValvulaCalpHDosado-2',
 'Real_RD_MV_PressaoLinhaCal',
 'Real_RD_PV_NivelTqCal',
 'Real_RD_PV_NivelTqDosado']

In [62]:
# Preview the first five rows.
df.head(n=5)


Unnamed: 0_level_0,Real_RD_PV_pHDosado,Real_RD_MV_ValvulaCalpHDosado,Real_RD_PV_VazaoDosado,Real_RD_ST_LimpezaPHmetro,Real_RD_PV_PressaoLinhaCal,Real_RD_MV_ValvulaCalpHDosado-2,Real_RD_MV_PressaoLinhaCal,Real_RD_PV_NivelTqCal,Real_RD_PV_NivelTqDosado
timezone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-07-13 14:28:22,7.126,37.128124,899.625,0.0,0.798437,37.128124,74.546875,98.3125,73.84375
2024-07-13 14:28:25,7.1225,37.178123,899.625,0.0,0.798437,37.178123,74.578125,98.34375,73.875
2024-07-13 14:28:28,7.1225,37.1875,899.625,0.0,0.796875,37.1875,74.559375,98.40625,73.875
2024-07-13 14:28:31,7.126,37.331249,899.625,0.0,0.798437,37.331249,74.596875,98.5,73.84375
2024-07-13 14:28:34,7.1225,37.21875,900.375,0.0,0.798437,37.21875,74.5625,98.25,73.875


## Pré-processamento

In [63]:
# Time spans expressed as a number of observations.
minute = 20          # observations per minute
hour = 60 * minute   # observations per hour
day = 24 * hour      # observations per day

In [64]:
# Keep only the first ten days of observations, i.e. the first day * 10 rows.
# A positional row slice is required: `df[day * 10]` is a column lookup for
# the label 288000 and raises KeyError instead of selecting rows.
df = df.iloc[: day * 10]

In [65]:
# Normalization: rescale every column with scikit-learn's MinMaxScaler
# (default settings).
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(df)
normalized_data = scaler.transform(df)

# Wrap the scaled array in a DataFrame with the original column names. No
# index is passed, so rows are labelled 0..n-1 instead of df's own index.
# NOTE(review): the scaler is fit on all rows, including the ones that later
# become the validation split — consider fitting on the training rows only.
normalized_df = pd.DataFrame(data=normalized_data, columns=df.columns)

In [72]:
# Fraction of the rows used to train the model; the rest is for validation.
split_fraction = 0.715
train_split = int(len(df) * split_fraction)

# Sampling step inside each input window (1 keeps every observation).
step = 1

# Use the last hour of data to predict 5 minutes ahead.
past = hour
future = 5 * minute

# Optimizer and training-loop settings.
learning_rate = 0.001
batch_size = 256
epochs = 10

In [73]:
# Chronological split: the first train_split rows are for training and the
# remaining rows are for validation. normalized_df carries the default
# 0..n-1 index, so positional slicing selects the same rows as label slicing.
train_data = normalized_df.iloc[:train_split]
val_data = normalized_df.iloc[train_split:]

### Dataset de treinamento

In [74]:
# Training dataset.
# Labels are shifted forward by past + future rows relative to the inputs, so
# the target paired with the window starting at row i is the pH value at row
# i + past + future.
start = past + future
end = start + train_split

x_train = train_data.values
# Explicit positional slice returned as a plain NumPy array, matching how
# y_val is built; this avoids handing Keras a Series whose index labels start
# at `start` rather than 0.
y_train = normalized_df['Real_RD_PV_pHDosado'].iloc[start:end].values

# Number of time steps in each input window.
sequence_length = int(past / step)

In [75]:
# Build batched (window, target) pairs for training: each window holds
# sequence_length rows of x_train sampled every `step` rows.
dataset_train = keras.preprocessing.timeseries_dataset_from_array(
    data=x_train,
    targets=y_train,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

In [76]:
# Report the shapes of the raw training arrays, one per line.
print(
    f"x_train shape: {x_train.shape}",
    f"y_train shape: {y_train.shape}",
    sep="\n",
)

x_train shape: (205920, 9)
y_train shape: (205920,)


In [77]:
# Pull a single batch from the training dataset to inspect its shapes.
batch = next(iter(dataset_train.take(1)))
inputs, targets = batch

input_batch_shape = inputs.numpy().shape
target_batch_shape = targets.numpy().shape
print("Input shape:", input_batch_shape)
print("Target shape:", target_batch_shape)

Input shape: (256, 1200, 9)
Target shape: (256,)


### Dataset de validação

In [78]:
# Validation dataset.
# Labels start past + future rows after the split point; the inputs are cut
# past + future rows before the end of val_data so both arrays have the same
# length.
label_start = train_split + past + future
x_end = len(val_data) - past - future

x_val = val_data.iloc[:x_end].values
y_val = normalized_df['Real_RD_PV_pHDosado'].iloc[label_start:].values

# Same windowing parameters as the training dataset.
dataset_val = keras.preprocessing.timeseries_dataset_from_array(
    data=x_val,
    targets=y_val,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

In [79]:
# Report the shapes of the raw validation arrays, one per line.
print(
    f"x_val shape: {x_val.shape}",
    f"y_val shape: {y_val.shape}",
    sep="\n",
)

x_val shape: (80780, 9)
y_val shape: (80780,)


## Treinamento

In [80]:
# Model: one LSTM layer with 32 units followed by a single-unit Dense output.
# The input shape is (time steps per window, features per time step). It is
# taken from sequence_length and x_train so this cell does not depend on the
# `inputs` batch left over from the batch-inspection cell, which this
# assignment overwrites.
inputs = keras.layers.Input(shape=(sequence_length, x_train.shape[1]))
lstm_out = keras.layers.LSTM(32)(inputs)
outputs = keras.layers.Dense(1)(lstm_out)

model = keras.Model(inputs=inputs, outputs=outputs)
# Adam optimizer with the configured learning rate; mean squared error loss.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
model.summary()


In [81]:
# File that receives the weights of the best epoch (lowest val_loss).
path_checkpoint = "model_checkpoint.weights.h5"

# Stop training once val_loss has failed to improve for 5 epochs.
es_callback = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0,
    patience=5,
)

# Save weights only, and only when val_loss improves.
modelckpt_callback = keras.callbacks.ModelCheckpoint(
    filepath=path_checkpoint,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

training_callbacks = [es_callback, modelckpt_callback]

# Train on the windowed training set, evaluating on the validation set after
# each epoch.
history = model.fit(
    dataset_train,
    validation_data=dataset_val,
    epochs=epochs,
    callbacks=training_callbacks,
)

Epoch 1/10
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - loss: 0.0116
Epoch 1: val_loss improved from inf to 0.00421, saving model to model_checkpoint.weights.h5
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 97ms/step - loss: 0.0116 - val_loss: 0.0042
Epoch 2/10
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - loss: 0.0024
Epoch 2: val_loss improved from 0.00421 to 0.00418, saving model to model_checkpoint.weights.h5
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 94ms/step - loss: 0.0024 - val_loss: 0.0042
Epoch 3/10
[1m799/800[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 74ms/step - loss: 0.0022
Epoch 3: val_loss improved from 0.00418 to 0.00379, saving model to model_checkpoint.weights.h5
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 93ms/step - loss: 0.0022 - val_loss: 0.0038
Epoch 4/10
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

## Referências

- [Timeseries forecasting for weather prediction](https://keras.io/examples/timeseries/timeseries_weather_forecasting/)