In [2]:
import pandas as pd
import numpy as np

# Carregar os dados de vazão
flow_data = pd.read_csv('flow_data.csv', parse_dates=['date'])

# Carregar os dados de precipitação
rain_data = pd.read_csv('rain_data.csv', parse_dates=['date'])

# Exibir as primeiras linhas dos dados para confirmar que foram carregados corretamente
print(flow_data.head())
print(rain_data.head())

        date       vazao
0 2020-01-01  109.934283
1 2020-01-02   97.234714
2 2020-01-03  112.953771
3 2020-01-04  130.460597
4 2020-01-05   95.316933
        date  precipitacao
0 2020-01-01      5.715575
1 2020-01-02      6.121569
2 2020-01-03      7.166102
3 2020-01-04      7.107604
4 2020-01-05      2.244661


In [3]:
# Mesclar os dados de vazão e precipitação com base na coluna 'date'
dados = pd.merge(flow_data, rain_data, on='date')

# Ordenar os dados pela data
dados.sort_values('date', inplace=True)

# Mostrar as primeiras linhas do DataFrame combinado
print(dados.head())

        date       vazao  precipitacao
0 2020-01-01  109.934283      5.715575
1 2020-01-02   97.234714      6.121569
2 2020-01-03  112.953771      7.166102
3 2020-01-04  130.460597      7.107604
4 2020-01-05   95.316933      2.244661


In [4]:
# Normalizar os dados (opcional, mas recomendado para redes neurais)
from sklearn.preprocessing import MinMaxScaler

# Dividir os dados em treinamento e teste
split_date = dados['date'].iloc[int(0.8 * len(dados))]
train_data = dados[dados['date'] <= split_date]
test_data = dados[dados['date'] > split_date]

# Normalizar os dados de treinamento
scaler = MinMaxScaler()
train_data[['vazao', 'precipitacao']] = scaler.fit_transform(train_data[['vazao', 'precipitacao']])

# Normalizar os dados de teste usando os parâmetros do treinamento
test_data[['vazao', 'precipitacao']] = scaler.transform(test_data[['vazao', 'precipitacao']])

# Função para criar as sequências de dados
def create_sequences(data, seq_length):
    xs, ys = [], []
    for i in range(len(data) - seq_length):
        x = data.iloc[i:(i + seq_length)].drop('vazao', axis=1).values
        y = data.iloc[i + seq_length]['vazao']
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

SEQ_LENGTH = 7  # Usar 1 semana (7 dias) de dados históricos
X_train, y_train = create_sequences(train_data, SEQ_LENGTH)
X_test, y_test = create_sequences(test_data, SEQ_LENGTH)

print(f"Formato dos dados de treinamento: X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"Formato dos dados de teste: X_test: {X_test.shape}, y_test: {y_test.shape}")

Formato dos dados de treinamento: X_train: (154, 7, 2), y_train: (154,)
Formato dos dados de teste: X_test: (32, 7, 2), y_test: (32,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data[['vazao', 'precipitacao']] = scaler.fit_transform(train_data[['vazao', 'precipitacao']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data[['vazao', 'precipitacao']] = scaler.transform(test_data[['vazao', 'precipitacao']])


In [8]:
from keras.models import Sequential
from keras.losses import Huber
from keras.layers import LSTM, Bidirectional, Dense, Dropout

# Definir o modelo BiLSTM
model = Sequential()
model.add(Bidirectional(LSTM(64, return_sequences=True), input_shape=(SEQ_LENGTH, X_train.shape[2])))
model.add(Dropout(0.1))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.1))
model.add(Dense(1))

model.compile(optimizer='adam', loss=Huber())

# Resumo do modelo
model.summary()

  super().__init__(**kwargs)
