In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

import joblib

import tensorflow as tf

from tensorflow.test import gpu_device_name

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanAbsoluteError, MeanSquaredError
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

from sklearn.preprocessing import MinMaxScaler

In [33]:
# Colab-only step: mount Google Drive so the project files under
# /content/drive/ can be read and written by the cells below.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [34]:
# Seed every random number generator involved (Python's `random`, NumPy and
# TensorFlow) with one call. Seeding only NumPy and TensorFlow leaves Python's
# own generator unseeded.
tf.keras.utils.set_random_seed(0)

In [35]:
# Project folder on the mounted Drive; the data/ and resources/ sub-folders
# used by the following cells live below it.
root_path = '/content/drive/MyDrive/INF2475/2412841'

In [36]:
# Load the raw load series from the project's data folder.
df = pd.read_excel(f'{root_path}/data/dados_carga_sin.xlsx')

In [37]:
# Quick look at the first rows to check the columns that were read.
df.head()

Unnamed: 0,data,carga
0,2016-01-01,51554
1,2016-01-02,54888
2,2016-01-03,53244
3,2016-01-04,61797
4,2016-01-05,63793


In [38]:
# Turn the 'data' (date) column into the DatetimeIndex of the frame and remove
# it from the columns. The guard makes the cell safe to re-run: once 'data' has
# been moved to the index there is nothing left to convert.
if 'data' in df.columns:
    df = df.assign(data=pd.to_datetime(df['data'])).set_index('data')
df

Unnamed: 0_level_0,carga
data,Unnamed: 1_level_1
2016-01-01,51554
2016-01-02,54888
2016-01-03,53244
2016-01-04,61797
2016-01-05,63793
...,...
2024-04-26,81974
2024-04-27,77379
2024-04-28,71995
2024-04-29,83111


In [39]:
# Store the load values as floating point numbers.
df = df.astype({'carga': float})

In [40]:
# Check index type, row count, dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3043 entries, 2016-01-01 to 2024-04-30
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   carga   3043 non-null   float64
dtypes: float64(1)
memory usage: 47.5 KB


In [41]:
# Summary statistics of the load column (range, quartiles, spread).
df.describe()

Unnamed: 0,carga
count,3043.0
mean,66422.22675
std,7549.023138
min,48586.0
25%,61372.5
50%,66337.0
75%,71149.0
max,91360.0


O conjunto de dados possui, ao todo, 3043 observações e não há valores faltantes. O menor valor de carga é 48586 e o maior valor é 91360.

In [10]:
# Interactive line plot of the whole load series.
px.line(df)

In [11]:
# Fit the min-max scaler that normalises the data; the fitted object is
# persisted in a later cell.
#
# Only the chronological first 80% of the rows are used for fitting.
# train_multiple_lstm holds out the last 20% of the windows as a test set, so
# fitting on the full series would leak the min/max of the evaluation period
# into training. As a consequence, scaled values in the held-out period may
# fall outside [0, 1].
SCALER_FIT_FRACTION = 0.8
n_fit_rows = int(len(df) * SCALER_FIT_FRACTION)

scaler = MinMaxScaler(feature_range=(0, 1))
scaler = scaler.fit(df.values[:n_fit_rows])

In [13]:
# Persist the fitted scaler so the same transformation can be reloaded later
# (e.g. to map predictions back to the original scale).
joblib.dump(scaler, f'{root_path}/resources/scalers/scaler.gz')

['/content/drive/MyDrive/INF2475/2412841/resources/scalers/scaler.gz']

In [12]:
# Apply the fitted scaler to the full series and keep a CSV copy on Drive.
scaled_path = f'{root_path}/data/scaled_data.csv'
scaled_data = scaler.transform(df.to_numpy())
np.savetxt(scaled_path, scaled_data, delimiter=',')

In [14]:
# Build the input/output sequences used for supervised training
def create_dataset(dataset, time_step=1):
    """Slice a series into sliding input windows and next-step targets.

    Sample ``k`` pairs the ``time_step`` consecutive values starting at
    position ``k`` with the value that immediately follows that window.

    Args:
        dataset: Array-like series. For a 2-D input only column 0 is used;
            a 1-D input is used as is.
        time_step: Number of consecutive values in each input window
            (must be >= 1).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(len(dataset) - time_step, time_step)`` and ``y`` has shape
        ``(len(dataset) - time_step,)``. When the series is not longer than
        ``time_step`` both arrays are empty, with ``X`` keeping its second
        dimension (shape ``(0, time_step)``) so callers can still reshape it.

    Raises:
        ValueError: If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError(f'time_step must be >= 1, got {time_step}')

    values = np.asarray(dataset)
    series = values if values.ndim == 1 else values[:, 0]

    n_samples = max(len(series) - time_step, 0)

    # Row k of window_index holds the positions k, k+1, ..., k+time_step-1,
    # so a single fancy-indexing step extracts every window at once.
    window_index = np.arange(n_samples)[:, None] + np.arange(time_step)

    return series[window_index], series[time_step:time_step + n_samples]

In [15]:
# Hyper-parameter grid. Models are numbered in the nested-loop order used by
# train_multiple_lstm (time steps in the outer loop, batch sizes in the inner):
#
# Model 1: time_steps = 90, batch_sizes = 64
# Model 2: time_steps = 90, batch_sizes = 32

# Model 3: time_steps = 60, batch_sizes = 64
# Model 4: time_steps = 60, batch_sizes = 32

# Model 5: time_steps = 30, batch_sizes = 64
# Model 6: time_steps = 30, batch_sizes = 32

time_steps = [90, 60, 30]
batch_sizes = [64, 32]

# Keys are the ones read by train_multiple_lstm.
parameters_dict = {
    'time_steps': time_steps,
    'batch_sizes': batch_sizes
}

In [None]:

def _build_lstm_model():
    """Return a freshly compiled LSTM(50) -> Dense(25) -> Dense(1) regressor."""
    model = Sequential()
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))

    # Trained on mean absolute error; mean squared error is tracked as a metric.
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss=MeanAbsoluteError(),
        metrics=[MeanSquaredError()]
    )
    return model


def train_multiple_lstm(params, epochs=100, train_fraction=0.8, validation_split=0.2):
    """Train one LSTM model per (time_step, batch_size) combination.

    Reads the notebook-level ``scaled_data`` array and writes below the
    notebook-level ``root_path``. For each value in ``params['time_steps']``
    the series is windowed with ``create_dataset`` and split chronologically
    (no shuffling) into a train and a test part; for each value in
    ``params['batch_sizes']`` a new model is then fitted on the train part.

    Models are numbered from 1 in loop order (time steps outer, batch sizes
    inner). For model ``i`` the following files are written:

    * ``{root_path}/resources/models/model{i}/model{i}.h5`` - checkpoint
      produced by ``ModelCheckpoint(save_best_only=True)``;
    * ``{root_path}/data/model{i}_X_train.npy``, ``..._X_test.npy``,
      ``..._y_train.npy``, ``..._y_test.npy`` - the split used for that model.

    Args:
        params: Mapping with the keys ``'time_steps'`` and ``'batch_sizes'``,
            each holding an iterable of integers.
        epochs: Number of training epochs per model.
        train_fraction: Share of the windows (taken from the start of the
            series) used for training; the remainder is the test set.
        validation_split: Share of the training windows that Keras sets aside
            for validation during ``fit``.

    Returns:
        None. The results are the files described above.

    Raises:
        ValueError: If a time step produces no windows at all because
            ``scaled_data`` is too short.
    """
    # The device lookup does not depend on the hyper-parameters: do it once.
    # NOTE(review): this is presumably an empty string when no GPU is
    # available, in which case TensorFlow's default placement should apply -
    # confirm on a CPU-only runtime.
    device_name = gpu_device_name()

    model_index = 1

    for time_step in params['time_steps']:
        # Windowing and splitting depend only on time_step, so they are done
        # once per time step instead of once per (time_step, batch_size) pair.
        X, y = create_dataset(scaled_data, time_step)
        if len(X) == 0:
            raise ValueError(
                f'time_step={time_step} produces no samples: scaled_data has '
                f'only {len(scaled_data)} rows'
            )
        # Keras LSTM layers expect (samples, time steps, features).
        X = X.reshape(X.shape[0], X.shape[1], 1)

        # Chronological split: the first part trains, the tail is held out.
        train_size = int(len(X) * train_fraction)
        splits = {
            'X_train': X[:train_size],
            'X_test': X[train_size:],
            'y_train': y[:train_size],
            'y_test': y[train_size:],
        }

        for batch_size in params['batch_sizes']:
            # Persist the split before the long fit so that an interrupted
            # run still leaves the evaluation data on disk.
            for split_name, split_array in splits.items():
                np.save(
                    f'{root_path}/data/model{model_index}_{split_name}.npy',
                    split_array
                )

            # Release the Keras global state left behind by the previous
            # model; otherwise it accumulates while looping over the grid.
            tf.keras.backend.clear_session()

            model = _build_lstm_model()

            checkpoint = ModelCheckpoint(
                f'{root_path}/resources/models/model{model_index}/model{model_index}.h5',
                save_best_only=True
            )

            with tf.device(device_name):
                model.fit(
                    splits['X_train'], splits['y_train'],
                    batch_size=batch_size,
                    validation_split=validation_split,
                    epochs=epochs,
                    callbacks=[checkpoint]
                )

            model_index += 1


In [None]:
# Long-running cell: trains every combination in parameters_dict
# (3 time steps x 2 batch sizes = 6 models).
train_multiple_lstm(parameters_dict)

Epoch 1/100
Epoch 2/100


You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.



Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 