In [4]:
%pip install mlflow joblib


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from itertools import product
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import mlflow
import mlflow.pytorch
from joblib import Parallel, delayed

# Configurar MLflow
mlflow.set_tracking_uri("http://localhost:5001")
mlflow.set_experiment("Prometheus_Transformer_Experiment")

# Definindo constantes
DATA_DIR = "../../data/"
FILE_PATH = os.path.join(DATA_DIR, 'ts.pkl')
SEQ_LENGTH = 48  # Reduzido para 12 horas (48 * 15min)
MB = 1_048_576

# 1. Carregar e reamostrar os dados
df = pd.read_pickle(FILE_PATH)
ts = df['value'].astype(float).resample('15min').mean().dropna()
dates = ts.index

# 2. Dividir os dados: 60% treino, 20% validação, 20% teste
train_size = int(0.6 * len(ts))
val_size = int(0.2 * len(ts))
train = ts[:train_size]
val = ts[train_size:train_size + val_size]
test = ts[train_size + val_size:]

# 3. Escalonar os dados
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
val_scaled = scaler.transform(val.values.reshape(-1, 1))
test_scaled = scaler.transform(test.values.reshape(-1, 1))

# 4. Criar sequências
def create_sequences(data, dates, seq_length):
    X, y, y_dates = [], [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length])
        y_dates.append(dates[i + seq_length])
    return np.array(X), np.array(y), np.array(y_dates)

X_train, y_train, y_dates_train = create_sequences(train_scaled, dates[:train_size], SEQ_LENGTH)
X_val, y_val, y_dates_val = create_sequences(val_scaled, dates[train_size:train_size + val_size], SEQ_LENGTH)
X_test, y_test, y_dates_test = create_sequences(test_scaled, dates[train_size + val_size:], SEQ_LENGTH)

# 5. Ajustar dimensões para o modelo Transformer
d_model = 128
X_train = np.repeat(X_train, d_model, axis=2)
X_val = np.repeat(X_val, d_model, axis=2)
X_test = np.repeat(X_test, d_model, axis=2)

# 6. Converter para tensores PyTorch
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# 7. Definir codificação posicional
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return x

# 8. Definir o modelo Transformer com dropout adicional
class Encoder(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_layers, dim_feedforward, dropout=0.1):
        super(Encoder, self).__init__()
        self.pos_encoder = PositionalEncoding(d_model)
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.dropout = nn.Dropout(dropout)
        self.linear_out = nn.Linear(d_model, 1, bias=True)

    def forward(self, src):
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = self.dropout(output[:, -1, :])
        output = self.linear_out(output)
        return output

# Hiperparâmetros para tuning (reduzido com base nos resultados parciais)
learning_rates = [0.0005, 0.001, 0.002]  # Foco em valores próximos a 0.001
num_layers_list = [2, 3]  # Reduzido para evitar alta variabilidade
nheads = [4, 8]
dim_feedforwards = [256, 512]
dropout_rates = [0.1, 0.2, 0.3]
weight_decays = [0.0, 0.01, 0.1]

# Outros hiperparâmetros fixos
input_dim = d_model
batch_size = 32
num_epochs = 50

# Função para calcular SMAPE
def smape(y_true, y_pred):
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    diff = np.abs(y_true - y_pred) / denominator
    return 100 * np.mean(diff)

# Função de treinamento e avaliação para uma repetição
def train_and_evaluate(learning_rate, num_layers, nhead, dim_feedforward, dropout_rate, weight_decay):
    model = Encoder(input_dim, d_model, nhead, num_layers, dim_feedforward, dropout=dropout_rate)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

    best_val_loss = float('inf')
    patience = 10
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        for i in range(0, len(X_train), batch_size):
            X_batch = X_train[i:i + batch_size]
            y_batch = y_train[i:i + batch_size]
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            y_val_pred = model(X_val)
            val_loss = criterion(y_val_pred, y_val)
        
        scheduler.step(val_loss)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)
    
    y_pred_rescaled = scaler.inverse_transform(y_pred.numpy()) / MB
    y_test_rescaled = scaler.inverse_transform(y_test.numpy()) / MB
    
    mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
    rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))
    mape = mean_absolute_percentage_error(y_test_rescaled, y_pred_rescaled) * 100
    smape_val = smape(y_test_rescaled, y_pred_rescaled)
    
    return mae, rmse, mape, smape_val, model

# Grid search com repetições paralelizadas
best_avg_mae = float('inf')
best_avg_rmse = float('inf')
best_hyperparams = None
best_model = None
n_repetitions = 5

for lr, nl, nh, df, dr, wd in product(learning_rates, num_layers_list, nheads, dim_feedforwards, dropout_rates, weight_decays):
    with mlflow.start_run():
        # Registrar hiperparâmetros
        mlflow.log_param("learning_rate", lr)
        mlflow.log_param("num_layers", nl)
        mlflow.log_param("nhead", nh)
        mlflow.log_param("dim_feedforward", df)
        mlflow.log_param("dropout_rate", dr)
        mlflow.log_param("weight_decay", wd)
        mlflow.log_param("seq_length", SEQ_LENGTH)
        mlflow.log_param("resample_interval", "15min")
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("num_epochs", num_epochs)

        print(f"\nTesting combination: LR={lr}, Layers={nl}, Heads={nh}, FF={df}, Dropout={dr}, Weight Decay={wd}")
        
        # Executar repetições em paralelo
        results = Parallel(n_jobs=-1)(
            delayed(train_and_evaluate)(lr, nl, nh, df, dr, wd) for _ in range(n_repetitions)
        )
        
        mae_list = [result[0] for result in results]
        rmse_list = [result[1] for result in results]
        mape_list = [result[2] for result in results]
        smape_list = [result[3] for result in results]
        models = [result[4] for result in results]
        
        # Exibir resultados de cada repetição
        for rep, (mae, rmse, mape, smape_val, _) in enumerate(results):
            print(f"  Repetition {rep+1}/{n_repetitions}")
            print(f"    MAE: {mae}, RMSE: {rmse}, MAPE: {mape}%, SMAPE: {smape_val}%")
        
        # Calcular médias e desvios padrão
        avg_mae = np.mean(mae_list)
        avg_rmse = np.mean(rmse_list)
        avg_mape = np.mean(mape_list)
        avg_smape = np.mean(smape_list)
        std_mae = np.std(mae_list)
        std_rmse = np.std(rmse_list)
        std_mape = np.std(mape_list)
        std_smape = np.std(smape_list)
        
        print(f"  Average MAE: {avg_mae} (±{std_mae}), Average RMSE: {avg_rmse} (±{std_rmse})")
        print(f"  Average MAPE: {avg_mape}% (±{std_mape}), Average SMAPE: {avg_smape}% (±{std_smape})")
        
        # Registrar métricas médias no MLflow
        mlflow.log_metric("avg_mae", avg_mae)
        mlflow.log_metric("std_mae", std_mae)
        mlflow.log_metric("avg_rmse", avg_rmse)
        mlflow.log_metric("std_rmse", std_rmse)
        mlflow.log_metric("avg_mape", avg_mape)
        mlflow.log_metric("std_mape", std_mape)
        mlflow.log_metric("avg_smape", avg_smape)
        mlflow.log_metric("std_smape", std_smape)
        
        if avg_mae < best_avg_mae:
            best_avg_mae = avg_mae
            best_avg_rmse = avg_rmse
            best_hyperparams = {
                'learning_rate': lr,
                'num_layers': nl,
                'nhead': nh,
                'dim_feedforward': df,
                'dropout_rate': dr,
                'weight_decay': wd
            }
            best_model = models[0]
            # Registrar o melhor modelo no MLflow
            mlflow.pytorch.log_model(best_model, "best_model")

print(f'\nBest Hyperparameters: {best_hyperparams}')
print(f'Best Average MAE: {best_avg_mae}')
print(f'Best Average RMSE: {best_avg_rmse}')

# 9. Fazer previsões
model = best_model
model.eval()
with torch.no_grad():
    y_train_pred = model(X_train)
    y_test_pred = model(X_test)

# 10. Reverter o escalonamento e converter para MB
y_train_pred_mb = scaler.inverse_transform(y_train_pred.numpy()) / MB
y_train_mb = scaler.inverse_transform(y_train.numpy()) / MB
y_test_pred_mb = scaler.inverse_transform(y_test_pred.numpy()) / MB
y_test_mb = scaler.inverse_transform(y_test.numpy()) / MB

# 11. Preparar dados para plotagem
train_df = pd.DataFrame({
    'date': y_dates_train,
    'actual': y_train_mb.flatten(),
    'predicted': y_train_pred_mb.flatten()
}).sort_values('date')

test_df = pd.DataFrame({
    'date': y_dates_test,
    'actual': y_test_mb.flatten(),
    'predicted': y_test_pred_mb.flatten()
}).sort_values('date')

# 12. Plotar os resultados
plt.style.use('default')
fig, axs = plt.subplots(2, 1, figsize=(15, 10), sharex=False)

axs[0].plot(train_df['date'], train_df['actual'], label='Real', color='blue', linewidth=1.5)
axs[0].plot(train_df['date'], train_df['predicted'], label='Predito', color='red', alpha=0.7, linewidth=1.5)
axs[0].set_title('Conjunto de Treinamento (60%)', fontsize=12, pad=10)
axs[0].set_ylabel('Consumo de Memória (MB)', fontsize=10)
axs[0].legend(loc='upper left', fontsize=10)
axs[0].grid(True, linestyle='--', alpha=0.7)

axs[1].plot(test_df['date'], test_df['actual'], label='Real', color='blue', linewidth=1.5)
axs[1].plot(test_df['date'], test_df['predicted'], label='Predito', color='red', alpha=0.7, linewidth=1.5)
axs[1].set_title('Conjunto de Teste (20%)', fontsize=12, pad=10)
axs[1].set_xlabel('Data', fontsize=10)
axs[1].set_ylabel('Consumo de Memória (MB)', fontsize=10)
axs[1].legend(loc='upper left', fontsize=10)
axs[1].grid(True, linestyle='--', alpha=0.7)

for ax in axs:
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax.tick_params(axis='x', rotation=45, labelsize=9)
    ax.tick_params(axis='y', labelsize=9)

plt.suptitle('Predições do Transformer Otimizado - Prometheus (MB, Resample 15min)', fontsize=14, y=0.98)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig(os.path.join(DATA_DIR, 'prometheus_transformer_robust_15min.png'), dpi=300, bbox_inches='tight')
plt.close()


Testing combination: LR=0.0005, Layers=2, Heads=4, FF=256, Dropout=0.1, Weight Decay=0.0
  Repetition 1/5
    MAE: 7.453037261962891, RMSE: 13.5815116425269, MAPE: 0.9330697357654572%, SMAPE: 0.9461120963096619%
  Repetition 2/5
    MAE: 6.801558017730713, RMSE: 12.121268101187242, MAPE: 0.831527728587389%, SMAPE: 0.8393256664276123%
  Repetition 3/5
    MAE: 5.8792572021484375, RMSE: 11.9543191550151, MAPE: 0.7245367858558893%, SMAPE: 0.7335588932037354%
  Repetition 4/5
    MAE: 7.707339286804199, RMSE: 13.826567377845507, MAPE: 0.9615064598619938%, SMAPE: 0.9654054641723633%
  Repetition 5/5
    MAE: 10.9610013961792, RMSE: 15.573000387351602, MAPE: 1.368923857808113%, SMAPE: 1.3724135160446167%
  Average MAE: 7.760438632965088 (±1.7204192950935608), Average RMSE: 13.41133333278527 (±1.3161567566276022)
  Average MAPE: 0.9639129135757685% (±0.2190058105674193), Average SMAPE: 0.9713631868362427% (±0.2170249968767166)




🏃 View run kindly-crow-798 at: http://localhost:5001/#/experiments/1/runs/2ec2b96f972946f4a380338dc2ecfd80
🧪 View experiment at: http://localhost:5001/#/experiments/1

Testing combination: LR=0.0005, Layers=2, Heads=4, FF=256, Dropout=0.1, Weight Decay=0.01
  Repetition 1/5
    MAE: 12.079607009887695, RMSE: 17.896353818039827, MAPE: 1.4765908010303974%, SMAPE: 1.4823979139328003%
  Repetition 2/5
    MAE: 10.489054679870605, RMSE: 16.437737640867734, MAPE: 1.2837916612625122%, SMAPE: 1.2941336631774902%
  Repetition 3/5
    MAE: 9.777318000793457, RMSE: 15.983907235946527, MAPE: 1.204175129532814%, SMAPE: 1.212205171585083%
  Repetition 4/5
    MAE: 9.839430809020996, RMSE: 17.316701881781647, MAPE: 1.2343729846179485%, SMAPE: 1.2432461977005005%
  Repetition 5/5
    MAE: 10.204169273376465, RMSE: 18.531165188728774, MAPE: 1.2700697407126427%, SMAPE: 1.2797783613204956%
  Average MAE: 10.477915954589843 (±0.8413056908488283), Average RMSE: 17.233173153072904 (±0.930048595859641)
  Ave



🏃 View run colorful-deer-476 at: http://localhost:5001/#/experiments/1/runs/d01fc6a9b10648ceb89a224eb92d29bb
🧪 View experiment at: http://localhost:5001/#/experiments/1

Testing combination: LR=0.0005, Layers=2, Heads=4, FF=512, Dropout=0.1, Weight Decay=0.01
  Repetition 1/5
    MAE: 7.622212886810303, RMSE: 14.797512278383358, MAPE: 0.9454013779759407%, SMAPE: 0.9577459692955017%
  Repetition 2/5
    MAE: 9.83469009399414, RMSE: 17.492112480299156, MAPE: 1.2118125334382057%, SMAPE: 1.2330007553100586%
  Repetition 3/5
    MAE: 9.07991886138916, RMSE: 16.031510795271487, MAPE: 1.1303098872303963%, SMAPE: 1.1462740898132324%
  Repetition 4/5
    MAE: 8.538150787353516, RMSE: 14.942610756187738, MAPE: 1.051127351820469%, SMAPE: 1.0617228746414185%
  Repetition 5/5
    MAE: 9.365416526794434, RMSE: 14.590202472162987, MAPE: 1.1598723009228706%, SMAPE: 1.1708064079284668%
  Average MAE: 8.88807783126831 (±0.759683220914869), Average RMSE: 15.570789756460945 (±1.0823920935920464)
  Average



🏃 View run righteous-bass-138 at: http://localhost:5001/#/experiments/1/runs/3ef69a2198b644668c7252cf0a4f7f14
🧪 View experiment at: http://localhost:5001/#/experiments/1

Testing combination: LR=0.0005, Layers=2, Heads=8, FF=512, Dropout=0.1, Weight Decay=0.01
  Repetition 1/5
    MAE: 9.105676651000977, RMSE: 16.233802587057255, MAPE: 1.1391649022698402%, SMAPE: 1.1565929651260376%
  Repetition 2/5
    MAE: 8.913354873657227, RMSE: 14.58959640436251, MAPE: 1.1159797199070454%, SMAPE: 1.1181416511535645%
  Repetition 3/5
    MAE: 9.10792350769043, RMSE: 14.507864036324726, MAPE: 1.1218744330108166%, SMAPE: 1.1327749490737915%
  Repetition 4/5
    MAE: 10.394840240478516, RMSE: 17.70765642854543, MAPE: 1.2824793346226215%, SMAPE: 1.2982537746429443%
  Repetition 5/5
    MAE: 10.862841606140137, RMSE: 16.61881640171014, MAPE: 1.3448487967252731%, SMAPE: 1.3515654802322388%
  Average MAE: 9.676927375793458 (±0.7943459198019198), Average RMSE: 15.93154717160001 (±1.2284923243385117)
  Aver

KeyboardInterrupt: 