In [None]:
%pip install mlflow joblib

In [1]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import mlflow
import mlflow.pytorch

# Configurar MLflow
mlflow.set_tracking_uri("http://localhost:5001")
mlflow.set_experiment("Prometheus_Transformer_Experiment_MRFO_RMSE_Optimized")

# Definir o dispositivo (usar MPS para Apple Silicon GPU se disponível)
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

# Definindo constantes
DATA_DIR = "../../data/"
FILE_PATH = os.path.join(DATA_DIR, 'ts.pkl')
SEQ_LENGTH = 48  # 12 horas (48 * 15min)
MB = 1_048_576

# 1. Carregar e reamostrar os dados para 15 minutos
df = pd.read_pickle(FILE_PATH)
ts = df['value'].astype(float)
# Aplicar suavização com média móvel (window=3)
ts = ts.rolling(window=3, min_periods=1).mean()
ts = ts.resample('15min').mean().dropna()
dates = ts.index

# 2. Dividir os dados: 60% treino, 20% validação, 20% teste
train_size = int(0.6 * len(ts))
val_size = int(0.2 * len(ts))
train = ts[:train_size]
val = ts[train_size:train_size + val_size]
test = ts[train_size + val_size:]

# 3. Escalonar os dados
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
val_scaled = scaler.transform(val.values.reshape(-1, 1))
test_scaled = scaler.transform(test.values.reshape(-1, 1))

# 4. Criar sequências
def create_sequences(data, dates, seq_length):
    X, y, y_dates = [], [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length])
        y_dates.append(dates[i + seq_length])
    return np.array(X), np.array(y), np.array(y_dates)

X_train, y_train, y_dates_train = create_sequences(train_scaled, dates[:train_size], SEQ_LENGTH)
X_val, y_val, y_dates_val = create_sequences(val_scaled, dates[train_size:train_size + val_size], SEQ_LENGTH)
X_test, y_test, y_dates_test = create_sequences(test_scaled, dates[train_size + val_size:], SEQ_LENGTH)

# 5. Ajustar dimensões para o modelo Transformer
d_model = 128
X_train = np.repeat(X_train, d_model, axis=2)
X_val = np.repeat(X_val, d_model, axis=2)
X_test = np.repeat(X_test, d_model, axis=2)

# 6. Converter para tensores PyTorch e mover para o dispositivo
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).to(device)

# 7. Definir codificação posicional
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return x

# 8. Definir o modelo Transformer com dropout
class Encoder(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_layers, dim_feedforward, dropout=0.1):
        super(Encoder, self).__init__()
        self.pos_encoder = PositionalEncoding(d_model)
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.dropout = nn.Dropout(dropout)
        self.linear_out = nn.Linear(d_model, 1, bias=True)
        # Inicializar pesos com Xavier
        nn.init.xavier_uniform_(self.linear_out.weight)
        if self.linear_out.bias is not None:
            nn.init.zeros_(self.linear_out.bias)

    def forward(self, src):
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = self.dropout(output[:, -1, :])
        output = self.linear_out(output)
        return output

# Outros hiperparâmetros fixos
input_dim = d_model
batch_size = 64  # Aumentado para maior estabilidade
num_epochs = 50

# Função para calcular SMAPE
def smape(y_true, y_pred):
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    diff = np.abs(y_true - y_pred) / denominator
    return 100 * np.mean(diff)

# Função de treinamento e avaliação para uma repetição
def train_and_evaluate(learning_rate):
    # Fixar hiperparâmetros conhecidos
    num_layers = 2
    nhead = 4
    dim_feedforward = 512
    
    model = Encoder(input_dim, d_model, nhead, num_layers, dim_feedforward, dropout=0.1).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    for epoch in range(num_epochs):
        model.train()
        for i in range(0, len(X_train), batch_size):
            X_batch = X_train[i:i + batch_size]
            y_batch = y_train[i:i + batch_size]
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            optimizer.zero_grad()
            loss.backward()
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
        scheduler.step()

    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)
    
    # Mover os dados de volta para a CPU para cálculos de métricas
    y_pred_rescaled = scaler.inverse_transform(y_pred.cpu().numpy()) / MB
    y_test_rescaled = scaler.inverse_transform(y_test.cpu().numpy()) / MB
    
    mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
    rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))
    mape = mean_absolute_percentage_error(y_test_rescaled, y_pred_rescaled) * 100
    smape_val = smape(y_test_rescaled, y_pred_rescaled)
    
    return mae, rmse, mape, smape_val, model

# Função de mapeamento de valores contínuos para discretos
def map_continuous_to_discrete(value, discrete_values):
    idx = int(round(value * (len(discrete_values) - 1)))
    idx = max(0, min(idx, len(discrete_values) - 1))
    return discrete_values[idx]

# Função de avaliação para uma manta (usada no MRFO)
def evaluate_manta(params, n_repetitions):
    lr = 10 ** params[0]  # learning_rate (log scale)
    
    # Executar repetições sequencialmente (GPU não se beneficia de paralelismo aqui)
    results = [train_and_evaluate(lr) for _ in range(n_repetitions)]
    rmse_list = [result[1] for result in results]  # Minimizar RMSE
    return np.mean(rmse_list)

# Implementação do MRFO com saídas intermediárias e early stopping
class MRFO:
    def __init__(self, objective_func, bounds, n_mantas=30, max_iter=100, patience=7, n_repetitions=5):
        self.objective_func = objective_func
        self.bounds = np.array(bounds).T  # Shape: (2, dim)
        self.n_mantas = n_mantas
        self.max_iter = max_iter
        self.patience = patience
        self.n_repetitions = n_repetitions
        self.dim = self.bounds.shape[1]
        
        # Inicializar população
        self.positions = np.zeros((self.n_mantas, self.dim))
        for d in range(self.dim):
            self.positions[:, d] = np.random.uniform(self.bounds[0, d], self.bounds[1, d], self.n_mantas)
        self.fitness = np.array([float('inf')] * self.n_mantas)
        self.best_position = None
        self.best_fitness = float('inf')
        self.no_improvement_count = 0  # Contador para early stopping

    def optimize(self):
        for t in range(self.max_iter):
            print(f"\nIteration {t+1}/{self.max_iter}")
            # Avaliar fitness de todas as mantas sequencialmente (GPU não se beneficia de paralelismo aqui)
            fitness_results = [self.objective_func(self.positions[i], self.n_repetitions) for i in range(self.n_mantas)]
            self.fitness = np.array(fitness_results)

            # Exibir resultados e atualizar o melhor fitness
            for i in range(self.n_mantas):
                print(f"  Manta {i+1}/{self.n_mantas}: Fitness (RMSE) = {self.fitness[i]:.4f}")
                if self.fitness[i] < self.best_fitness:
                    self.best_fitness = self.fitness[i]
                    self.best_position = self.positions[i].copy()
                    self.no_improvement_count = 0  # Resetar o contador
                    print(f"  New Best Fitness: {self.best_fitness:.4f}")
                else:
                    self.no_improvement_count += 1

            # Registrar melhor fitness no MLflow
            with mlflow.start_run(run_name=f"MRFO_Iteration_{t+1}"):
                mlflow.log_metric("best_fitness_rmse", self.best_fitness)
                # Registrar os hiperparâmetros correspondentes ao melhor fitness
                lr = 10 ** self.best_position[0]
                mlflow.log_param("learning_rate", lr)
                mlflow.log_param("num_layers", 2)
                mlflow.log_param("nhead", 4)
                mlflow.log_param("dim_feedforward", 512)

            # Critério de parada precoce
            if self.no_improvement_count >= self.patience:
                print(f"\nEarly stopping triggered after {t+1} iterations due to no improvement for {self.patience} iterations.")
                break

            # Atualizar posições usando Chain Foraging, Cyclone Foraging e Somersault Foraging
            for i in range(self.n_mantas):
                r = np.random.random(self.dim)
                r1 = np.random.random()

                # Chain Foraging
                if r1 < 0.5:
                    if i == 0:
                        self.positions[i] = self.positions[i] + r * (self.best_position - self.positions[i]) + \
                                            r * (self.best_position - self.positions[i])
                    else:
                        self.positions[i] = self.positions[i] + r * (self.positions[i-1] - self.positions[i]) + \
                                            r * (self.best_position - self.positions[i])

                # Cyclone Foraging
                else:
                    beta = 2 * np.exp(r1 * (self.max_iter - t + 1) / self.max_iter) * np.sin(2 * np.pi * r1)
                    if r1 < 0.5:
                        self.positions[i] = self.positions[i] + r * (self.best_position - beta * self.positions[i])
                    else:
                        idx = np.random.randint(0, self.n_mantas)
                        self.positions[i] = self.positions[i] + r * (self.positions[idx] - beta * self.positions[i])

                # Somersault Foraging
                r2 = np.random.random()
                self.positions[i] = self.positions[i] + 0.5 * (self.best_position + self.positions[i]) * (2 * r2 - 1)

                # Garantir que as posições estejam dentro dos limites
                self.positions[i] = np.clip(self.positions[i], self.bounds[0], self.bounds[1])

        return self.best_position, self.best_fitness

# MRFO para otimizar hiperparâmetros
n_repetitions = 5
bounds = [
    [-3.0458, -2.9586],  # log10(learning_rate): [0.0009, 0.0011]
]

mrfo = MRFO(lambda params, reps: evaluate_manta(params, reps), bounds, n_mantas=30, max_iter=10, patience=7, n_repetitions=n_repetitions)
best_position, best_fitness = mrfo.optimize()

# Mapear a melhor posição para hiperparâmetros
best_lr = 10 ** best_position[0]

# Treinar o modelo com a melhor configuração para obter métricas finais
with mlflow.start_run(run_name="Best_MRFO_Run"):
    # Registrar hiperparâmetros
    mlflow.log_param("learning_rate", best_lr)
    mlflow.log_param("num_layers", 2)
    mlflow.log_param("nhead", 4)
    mlflow.log_param("dim_feedforward", 512)
    mlflow.log_param("seq_length", SEQ_LENGTH)
    mlflow.log_param("resample_interval", "15min")
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("num_epochs", num_epochs)

    print(f"\nBest MRFO Configuration: LR={best_lr}, Layers=2, Heads=4, FF=512")
    
    # Executar repetições sequencialmente (GPU não se beneficia de paralelismo aqui)
    results = [train_and_evaluate(best_lr) for _ in range(n_repetitions)]

    mae_list = [result[0] for result in results]
    rmse_list = [result[1] for result in results]
    mape_list = [result[2] for result in results]
    smape_list = [result[3] for result in results]
    models = [result[4] for result in results]

    for rep, (mae, rmse, mape, smape_val, _) in enumerate(results):
        print(f"  Repetition {rep+1}/{n_repetitions}")
        print(f"    MAE: {mae}, RMSE: {rmse}, MAPE: {mape}%, SMAPE: {smape_val}%")

    avg_mae = np.mean(mae_list)
    avg_rmse = np.mean(rmse_list)
    avg_mape = np.mean(mape_list)
    avg_smape = np.mean(smape_list)
    std_mae = np.std(mae_list)
    std_rmse = np.std(rmse_list)
    std_mape = np.std(mape_list)
    std_smape = np.std(smape_list)

    print(f"  Average MAE: {avg_mae} (±{std_mae}), Average RMSE: {avg_rmse} (±{std_rmse})")
    print(f"  Average MAPE: {avg_mape}% (±{std_mape}), Average SMAPE: {avg_smape}% (±{std_smape})")

    # Registrar métricas no MLflow
    mlflow.log_metric("avg_mae", avg_mae)
    mlflow.log_metric("std_mae", std_mae)
    mlflow.log_metric("avg_rmse", avg_rmse)
    mlflow.log_metric("std_rmse", std_rmse)
    mlflow.log_metric("avg_mape", avg_mape)
    mlflow.log_metric("std_mape", std_mape)
    mlflow.log_metric("avg_smape", avg_smape)
    mlflow.log_metric("std_smape", std_smape)

    best_model = models[0]
    mlflow.pytorch.log_model(best_model, "best_model")

# 9. Fazer previsões
best_model.eval()
with torch.no_grad():
    y_train_pred = best_model(X_train)
    y_test_pred = best_model(X_test)

# 10. Reverter o escalonamento e converter para MB
y_train_pred_mb = scaler.inverse_transform(y_train_pred.cpu().numpy()) / MB
y_train_mb = scaler.inverse_transform(y_train.cpu().numpy()) / MB
y_test_pred_mb = scaler.inverse_transform(y_test_pred.cpu().numpy()) / MB
y_test_mb = scaler.inverse_transform(y_test.cpu().numpy()) / MB

# 11. Preparar dados para plotagem
train_df = pd.DataFrame({
    'date': y_dates_train,
    'actual': y_train_mb.flatten(),
    'predicted': y_train_pred_mb.flatten()
}).sort_values('date')

test_df = pd.DataFrame({
    'date': y_dates_test,
    'actual': y_test_mb.flatten(),
    'predicted': y_test_pred_mb.flatten()
}).sort_values('date')

# 12. Plotar os resultados
plt.style.use('default')
fig, axs = plt.subplots(2, 1, figsize=(15, 10), sharex=False)

axs[0].plot(train_df['date'], train_df['actual'], label='Real', color='blue', linewidth=1.5)
axs[0].plot(train_df['date'], train_df['predicted'], label='Predito', color='red', alpha=0.7, linewidth=1.5)
axs[0].set_title('Conjunto de Treinamento (60%)', fontsize=12, pad=10)
axs[0].set_ylabel('Consumo de Memória (MB)', fontsize=10)
axs[0].legend(loc='upper left', fontsize=10)
axs[0].grid(True, linestyle='--', alpha=0.7)

axs[1].plot(test_df['date'], test_df['actual'], label='Real', color='blue', linewidth=1.5)
axs[1].plot(test_df['date'], test_df['predicted'], label='Predito', color='red', alpha=0.7, linewidth=1.5)
axs[1].set_title('Conjunto de Teste (20%)', fontsize=12, pad=10)
axs[1].set_xlabel('Data', fontsize=10)
axs[1].set_ylabel('Consumo de Memória (MB)', fontsize=10)
axs[1].legend(loc='upper left', fontsize=10)
axs[1].grid(True, linestyle='--', alpha=0.7)

for ax in axs:
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax.tick_params(axis='x', rotation=45, labelsize=9)
    ax.tick_params(axis='y', labelsize=9)

plt.suptitle('Predições do Transformer Otimizado - Prometheus (MB, Resample 15min)', fontsize=14, y=0.98)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig(os.path.join(DATA_DIR, 'prometheus_transformer_mrfo_15min.png'), dpi=300, bbox_inches='tight')
plt.close()

2025/05/04 17:38:02 INFO mlflow.tracking.fluent: Experiment with name 'Prometheus_Transformer_Experiment_MRFO_RMSE_Optimized' does not exist. Creating a new experiment.


Using device: mps

Iteration 1/10
  Manta 1/30: Fitness (RMSE) = 16.0810
  New Best Fitness: 16.0810
  Manta 2/30: Fitness (RMSE) = 16.7282
  Manta 3/30: Fitness (RMSE) = 14.7150
  New Best Fitness: 14.7150
  Manta 4/30: Fitness (RMSE) = 14.1698
  New Best Fitness: 14.1698
  Manta 5/30: Fitness (RMSE) = 14.7550
  Manta 6/30: Fitness (RMSE) = 14.5088
  Manta 7/30: Fitness (RMSE) = 14.8400
  Manta 8/30: Fitness (RMSE) = 13.9961
  New Best Fitness: 13.9961
  Manta 9/30: Fitness (RMSE) = 13.9786
  New Best Fitness: 13.9786
  Manta 10/30: Fitness (RMSE) = 13.8546
  New Best Fitness: 13.8546
  Manta 11/30: Fitness (RMSE) = 13.3706
  New Best Fitness: 13.3706
  Manta 12/30: Fitness (RMSE) = 15.5195
  Manta 13/30: Fitness (RMSE) = 14.3447
  Manta 14/30: Fitness (RMSE) = 14.5047
  Manta 15/30: Fitness (RMSE) = 14.1857
  Manta 16/30: Fitness (RMSE) = 14.1653
  Manta 17/30: Fitness (RMSE) = 16.1175
  Manta 18/30: Fitness (RMSE) = 15.3538
  Manta 19/30: Fitness (RMSE) = 15.5647
  Manta 20/30: Fitn



🏃 View run Best_MRFO_Run at: http://localhost:5001/#/experiments/9/runs/a2d7f7dceb4b457eb2a8ab050751bf7a
🧪 View experiment at: http://localhost:5001/#/experiments/9
