In [1]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from itertools import product
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Definindo constantes
DATA_DIR = "../../data/"
FILE_PATH = os.path.join(DATA_DIR, 'ts.pkl')
SEQ_LENGTH = 120  # Aumentado para capturar mais contexto
MB = 1_048_576

# 1. Carregar e reamostrar os dados
df = pd.read_pickle(FILE_PATH)
ts = df['value'].astype(float).resample('15min').mean().dropna()  # Testando 15min
dates = ts.index

# 2. Dividir os dados: 60% treino, 20% validação, 20% teste
train_size = int(0.6 * len(ts))
val_size = int(0.2 * len(ts))
train = ts[:train_size]
val = ts[train_size:train_size + val_size]
test = ts[train_size + val_size:]

# 3. Escalonar os dados
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
val_scaled = scaler.transform(val.values.reshape(-1, 1))
test_scaled = scaler.transform(test.values.reshape(-1, 1))

# 4. Criar sequências
def create_sequences(data, dates, seq_length):
    X, y, y_dates = [], [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length])
        y_dates.append(dates[i + seq_length])
    return np.array(X), np.array(y), np.array(y_dates)

X_train, y_train, y_dates_train = create_sequences(train_scaled, dates[:train_size], SEQ_LENGTH)
X_val, y_val, y_dates_val = create_sequences(val_scaled, dates[train_size:train_size + val_size], SEQ_LENGTH)
X_test, y_test, y_dates_test = create_sequences(test_scaled, dates[train_size + val_size:], SEQ_LENGTH)

# 5. Ajustar dimensões para o modelo Transformer
d_model = 128  # Aumentado para representações mais ricas
X_train = np.repeat(X_train, d_model, axis=2)
X_val = np.repeat(X_val, d_model, axis=2)
X_test = np.repeat(X_test, d_model, axis=2)

# 6. Converter para tensores PyTorch
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# 7. Definir codificação posicional
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return x

# 8. Definir o modelo Transformer
class Encoder(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_layers, dim_feedforward, dropout=0.1):
        super(Encoder, self).__init__()
        self.pos_encoder = PositionalEncoding(d_model)
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.linear_out = nn.Linear(d_model, 1)

    def forward(self, src):
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = self.linear_out(output[:, -1, :])  # Pegar apenas o último timestep
        return output

# Hiperparâmetros para tuning
learning_rates = [0.0001, 0.001, 0.005]
num_layers_list = [2, 3, 4]
nheads = [4, 8]
dim_feedforwards = [256, 512]

# Outros hiperparâmetros fixos
input_dim = d_model
batch_size = 32  # Reduzido para maior estabilidade
num_epochs = 50  # Aumentado com parada precoce

# Função de treinamento
def train_model(learning_rate, num_layers, nhead, dim_feedforward):
    model = Encoder(input_dim, d_model, nhead, num_layers, dim_feedforward)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

    best_val_loss = float('inf')
    patience = 10
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        for i in range(0, len(X_train), batch_size):
            X_batch = X_train[i:i + batch_size]
            y_batch = y_train[i:i + batch_size]
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Validação
        model.eval()
        with torch.no_grad():
            y_val_pred = model(X_val)
            val_loss = criterion(y_val_pred, y_val)
        
        scheduler.step(val_loss)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    return model

# Grid search com repetições
best_avg_mae = float('inf')
best_avg_rmse = float('inf')
best_hyperparams = None
best_model = None
n_repetitions = 5

for lr, nl, nh, df in product(learning_rates, num_layers_list, nheads, dim_feedforwards):
    mae_list = []
    rmse_list = []
    
    print(f"\nTesting combination: LR={lr}, Layers={nl}, Heads={nh}, FF={df}")
    for rep in range(n_repetitions):
        print(f"  Repetition {rep+1}/{n_repetitions}")
        model = train_model(lr, nl, nh, df)
        model.eval()
        with torch.no_grad():
            y_pred = model(X_test)
        
        y_pred_rescaled = scaler.inverse_transform(y_pred.numpy()) / MB
        y_test_rescaled = scaler.inverse_transform(y_test.numpy()) / MB
        
        mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
        rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))
        
        mae_list.append(mae)
        rmse_list.append(rmse)
        print(f"    MAE: {mae}, RMSE: {rmse}")
    
    # Calcular médias
    avg_mae = np.mean(mae_list)
    avg_rmse = np.mean(rmse_list)
    std_mae = np.std(mae_list)
    std_rmse = np.std(rmse_list)
    
    print(f"  Average MAE: {avg_mae} (±{std_mae}), Average RMSE: {avg_rmse} (±{std_rmse})")
    
    if avg_mae < best_avg_mae:
        best_avg_mae = avg_mae
        best_avg_rmse = avg_rmse
        best_hyperparams = {'learning_rate': lr, 'num_layers': nl, 'nhead': nh, 'dim_feedforward': df}
        best_model = model

print(f'\nBest Hyperparameters: {best_hyperparams}')
print(f'Best Average MAE: {best_avg_mae}')
print(f'Best Average RMSE: {best_avg_rmse}')

# 9. Fazer previsões
model = best_model
model.eval()
with torch.no_grad():
    y_train_pred = model(X_train)
    y_test_pred = model(X_test)

# 10. Reverter o escalonamento e converter para MB
y_train_pred_mb = scaler.inverse_transform(y_train_pred.numpy()) / MB
y_train_mb = scaler.inverse_transform(y_train.numpy()) / MB
y_test_pred_mb = scaler.inverse_transform(y_test_pred.numpy()) / MB
y_test_mb = scaler.inverse_transform(y_test.numpy()) / MB

# 11. Preparar dados para plotagem
train_df = pd.DataFrame({
    'date': y_dates_train,
    'actual': y_train_mb.flatten(),
    'predicted': y_train_pred_mb.flatten()
}).sort_values('date')

test_df = pd.DataFrame({
    'date': y_dates_test,
    'actual': y_test_mb.flatten(),
    'predicted': y_test_pred_mb.flatten()
}).sort_values('date')

# 12. Plotar os resultados
plt.style.use('default')
fig, axs = plt.subplots(2, 1, figsize=(15, 10), sharex=False)

axs[0].plot(train_df['date'], train_df['actual'], label='Real', color='blue', linewidth=1.5)
axs[0].plot(train_df['date'], train_df['predicted'], label='Predito', color='red', alpha=0.7, linewidth=1.5)
axs[0].set_title('Conjunto de Treinamento (60%)', fontsize=12, pad=10)
axs[0].set_ylabel('Consumo de Memória (MB)', fontsize=10)
axs[0].legend(loc='upper left', fontsize=10)
axs[0].grid(True, linestyle='--', alpha=0.7)

axs[1].plot(test_df['date'], test_df['actual'], label='Real', color='blue', linewidth=1.5)
axs[1].plot(test_df['date'], test_df['predicted'], label='Predito', color='red', alpha=0.7, linewidth=1.5)
axs[1].set_title('Conjunto de Teste (20%)', fontsize=12, pad=10)
axs[1].set_xlabel('Data', fontsize=10)
axs[1].set_ylabel('Consumo de Memória (MB)', fontsize=10)
axs[1].legend(loc='upper left', fontsize=10)
axs[1].grid(True, linestyle='--', alpha=0.7)

for ax in axs:
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax.tick_params(axis='x', rotation=45, labelsize=9)
    ax.tick_params(axis='y', labelsize=9)

plt.suptitle('Predições do Transformer Otimizado - Prometheus (MB, Resample 15min)', fontsize=14, y=0.98)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig(os.path.join(DATA_DIR, 'prometheus_transformer_robust_15min.png'), dpi=300, bbox_inches='tight')
plt.close()


Testing combination: LR=0.0001, Layers=2, Heads=4, FF=256
  Repetition 1/5
    MAE: 18.479503631591797, RMSE: 26.878705004423924
  Repetition 2/5
    MAE: 20.714757919311523, RMSE: 26.27941525254367
  Repetition 3/5
    MAE: 12.292089462280273, RMSE: 15.233248155440597
  Repetition 4/5
    MAE: 16.082731246948242, RMSE: 19.350572113332497
  Repetition 5/5
    MAE: 22.83884620666504, RMSE: 31.594968023318703
  Average MAE: 18.081585693359376 (±3.667073709544625), Average RMSE: 23.867381709811877 (±5.823253303681761)

Testing combination: LR=0.0001, Layers=2, Heads=4, FF=512
  Repetition 1/5
    MAE: 17.34595489501953, RMSE: 23.81189380585034
  Repetition 2/5
    MAE: 13.186022758483887, RMSE: 17.113495686712888
  Repetition 3/5
    MAE: 17.213817596435547, RMSE: 21.703783062377628
  Repetition 4/5
    MAE: 16.213184356689453, RMSE: 24.491914621685613
  Repetition 5/5
    MAE: 18.022062301635742, RMSE: 22.660091269192296
  Average MAE: 16.396208381652833 (±1.7060301149621007), Average R