In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mlflow
import mlflow.keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import matplotlib.dates as mdates
import tensorflow as tf

# Point MLflow at the tracking server and select the experiment
mlflow.set_tracking_uri("http://localhost:5001")
mlflow.set_experiment("Prometheus_LSTM_Experiment_MRFO_15min")

# Constants
DATA_DIR = "../../data/"
FILE_PATH = os.path.join(DATA_DIR, 'ts.pkl')
SEQ_LENGTH = 48  # 12 hours (48 * 15 min)
MB = 1024 ** 2   # divisor used to express values in MB (1_048_576)

# 1. Load the series, smooth it with a 3-sample moving average, then
#    resample to 15-minute means and drop the empty bins
df = pd.read_pickle(FILE_PATH)
ts = (
    df['value']
    .astype(float)
    .rolling(window=3, min_periods=1)
    .mean()
    .resample('15min')
    .mean()
    .dropna()
)
dates = ts.index

# 2. Chronological split: 60% train, 20% validation, 20% test
train_size = int(0.6 * len(ts))
val_size = int(0.2 * len(ts))
train = ts.iloc[:train_size]
val = ts.iloc[train_size:train_size + val_size]
test = ts.iloc[train_size + val_size:]

# 3. Standardise; the scaler is fitted on the training split only and then
#    reused unchanged for validation and test
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train.to_numpy().reshape(-1, 1))
val_scaled = scaler.transform(val.to_numpy().reshape(-1, 1))
test_scaled = scaler.transform(test.to_numpy().reshape(-1, 1))

# 4. Build sliding-window sequences
def create_sequences(data, dates, seq_length):
    """Slice ``data`` into overlapping windows with their next-step targets.

    For every index ``t`` from ``seq_length`` to ``len(data) - 1`` the window
    is ``data[t - seq_length:t]``, the target is ``data[t]`` and the target's
    timestamp is ``dates[t]``.

    Args:
        data: Indexable sequence of observations (sliced and indexed by int).
        dates: Indexable sequence aligned with ``data``.
        seq_length: Number of consecutive observations per window.

    Returns:
        Tuple ``(X, y, y_dates)`` of NumPy arrays. All three are empty when
        ``len(data) <= seq_length``.
    """
    target_positions = range(seq_length, len(data))
    windows = [data[t - seq_length:t] for t in target_positions]
    targets = [data[t] for t in target_positions]
    target_dates = [dates[t] for t in target_positions]
    return np.array(windows), np.array(targets), np.array(target_dates)

# Build SEQ_LENGTH-sized windows for each split, then
# 5. reshape each input array to 3-D: (samples, timesteps, 1)
X_train, y_train, y_dates_train = create_sequences(train_scaled, dates[:train_size], SEQ_LENGTH)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

X_val, y_val, y_dates_val = create_sequences(val_scaled, dates[train_size:train_size + val_size], SEQ_LENGTH)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

X_test, y_test, y_dates_test = create_sequences(test_scaled, dates[train_size + val_size:], SEQ_LENGTH)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# 6. Symmetric mean absolute percentage error
def smape(y_true, y_pred):
    """Return the symmetric MAPE between ``y_true`` and ``y_pred``, in percent.

    Each term is ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)``. A term
    whose denominator is zero (truth and prediction both exactly 0) counts as
    0 error instead of producing NaN from 0/0.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, broadcastable with ``y_true``.

    Returns:
        The mean of the per-element terms multiplied by 100.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    abs_error = np.abs(y_true - y_pred)
    # Divide only where the denominator is non-zero; other slots keep the 0
    # they were initialised with.
    ratio = np.divide(
        abs_error,
        denominator,
        out=np.zeros_like(denominator),
        where=denominator != 0,
    )
    return 100 * np.mean(ratio)

# 7. Objective function for MRFO
def objective_function(params):
    """Train an LSTM with the candidate hyperparameters and score it.

    The model is fitted on the training split and scored on the *validation*
    split, so that the test split stays untouched during the hyperparameter
    search and can be used for the final evaluation. Each call is logged to
    MLflow as a separate run, including the trained model.

    Args:
        params: Indexable of six numbers, in order: lstm_units, dropout_rate,
            batch_size, learning_rate, n_steps (window length) and epochs.
            Every value is clamped to its allowed range before use.

    Returns:
        Tuple ``(rmse, mse, mae, mape, smape)``, computed on values that were
        inverse-scaled and divided by ``MB``. All five entries are
        ``float("inf")`` when either split is too short to produce a single
        window of length ``n_steps``.
    """
    lstm_units = max(50, min(150, int(params[0])))
    dropout_rate = max(0.2, min(0.5, params[1]))
    batch_size = max(32, min(128, int(params[2])))
    learning_rate = max(0.0001, min(0.001, params[3]))
    # The window must be shorter than both splits that are windowed below.
    n_steps = max(10, min(min(len(train_scaled), len(val_scaled)) - 1, int(params[4])))
    epochs = max(10, min(200, int(params[5])))

    # Build windows of length n_steps for training and validation
    X_train, y_train, _ = create_sequences(train_scaled, dates[:train_size], n_steps)
    X_val, y_val, _ = create_sequences(val_scaled, dates[train_size:train_size + val_size], n_steps)

    # Same arity as the normal return so callers can unpack either result.
    if X_train.shape[0] == 0 or X_val.shape[0] == 0:
        return float("inf"), float("inf"), float("inf"), float("inf"), float("inf")

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], 1))

    # Two stacked LSTM layers, each followed by dropout, then a single output
    model = Sequential()
    model.add(LSTM(lstm_units, activation='tanh', input_shape=(n_steps, 1), return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(lstm_units, activation='tanh'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse')

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predict on validation data and undo the scaling (values divided by MB)
    val_pred = model.predict(X_val, verbose=0)
    val_pred = scaler.inverse_transform(val_pred) / MB
    y_val_rescaled = scaler.inverse_transform(y_val) / MB

    # Compute metrics
    mse = mean_squared_error(y_val_rescaled, val_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_val_rescaled, val_pred)
    mape = mean_absolute_percentage_error(y_val_rescaled, val_pred) * 100
    smape_val = smape(y_val_rescaled, val_pred)

    # Log the candidate to MLflow
    with mlflow.start_run():
        mlflow.set_tag("modelo", "LSTM")
        mlflow.set_tag("otimização", "MRFO")
        mlflow.set_tag("versão", "v1.0")
        # Make explicit which split the logged metrics were computed on.
        mlflow.set_tag("eval_split", "validation")
        mlflow.log_param("lstm_units", lstm_units)
        mlflow.log_param("dropout_rate", dropout_rate)
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("n_steps", n_steps)
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("resample_interval", "15min")
        mlflow.log_param("seq_length", n_steps)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("mape", mape)
        mlflow.log_metric("smape", smape_val)
        mlflow.keras.log_model(model, "model")

    return rmse, mse, mae, mape, smape_val

# 8. MRFO implementation
class MRFO:
    """Population-based minimiser using MRFO-style foraging moves.

    Each agent is a ``dim``-dimensional position kept inside ``[lb, ub]``.
    ``obj_func`` is called with a position and must return an indexable whose
    first element is the fitness to minimise.

    Args:
        obj_func: Callable ``position -> (fitness, ...)``.
        dim: Number of dimensions of the search space.
        SearchAgents_no: Number of agents in the population.
        max_iter: Number of iterations performed by :meth:`optimize`.
        lb: Lower bound(s), scalar or array-like of length ``dim``.
        ub: Upper bound(s), scalar or array-like of length ``dim``.
    """

    def __init__(self, obj_func, dim, SearchAgents_no, max_iter, lb, ub):
        self.obj_func = obj_func
        self.dim = dim
        self.SearchAgents_no = SearchAgents_no
        self.max_iter = max_iter
        # Stored as float arrays so lists/tuples work and clipping can be
        # done in place on the float position matrix.
        self.lb = np.asarray(lb, dtype=float)
        self.ub = np.asarray(ub, dtype=float)
        # Uniform random initial population inside [lb, ub]
        self.positions = np.random.uniform(0, 1, (self.SearchAgents_no, self.dim)) * (self.ub - self.lb) + self.lb
        # The objective is evaluated once per agent here
        self.fitness = np.array([self.obj_func(self.clip_params(ind))[0] for ind in self.positions])
        self.best_idx = np.argmin(self.fitness)
        self.gbest = self.positions[self.best_idx].copy()
        self.gbest_fitness = self.fitness[self.best_idx]

    def clip_params(self, params):
        """Clamp ``params`` to ``[self.lb, self.ub]`` and return it.

        The bounds are the ones given to the constructor, so the optimiser
        does not depend on module-level data. A float ndarray is clipped in
        place (rows of ``self.positions`` are updated directly); any other
        input is converted to a new float array first.
        """
        params = np.asarray(params, dtype=float)
        np.clip(params, self.lb, self.ub, out=params)
        return params

    def chain_foraging(self, i):
        """Move agent ``i`` a random per-dimension fraction towards ``gbest``."""
        r = np.random.rand(self.dim)
        self.positions[i] = self.positions[i] + r * (self.gbest - self.positions[i])
        self.positions[i] = self.clip_params(self.positions[i])

    def cyclone_foraging(self, i, t, max_iter):
        """Move agent ``i`` relative to ``gbest`` with a random sign per dimension.

        The step amplitude ``A`` decays linearly from 2 towards 0 as ``t``
        approaches ``max_iter``.
        """
        r = np.random.rand(self.dim)
        A = 2 * (1 - t / max_iter)
        direction = np.random.choice([-1, 1], size=self.dim)
        self.positions[i] = self.positions[i] + A * direction * r * (self.gbest - self.positions[i])
        self.positions[i] = self.clip_params(self.positions[i])

    def somersault_foraging(self, i):
        """Flip agent ``i`` around a randomly scaled copy of ``gbest``."""
        S = 2 * np.random.rand(self.dim) - 1
        somersault_factor = 2
        self.positions[i] = self.positions[i] + somersault_factor * (S * self.gbest - self.positions[i])
        self.positions[i] = self.clip_params(self.positions[i])

    def optimize(self):
        """Run ``max_iter`` iterations and return ``(gbest, gbest_fitness)``.

        In every iteration each agent makes one chain or cyclone move (chosen
        with probability 0.5 each) and is then evaluated. After that, all
        agents make a somersault move, which is not evaluated until the next
        iteration. ``self.fitness[i]`` holds the best fitness agent ``i`` has
        reached so far; its position is updated even when the move did not
        improve on that value.
        """
        for t in range(self.max_iter):
            for i in range(self.SearchAgents_no):
                if np.random.rand() < 0.5:
                    self.chain_foraging(i)
                else:
                    self.cyclone_foraging(i, t, self.max_iter)

                fitness_candidate = self.obj_func(self.clip_params(self.positions[i]))[0]

                if fitness_candidate < self.fitness[i]:
                    self.fitness[i] = fitness_candidate
                    if fitness_candidate < self.gbest_fitness:
                        self.gbest_fitness = fitness_candidate
                        # Copy: positions[i] keeps moving in later steps
                        self.gbest = self.positions[i].copy()

            for i in range(self.SearchAgents_no):
                self.somersault_foraging(i)

            print(f"Iteração {t+1}/{self.max_iter}, Melhor Fitness (RMSE): {self.gbest_fitness:.4f}")

        return self.gbest, self.gbest_fitness

# 9. Configure and run MRFO
# Bounds are listed in the order the objective reads them:
# lstm_units, dropout_rate, batch_size, learning_rate, n_steps, epochs
lb = np.array([50, 0.2, 32, 0.0001, 10, 10])
ub = np.array([150, 0.5, 128, 0.001, 100, 200])
dim = 6
SearchAgents_no = 10
max_iter = 20
mrfo = MRFO(
    obj_func=objective_function,
    dim=dim,
    SearchAgents_no=SearchAgents_no,
    max_iter=max_iter,
    lb=lb,
    ub=ub,
)
best_params, best_fitness = mrfo.optimize()

# 10. Train with the best hyperparameters and compute metrics
def train_and_evaluate(params):
    """Train an LSTM with ``params`` and evaluate it on the test split.

    Args:
        params: Indexable of six numbers, in order: lstm_units, dropout_rate,
            batch_size, learning_rate, n_steps (window length) and epochs.
            Every value is clamped to its allowed range before use.

    Returns:
        A 9-tuple ``(rmse, mse, mae, mape, smape, model, y_test_rescaled,
        test_pred, y_dates_test)``. The metrics and the two value arrays are
        computed after inverse scaling and division by ``MB``. When a split is
        too short to produce a single window of length ``n_steps``, the five
        metrics are ``float("inf")`` and the remaining four entries are
        ``None``, so callers can always unpack nine values.
    """
    lstm_units = max(50, min(150, int(params[0])))
    dropout_rate = max(0.2, min(0.5, params[1]))
    batch_size = max(32, min(128, int(params[2])))
    learning_rate = max(0.0001, min(0.001, params[3]))
    # The window must be shorter than both splits that are windowed below.
    n_steps = max(10, min(min(len(train_scaled), len(test_scaled)) - 1, int(params[4])))
    epochs = max(10, min(200, int(params[5])))

    X_train, y_train, _ = create_sequences(train_scaled, dates[:train_size], n_steps)
    X_test, y_test, y_dates_test = create_sequences(test_scaled, dates[train_size + val_size:], n_steps)

    # Keep the arity identical to the normal return.
    if X_train.shape[0] == 0 or X_test.shape[0] == 0:
        inf = float("inf")
        return inf, inf, inf, inf, inf, None, None, None, None

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    # Two stacked LSTM layers, each followed by dropout, then a single output
    model = Sequential()
    model.add(LSTM(lstm_units, activation='tanh', input_shape=(n_steps, 1), return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(lstm_units, activation='tanh'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse')

    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predict and undo the scaling (values divided by MB)
    test_pred = model.predict(X_test, verbose=0)
    test_pred = scaler.inverse_transform(test_pred) / MB
    y_test_rescaled = scaler.inverse_transform(y_test) / MB

    mse = mean_squared_error(y_test_rescaled, test_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test_rescaled, test_pred)
    mape = mean_absolute_percentage_error(y_test_rescaled, test_pred) * 100
    smape_val = smape(y_test_rescaled, test_pred)

    return rmse, mse, mae, mape, smape_val, model, y_test_rescaled, test_pred, y_dates_test

# 11. Repeat training with the best hyperparameters and log aggregate results
n_repetitions = 5
with mlflow.start_run(run_name="Best_MRFO_Run"):
    mlflow.log_param("lstm_units", int(best_params[0]))
    mlflow.log_param("dropout_rate", best_params[1])
    mlflow.log_param("batch_size", int(best_params[2]))
    mlflow.log_param("learning_rate", best_params[3])
    mlflow.log_param("n_steps", int(best_params[4]))
    mlflow.log_param("epochs", int(best_params[5]))
    mlflow.log_param("resample_interval", "15min")
    # Log the window length the models are actually trained with (n_steps),
    # matching what objective_function logs, not the SEQ_LENGTH constant.
    mlflow.log_param("seq_length", int(best_params[4]))

    print(f"\nBest MRFO Configuration: LSTM Units={int(best_params[0])}, Dropout Rate={best_params[1]:.2f}, "
          f"Batch Size={int(best_params[2])}, Learning Rate={best_params[3]:.6f}, "
          f"n_steps={int(best_params[4])}, Epochs={int(best_params[5])}")

    # Each repetition retrains from scratch with the same hyperparameters
    results = [train_and_evaluate(best_params) for _ in range(n_repetitions)]

    rmse_list = [result[0] for result in results]
    mse_list = [result[1] for result in results]
    mae_list = [result[2] for result in results]
    mape_list = [result[3] for result in results]
    smape_list = [result[4] for result in results]
    models = [result[5] for result in results]

    # Keep the repetition with the lowest RMSE so that the logged
    # "best_model" and the arrays used for plotting belong to the same run.
    best_rep = int(np.argmin(rmse_list))
    y_test_rescaled = results[best_rep][6]
    test_pred = results[best_rep][7]
    y_dates_test = results[best_rep][8]

    for rep, (rmse, mse, mae, mape, smape_val, _, _, _, _) in enumerate(results):
        print(f"  Repetition {rep+1}/{n_repetitions}")
        print(f"    MAE: {mae}, RMSE: {rmse}, MAPE: {mape}%, SMAPE: {smape_val}%")

    # Mean and standard deviation of every metric across repetitions
    avg_rmse = np.mean(rmse_list)
    avg_mse = np.mean(mse_list)
    avg_mae = np.mean(mae_list)
    avg_mape = np.mean(mape_list)
    avg_smape = np.mean(smape_list)
    std_rmse = np.std(rmse_list)
    std_mse = np.std(mse_list)
    std_mae = np.std(mae_list)
    std_mape = np.std(mape_list)
    std_smape = np.std(smape_list)

    print(f"  Average RMSE: {avg_rmse} (±{std_rmse}), Average MSE: {avg_mse} (±{std_mse})")
    print(f"  Average MAE: {avg_mae} (±{std_mae}), Average MAPE: {avg_mape}% (±{std_mape})")
    print(f"  Average SMAPE: {avg_smape}% (±{std_smape})")

    mlflow.log_metric("avg_rmse", avg_rmse)
    mlflow.log_metric("std_rmse", std_rmse)
    mlflow.log_metric("avg_mse", avg_mse)
    mlflow.log_metric("std_mse", std_mse)
    mlflow.log_metric("avg_mae", avg_mae)
    mlflow.log_metric("std_mae", std_mae)
    mlflow.log_metric("avg_mape", avg_mape)
    mlflow.log_metric("std_mape", std_mape)
    mlflow.log_metric("avg_smape", avg_smape)
    mlflow.log_metric("std_smape", std_smape)

    best_model = models[best_rep]
    # Nothing to log when the selected repetition produced no model.
    if best_model is not None:
        mlflow.keras.log_model(best_model, "best_model")

# 12. Plot results
# y_test_rescaled and test_pred were already passed through
# scaler.inverse_transform and divided by MB, so multiplying by MB restores
# the original units. Applying inverse_transform a second time would rescale
# the values again and plot wrong numbers.
plt.figure(figsize=(12, 6))
plt.plot(y_dates_test, y_test_rescaled * MB, label="Valores Reais", color='blue')
plt.plot(y_dates_test, test_pred * MB, label="Previsões do Modelo", color='red', linestyle='--')
plt.title("Valores Reais vs Previsões do Modelo - Consumo de Memória")
plt.xlabel("Data")
plt.ylabel("Consumo de Memória (Bytes)")
plt.legend()
plt.gca().xaxis.set_major_locator(mdates.AutoDateLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig(os.path.join(DATA_DIR, 'prometheus_lstm_mrfo_15min.png'), dpi=300, bbox_inches='tight')
plt.close()