# Utilitários Unificados — Modelos, Datasets e Ruído

Este arquivo reúne três funções principais, com interface unificada e documentação clara:

1. `load_model_unificado(modelo, caminho, ...)`
   - Carrega qualquer modelo: `linear`, `mlp`, `lstm` (Keras) ou `tft` (PyTorch Forecasting).
   - Aceita arquivo/diret√≥rio EXATO ou uma pasta raiz para descoberta recursiva.

2. Carregamento de dados (Parquet)
   - `linear/mlp/lstm` ‚Üí Parquet com `*.meta.json` contendo `x_dim`, `y_dim` (e `seq_len`, `lead` no LSTM).
   - `tft` ‚Üí Parquet (padr√£o: retorna DataFrame; opcional: cria `TimeSeriesDataSet`).

3. `add_noise_features(obj, sigma, tipo, ...)`
   - Adiciona ru√≠do GAUSSIANO somente nas FEATURES.
   - `tipo='tfdata'` ‚Üí aplica em `tf.data.Dataset` (x,y).
   - `tipo='tft'` ‚Üí aplica em batches de `TimeSeriesDataSet`/`DataLoader` (chaves `encoder_cont`/`decoder_cont`).

> Observação: O notebook foi simplificado para Parquet apenas (sem TFRecords).

## 1) Carregamento Unificado de Modelos

Contrato r√°pido:
- Entradas:
  - `modelo`: `linear` | `mlp` | `lstm` | `tft`
  - `caminho`: arquivo/diret√≥rio exato OU uma pasta para varredura recursiva
  - `prefer_exts` (opcional): lista de extens√µes a priorizar (ex.: `[".cpfg", ".ckpt"]` para TFT)
  - `allow_unsafe` (bool): permite desserializa√ß√£o insegura apenas para artefatos LOCAIS (Lambda em Keras)
- Saídas: `(obj_modelo, info)`
  - `obj_modelo`: instância do modelo carregado (Keras ou TemporalFusionTransformer)
  - `info`: dicionário com metadados úteis (`path`, `backend`, `kind`)

In [1]:
# Carregamento dos modelos treinados
from tensorflow import keras
import json, os
import tensorflow as tf
from keras.saving import register_keras_serializable  

@register_keras_serializable()
def repeat_to_seq_len(inputs):
    """Tile an embedding along axis 1 to match the length of a feature tensor.

    The function is registered with Keras' serialization registry through the
    decorator above.

    Args:
        inputs: A two-element sequence ``(emb, feats)``. ``emb`` is the tensor
            that gets tiled; only the run-time size of axis 1 of ``feats`` is
            read.
            NOTE(review): the multiples ``[1, n, 1]`` require ``emb`` to be
            rank 3, presumably ``(batch, 1, emb_dim)`` -- confirm.

    Returns:
        ``emb`` repeated ``tf.shape(feats)[1]`` times along axis 1.
    """
    embedding, features = inputs
    # tf.shape (rather than the static .shape) keeps this valid when the
    # sequence length is only known at run time.
    n_steps = tf.shape(features)[1]
    return tf.tile(embedding, [1, n_steps, 1])

def load_model(path: str):
    """Load a ``.keras`` model together with its optional sidecar JSON config.

    Args:
        path: Path to a ``.keras`` model artifact.

    Returns:
        A ``(model, config)`` tuple. ``config`` is the parsed content of
        ``<path without extension>.model.json`` when that file exists,
        otherwise ``None``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the extension of ``path`` is not ``.keras``.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"‚ùå File not found: {path}")

    stem, ext = os.path.splitext(path)
    ext = ext.lower()
    if ext != ".keras":
        raise ValueError(f"‚ùå Unsupported file extension: {ext}")

    # NOTE(review): models that use the custom ``repeat_to_seq_len`` function
    # presumably rely on it having been registered before this call -- confirm.
    model = keras.models.load_model(path)
    print("‚úÖ TensorFlow model loaded.")

    # Optional sidecar config stored next to the model file.
    json_path = f"{stem}.model.json"
    config = None
    if os.path.exists(json_path):
        # Explicit UTF-8: the platform default encoding is locale-dependent
        # and can mis-decode non-ASCII JSON content.
        with open(json_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        print(f"üß© Loaded config: {json_path}")

    return model, config


# Load the three trained Keras models (linear, MLP, LSTM); each call returns
# the model together with its optional JSON config.
linear, info_linear = load_model('./modelos/treinamento/linear.keras')
mlp, info_mlp = load_model('./modelos/treinamento/mlp.keras')
lstm, info_lstm = load_model('./modelos/treinamento/lstm.keras')

# (model, config) pairs in linear / MLP / LSTM order.
model_list = list(zip(
    (linear, mlp, lstm),
    (info_linear, info_mlp, info_lstm),
))

2025-11-13 07:47:58.978429: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
I0000 00:00:1763030881.161941   17053 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9975 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:06:00.0, compute capability: 8.6
  saveable.load_own_variables(weights_store.get(inner_path))


‚úÖ TensorFlow model loaded.
‚úÖ TensorFlow model loaded.
‚úÖ TensorFlow model loaded.


## 2) Carregando preprocessadores

In [2]:
import glob
import pickle
import tensorflow as tf
from typing import Dict, Any, List, Tuple, Optional

# Load the pickled preprocessor objects.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load artifacts that were produced locally by this project.

# --- Linear ---
linear_path = "./data/treinamento/preprocessor/linear_preproc.pkl"
with open(linear_path, "rb") as f:
    linear_preproc = pickle.load(f)

# --- LSTM ---
lstm_path = "./data/treinamento/preprocessor/lstm_preproc.pkl"
with open(lstm_path, "rb") as f:
    lstm_preproc = pickle.load(f)

# Lookup by model family; the MLP shares the Linear preprocessor.
preprocessors = dict(
    linear=linear_preproc,
    mlp=linear_preproc,
    lstm=lstm_preproc,
)

print("‚úÖ Todos os preprocessadores carregados com sucesso.")


‚úÖ Todos os preprocessadores carregados com sucesso.


# Funções helper para análise dos modelos

## Funções de coleta de dados

In [3]:

import pandas as pd
from preprocessor_keras import LinearPreprocessor, LSTMPreprocessor
import numpy as np

def load_dataset_info(model_type: str, dataset_type: str, problem_name: str) -> Dict[str, Any]:
    """Read the ``.meta.json`` sidecar describing one dataset split.

    The file is looked up at
    ``./data/<problem_name>/<model_type>_dataset_<dataset_type>.meta.json``,
    relative to the current working directory.

    Args:
        model_type: Dataset family prefix used in the file name.
        dataset_type: Split name used in the file name.
        problem_name: Sub-folder of ``./data`` holding the problem's files.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If the metadata file does not exist.
    """
    info_path = f'./data/{problem_name}/{model_type}_dataset_{dataset_type}.meta.json'
    if not os.path.exists(info_path):
        raise FileNotFoundError(f"‚ùå Dataset info file not found: {info_path}")
    # Explicit UTF-8: the platform default encoding is locale-dependent and
    # can mis-decode non-ASCII column names stored in the metadata.
    with open(info_path, 'r', encoding='utf-8') as f:
        return json.load(f)



def get_problem_df(model_type: str, lag, lead, country_list, problem_name) -> Tuple[Any, Dict[str, Any]]:
    """Load the test split of a problem's Parquet dataset for a model family.

    Args:
        model_type: ``'linear'`` or ``'lstm'``.
        lag: Not used by this function; kept for call-site compatibility.
        lead: Not used by this function; kept for call-site compatibility.
        country_list: Not used by this function; kept for call-site
            compatibility.
        problem_name: Sub-folder of ``./data`` holding the problem's files.

    Returns:
        The ``(dataset, dataset_info)`` pair produced by the preprocessor's
        Parquet loader (called with ``split='test'``, ``batch_size=256`` and
        ``shuffle=True``).
        NOTE(review): callers iterate the first element as ``(X, Y)``
        batches, so it is presumably a ``tf.data.Dataset`` rather than a
        DataFrame -- confirm against ``preprocessor_keras``.

    Raises:
        FileNotFoundError: If the test-split ``.meta.json`` is missing.
        ValueError: If ``model_type`` is neither ``'linear'`` nor ``'lstm'``.
    """
    # Fail fast when the test-split metadata is missing. The parsed value is
    # not kept: the loader below returns its own dataset_info.
    load_dataset_info(model_type, "test", problem_name)
    destino_dir = f'./data/{problem_name}'

    if model_type == 'linear':
        loader = LinearPreprocessor.load_linear_parquet_dataset
    elif model_type == 'lstm':
        loader = LSTMPreprocessor.load_lstm_parquet_dataset
    else:
        raise ValueError(f"Modelo desconhecido: {model_type}")

    dataset, dataset_info = loader(
        data_dir=destino_dir,
        split='test',
        batch_size=256,
        shuffle=True,
    )
    return dataset, dataset_info

## Fun√ß√µes de avalia√ß√£o

In [4]:
import os
import shutil
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import pandas as pd
from typing import Optional, Dict, Any

def _regression_metrics(y_true, y_pred):
    """Return MAE, MSE, RMSE, Pearson correlation and R2 for two 1-D arrays.

    Correlation and R2 are ``nan`` when they are undefined (zero variance).
    """
    diff = y_pred - y_true
    mse = float(np.mean(diff ** 2))

    has_spread = np.std(y_true) > 0 and np.std(y_pred) > 0
    corr = float(np.corrcoef(y_true, y_pred)[0, 1]) if has_spread else float("nan")

    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    r2 = float(1 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

    return {
        "mae": float(np.mean(np.abs(diff))),
        "mse": mse,
        "rmse": float(np.sqrt(mse)),
        "correlacao_pearson": corr,
        "r2": r2,
    }


def avaliar_modelo_keras(
    model,
    dataset,
    titulo="Avalia√ß√£o do Modelo",
    problem_name="problema",
    max_samples=None,
    n_leads=1,
    save_dir="./resultados/graficos",
    show_plots=True,
    df_info=None,
    preproc=None
):
    """Evaluate a Keras model on a dataset, compute metrics and save plots.

    Args:
        model: Object exposing ``name``, ``evaluate``, ``predict`` and
            ``metrics_names`` (a Keras model).
        dataset: Iterable of ``(X, Y)`` batches. For a "linear" dataset ``X``
            must expose ``.numpy()``; for an "lstm" dataset ``X`` is a mapping
            whose ``"num_feats"`` entry exposes ``.numpy()``.
        titulo: Title prefix used in the plots.
        problem_name: Sub-folder of ``save_dir`` for this problem.
        max_samples: Keep only the first ``max_samples`` rows; ``None`` or a
            non-positive value keeps everything.
        n_leads: Not used by this function; kept for call-site compatibility.
        save_dir: Root folder for the generated figures.
        show_plots: Show each figure instead of closing it after saving.
        df_info: Dataset metadata. Reads ``"basename"`` (must contain
            ``"linear"`` or ``"lstm"``), ``"feature_cols"``, ``"target_cols"``
            and, for LSTM, ``"lead"``. The mapping is not modified.
        preproc: Optional preprocessor exposing ``denormalize`` and ``decode``;
            when given, it is applied to features and targets.

    Returns:
        A dict with the Keras metrics, the global ``mae``/``mse``/``rmse``/
        ``correlacao_pearson``/``r2``, plus ``y_true``, ``y_pred``,
        ``per_lead_metrics`` and ``df`` (the combined DataFrame).

    Raises:
        KeyError: If ``df_info`` lacks a required key.
        ValueError: If the dataset kind is unknown, the dataset is empty, or
            the combined DataFrame has no ``"datetime"`` column (this last
            check runs after the metric figures have been written).

    Side effects:
        ``<save_dir>/<problem_name>/<model.name>`` is deleted and recreated,
        then filled with PNG figures.
    """
    # A fresh dict per call avoids the shared-mutable-default pitfall while
    # keeping the original behaviour (KeyError) when no metadata is supplied.
    df_info = {} if df_info is None else df_info

    print(f"üöÄ Avaliando modelo '{model.name}'...")

    # ==========================================================
    # 1) EVALUATE (best effort: metrics are optional)
    # ==========================================================
    try:
        eval_out = model.evaluate(dataset, verbose=0, return_dict=True)
        resultados = {k: float(v) for k, v in eval_out.items()}
    except Exception:
        # Fall back to the positional form for models without return_dict.
        try:
            raw = model.evaluate(dataset, verbose=0)
            raw = raw if isinstance(raw, (list, tuple)) else [raw]
            names = model.metrics_names or [f"metric_{i}" for i in range(len(raw))]
            resultados = {n: float(v) for n, v in zip(names, raw)}
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C still propagates,
            # and report why the Keras metrics are missing.
            print(f"Warning: model.evaluate failed ({exc!r}); continuing without Keras metrics.")
            resultados = {}

    # ==========================================================
    # 2) PREDICT
    # ==========================================================
    is_linear = "linear" in df_info["basename"]
    is_lstm = "lstm" in df_info["basename"]
    if not (is_linear or is_lstm):
        raise ValueError(f"Unknown model basename: {df_info['basename']}")

    X_parts = []
    preds = []
    trues = []

    for Xb, Yb in dataset:
        preds.append(model.predict(Xb, verbose=0))
        trues.append(Yb)
        # Linear batches are plain tensors; LSTM batches are dict inputs.
        X_parts.append(Xb.numpy() if is_linear else Xb["num_feats"].numpy())

    if not preds:
        raise ValueError("Dataset yielded no batches; nothing to evaluate.")

    Y_pred = np.concatenate(preds, axis=0)
    Y_real = np.concatenate(trues, axis=0)
    X_vals = np.concatenate(X_parts, axis=0)

    # ==========================================================
    # 3) LIMIT SAMPLE SIZE
    # ==========================================================
    max_samples = len(Y_real) if not max_samples or max_samples <= 0 else min(max_samples, len(Y_real))
    Y_real, Y_pred, X_vals = Y_real[:max_samples], Y_pred[:max_samples], X_vals[:max_samples]

    # ==========================================================
    # 4) CREATE DF FROM MODEL INPUT
    # ==========================================================
    target_cols = df_info["target_cols"]

    if is_lstm:
        # Drop the trailing singleton axis and keep only the last time step
        # of the input window as the feature row.
        Y_real = Y_real.squeeze(-1)
        Y_pred = Y_pred.squeeze(-1)
        X_vals = X_vals[:, -1, :]
        # Expand into one column per (target, lead) in a LOCAL list: writing
        # it back into df_info would corrupt the caller's metadata and break
        # a second call with the same dict.
        target_cols = [
            f"{col}_lead{lead}"
            for col in target_cols
            for lead in range(1, df_info["lead"] + 1)
        ]

    df_x = pd.DataFrame(X_vals, columns=df_info['feature_cols'])
    df_y_real = pd.DataFrame(Y_real, columns=target_cols)
    df_y_pred = pd.DataFrame(Y_pred, columns=target_cols)

    # ==========================================================
    # 5) DENORMALIZE + DECODE
    # ==========================================================
    if preproc:
        df_x = preproc.denormalize('standard', df=df_x)
        df_y_pred = preproc.denormalize('standard', df=df_y_pred)
        df_y_real = preproc.denormalize('standard', df=df_y_real)

        df_x = preproc.decode('label', df=df_x, target_col="country")
        df_x = preproc.decode('time_cycle', df=df_x)

    # ==========================================================
    # 6) COMBINE EVERYTHING INTO A SINGLE DF
    # ==========================================================
    df_full = df_x.copy()

    for col in df_y_real.columns:
        df_full[f"{col}_real"] = df_y_real[col].values
    for col in df_y_pred.columns:
        df_full[f"{col}_pred"] = df_y_pred[col].values

    country_cols = [c for c in df_full.columns if "country" in c.lower()]

    # Sort by time first, then by country columns, when available.
    order_cols = []
    if "datetime" in df_full.columns:
        order_cols.append("datetime")
    order_cols.extend(country_cols)

    if order_cols:
        df_full = df_full.sort_values(order_cols).reset_index(drop=True)
    else:
        print("‚ö†Ô∏è Nenhuma coluna para ordenar. Mantendo ordem original.")

    df_combined = df_full

    # ==========================================================
    # 7) OUTPUT DIR
    # ==========================================================
    output_dir = os.path.join(save_dir, problem_name, model.name)
    # Destructive on purpose: results of a previous run are removed so the
    # folder only holds figures from this evaluation.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # ==========================================================
    # 8) GLOBAL METRICS (all targets/leads flattened together)
    # ==========================================================
    resultados.update(
        _regression_metrics(
            df_y_real.values.reshape(-1),
            df_y_pred.values.reshape(-1),
        )
    )

    # ==========================================================
    # 9) PER-LEAD METRICS (one value per target column)
    # ==========================================================
    per_lead = {}
    lead_cols = df_y_real.columns

    for col in lead_cols:
        col_metrics = _regression_metrics(df_y_real[col].values, df_y_pred[col].values)
        for key, val in col_metrics.items():
            per_lead.setdefault(key, []).append(val)

    # ==========================================================
    # 10) SAVE OVERALL METRIC TABLE
    # ==========================================================
    num_items = [(k, v) for k, v in resultados.items() if isinstance(v, (int, float))]

    fig, ax = plt.subplots(figsize=(8, 0.4 * len(num_items) + 1))
    ax.axis("off")
    ax.table(
        cellText=[[k, f"{v:.6f}"] for k, v in num_items],
        colLabels=["M√©trica", "Valor"],
        loc="center",
    )
    plt.tight_layout()
    fig.savefig(os.path.join(output_dir, "overall_metrics.png"), dpi=150)
    if show_plots:
        plt.show()
    else:
        plt.close()

    # ==========================================================
    # 11) PLOT METRIC PER LEAD
    # ==========================================================
    for metric, vals in per_lead.items():
        xs = np.arange(1, len(vals) + 1)
        plt.figure(figsize=(9, 4))
        plt.plot(xs, vals, marker="o")
        plt.title(f"{titulo} ‚Äî {metric.upper()} por Lead")
        plt.xlabel("Lead")
        plt.ylabel(metric.upper())
        plt.grid(True, linestyle="--", alpha=0.6)
        plt.tight_layout()
        fp = os.path.join(output_dir, f"metric_{metric}.png")
        plt.savefig(fp, dpi=150)
        if show_plots:
            plt.show()
        else:
            plt.close()

    # ==========================================================
    # 12) PLOTS PER COUNTRY AND LEAD
    # ==========================================================
    datetime_col = "datetime"
    if datetime_col not in df_combined.columns:
        raise ValueError("Coluna 'datetime' n√£o encontrada em df_combined.")

    # Only the first country-like column is used for grouping; without one,
    # a single "all" group is plotted.
    if len(country_cols) >= 1:
        main_country_col = country_cols[0]
        unique_countries = df_combined[main_country_col].unique()
    else:
        unique_countries = [None]

    for country in unique_countries:
        if country is not None:
            df_plot = df_combined[df_combined[main_country_col] == country]
            tag = f"{main_country_col}_{country}"
        else:
            df_plot = df_combined
            tag = "all"

        time_axis = pd.to_datetime(df_plot[datetime_col])

        for col in lead_cols:
            real_col = f"{col}_real"
            pred_col = f"{col}_pred"

            plt.figure(figsize=(12, 5))
            plt.plot(time_axis, df_plot[real_col], label=f"Real ({col})", linewidth=2)
            plt.plot(time_axis, df_plot[pred_col],
                     label=f"Pred ({col})", linestyle="--", linewidth=2)

            plt.title(f"{titulo} ‚Äî {col} ‚Äî {tag}")
            plt.xlabel("Tempo")
            plt.ylabel("Valor")
            plt.grid(True, linestyle="--", alpha=0.6)
            plt.legend()

            fp = os.path.join(output_dir, f"{tag}_lead_{col}.png")
            plt.tight_layout()
            plt.savefig(fp, dpi=150)
            if show_plots:
                plt.show()
            else:
                plt.close()

    print("‚úÖ Avalia√ß√£o conclu√≠da.")

    return dict(
        resultados,
        y_true=Y_real,
        y_pred=Y_pred,
        per_lead_metrics=per_lead,
        df=df_combined
    )


# N1A ‚Äî S√©rie Univariada (seq_len=72, lead=72)


Objetivo
- Prever 24 horas de carga à frente com janelas de 72 horas de histórico para um único país.


Artefatos esperados
- Parquet (Linear/MLP): `data/N1A/linear_dataset_{split}.parquet` + `linear_dataset_{split}.meta.json` ‚Üí { x_dim, y_dim }
- Parquet (LSTM): `data/N1A/lstm_dataset_{split}.parquet` + `lstm_dataset_{split}.meta.json` ‚Üí { seq_len=240, lead=72, x_dim, y_dim }


Modelos a comparar
- Linear, MLP, LSTM (Keras) e, opcionalmente, TFT.


M√©tricas e checks
- MAE, RMSE, MAPE.
- Checar: shapes conforme meta.json; aus√™ncia de NaNs; n√∫mero de amostras > 0.


Visualiza√ß√µes sugeridas
- Boxplot de erro por horizonte; barras de MAE por modelo; curva MAE vs horizonte.


Notas
- As variantes A/B s√£o obtidas reduzindo a janela/horizonte efetivos na avalia√ß√£o a partir do dataset base (240/72).
- Padding (se houver) deve usar sentinela fixo para permitir mascaramento e ru√≠do seletivo.

In [None]:
# Load the N1A test datasets (Parquet); Linear and MLP share one dataset.
ds_linear, linear_info = get_problem_df(
    model_type='linear', lag=72, lead=72, country_list=['ES'], problem_name='N1A'
)
ds_lstm, lstm_info = get_problem_df(
    model_type='lstm', lag=72, lead=72, country_list=['ES'], problem_name='N1A'
)

# Evaluate the Linear model.
avaliar_modelo_keras(
    model=linear, dataset=ds_linear, titulo="Modelo Linear - Problema N1A",
    problem_name="N1A", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['linear'], df_info=linear_info,
)

# Evaluate the MLP model (same dataset and metadata as the Linear model).
avaliar_modelo_keras(
    model=mlp, dataset=ds_linear, titulo="Modelo MLP - Problema N1A",
    problem_name="N1A", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['mlp'], df_info=linear_info,
)

# Evaluate the LSTM model.
avaliar_modelo_keras(
    model=lstm, dataset=ds_lstm, titulo="Modelo lstm - Problema N1A",
    problem_name="N1A", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['lstm'], df_info=lstm_info,
)

üöÄ Avaliando modelo 'linear_model'...


2025-11-13 07:48:03.808493: I external/local_xla/xla/service/service.cc:163] XLA service 0x18713540 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-13 07:48:03.808504: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2025-11-13 07:48:03.816563: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-13 07:48:03.831921: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91501
I0000 00:00:1763030884.270663   17137 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2025-11-13 07:48:04.291774: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints s

‚úÖ Avalia√ß√£o conclu√≠da.
üöÄ Avaliando modelo 'mlp_model'...


2025-11-13 07:48:23.246058: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-13 07:48:23.246092: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.

2025-11-13 07:48:24.521937: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-13 07:48:24.521957: I external/

# N1B ‚Äî S√©rie Univariada (seq_len=168, lead=48)


Objetivo
- Prever 48 horas de carga √† frente com janelas de 168 horas de hist√≥rico para um √∫nico pa√≠s.


Artefatos esperados
- Parquet (Linear/MLP): `data/N1B/linear_dataset_{split}.parquet` + meta { x_dim, y_dim }
- Parquet (LSTM): `data/N1B/lstm_dataset_{split}.parquet` + meta { seq_len=240, lead=72, x_dim, y_dim }
- TFT (opcional): `data/treinamento/tft_dataset_{split}.parquet`


Modelos a comparar
- Linear, MLP, LSTM, TFT (opcional).


M√©tricas e checks
- MAE, RMSE, MAPE; valida√ß√£o de shapes e aus√™ncia de NaNs.


Visualiza√ß√µes sugeridas
- Barras de MAE m√©dio por modelo; curva de erro por horizonte.


Notas
- As variantes A/B s√£o derivadas do dataset base (240/72) reduzindo janela/horizonte na avalia√ß√£o, sem retreinar.

In [None]:
# Load the N1B test datasets (Parquet); Linear and MLP share one dataset.
ds_linear, linear_info = get_problem_df(
    model_type='linear', lag=168, lead=72, country_list=['ES'], problem_name='N1B'
)
ds_lstm, lstm_info = get_problem_df(
    model_type='lstm', lag=168, lead=72, country_list=['ES'], problem_name='N1B'
)

# Evaluate the Linear model.
avaliar_modelo_keras(
    model=linear, dataset=ds_linear, titulo="Modelo Linear - Problema N1B",
    problem_name="N1B", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['linear'], df_info=linear_info,
)

# Evaluate the MLP model (same dataset and metadata as the Linear model).
avaliar_modelo_keras(
    model=mlp, dataset=ds_linear, titulo="Modelo MLP - Problema N1B",
    problem_name="N1B", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['mlp'], df_info=linear_info,
)

# Evaluate the LSTM model.
avaliar_modelo_keras(
    model=lstm, dataset=ds_lstm, titulo="Modelo lstm - Problema N1B",
    problem_name="N1B", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['lstm'], df_info=lstm_info,
)

# N1C ‚Äî S√©rie Univariada (seq_len=240, lead=72)


Objetivo
- Pr√©-treino/treino com a janela de 240 horas e avaliar horizonte de 72 horas.


Artefatos esperados
- Parquet (Linear/MLP): `data/N1C/linear_dataset_{split}.parquet` + meta { x_dim, y_dim }
- Parquet (LSTM): `data/N1C/lstm_dataset_{split}.parquet` + meta { seq_len=240, lead=72, x_dim, y_dim }
- TFT (opcional): `data/treinamento/tft_dataset_{split}.parquet`


Modelos a comparar
- Linear, MLP, LSTM, TFT (opcional).


M√©tricas e checks
- MAE, RMSE, MAPE; n√∫mero de amostras por split; coer√™ncia entre seq_len/lead do meta e shapes efetivos.


Visualiza√ß√µes sugeridas
- Curva comparativa de MAE vs horizonte; top‚Äëk modelos por MAE.


Notas
- Esta variante (C) √© a base m√°xima de lookback e horizonte; A/B s√£o obtidas por redu√ß√£o na avalia√ß√£o.

In [None]:
# Load the N1C test datasets (Parquet); Linear and MLP share one dataset.
ds_linear, linear_info = get_problem_df(
    model_type='linear', lag=240, lead=72, country_list=['ES'], problem_name='N1C'
)
ds_lstm, lstm_info = get_problem_df(
    model_type='lstm', lag=240, lead=72, country_list=['ES'], problem_name='N1C'
)

# Evaluate the Linear model.
avaliar_modelo_keras(
    model=linear, dataset=ds_linear, titulo="Modelo Linear - Problema N1C",
    problem_name="N1C", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['linear'], df_info=linear_info,
)

# Evaluate the MLP model (same dataset and metadata as the Linear model).
avaliar_modelo_keras(
    model=mlp, dataset=ds_linear, titulo="Modelo MLP - Problema N1C",
    problem_name="N1C", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['mlp'], df_info=linear_info,
)

# Evaluate the LSTM model.
avaliar_modelo_keras(
    model=lstm, dataset=ds_lstm, titulo="Modelo lstm - Problema N1C",
    problem_name="N1C", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['lstm'], df_info=lstm_info,
)

# N2A ‚Äî M√∫ltiplos Pa√≠ses (seq_len=72, lead=24)


Objetivo
- Prever 24 horas com 72 horas de hist√≥rico, agrupando por pa√≠s.


Artefatos esperados
- Parquet (Linear/MLP): `data/N2A/linear_dataset_{split}.parquet` + meta
- Parquet (LSTM): `data/N2A/lstm_dataset_{split}.parquet` + meta { seq_len=240, lead=72, x_dim, y_dim }
- TFT (recomendado): `data/treinamento/tft_dataset_{split}.parquet` (colunas: _group_id=country, time_idx crescente por grupo, quantity_MW)


Modelos a comparar
- Linear, MLP, LSTM (podem exigir codifica√ß√£o/flatten por grupo);
- TFT (nativamente multi‚Äëgrupo).


M√©tricas e checks
- MAE/RMSE por pa√≠s e globais; n√∫mero de grupos; equil√≠brio de amostras por grupo.


Visualiza√ß√µes sugeridas
- Barras de MAE por modelo; facetas por pa√≠s; curva MAE vs horizonte.


Notas
- As variantes A/B/C partem do dataset base (240/72), aplicando janelas/horizontes reduzidos na avalia√ß√£o.

In [None]:
# Load the N2A test datasets (Parquet); Linear and MLP share one dataset.
ds_linear, linear_info = get_problem_df(
    model_type='linear', lag=72, lead=72, country_list=['ES'], problem_name='N2A'
)
ds_lstm, lstm_info = get_problem_df(
    model_type='lstm', lag=72, lead=72, country_list=['ES'], problem_name='N2A'
)

# Evaluate the Linear model.
avaliar_modelo_keras(
    model=linear, dataset=ds_linear, titulo="Modelo Linear - Problema N2A",
    problem_name="N2A", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['linear'], df_info=linear_info,
)

# Evaluate the MLP model (same dataset and metadata as the Linear model).
avaliar_modelo_keras(
    model=mlp, dataset=ds_linear, titulo="Modelo MLP - Problema N2A",
    problem_name="N2A", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['mlp'], df_info=linear_info,
)

# Evaluate the LSTM model.
avaliar_modelo_keras(
    model=lstm, dataset=ds_lstm, titulo="Modelo lstm - Problema N2A",
    problem_name="N2A", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['lstm'], df_info=lstm_info,
)

# N2B ‚Äî M√∫ltiplos Pa√≠ses (seq_len=168, lead=48)


Objetivo
- Prever 48 horas com 168 horas de hist√≥rico, agrupado por pa√≠s.


Artefatos esperados
- Parquet (Linear/MLP): `data/N2B/linear_dataset_{split}.parquet` + meta
- Parquet (LSTM): `data/N2B/lstm_dataset_{split}.parquet` + meta { seq_len=240, lead=72, x_dim, y_dim }
- TFT (recomendado): `data/treinamento/tft_dataset_{split}.parquet` com `_group_id`, `time_idx`, target.


Modelos a comparar
- Linear, MLP, LSTM; TFT.


M√©tricas e checks
- MAE/RMSE por pa√≠s e agregadas; distribui√ß√£o de amostras por grupo.


Visualiza√ß√µes sugeridas
- Barras de MAE por modelo; linhas por horizonte; painel por pa√≠s.


Notas
- Variantes A/B/C usam janelas/horizontes efetivos na avalia√ß√£o; base: 240/72.

In [None]:
# Load the N2B test datasets (Parquet); Linear and MLP share one dataset.
ds_linear, linear_info = get_problem_df(
    model_type='linear', lag=168, lead=72, country_list=['ES'], problem_name='N2B'
)
ds_lstm, lstm_info = get_problem_df(
    model_type='lstm', lag=168, lead=72, country_list=['ES'], problem_name='N2B'
)

# Evaluate the Linear model.
avaliar_modelo_keras(
    model=linear, dataset=ds_linear, titulo="Modelo Linear - Problema N2B",
    problem_name="N2B", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['linear'], df_info=linear_info,
)

# Evaluate the MLP model (same dataset and metadata as the Linear model).
avaliar_modelo_keras(
    model=mlp, dataset=ds_linear, titulo="Modelo MLP - Problema N2B",
    problem_name="N2B", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['mlp'], df_info=linear_info,
)

# Evaluate the LSTM model.
avaliar_modelo_keras(
    model=lstm, dataset=ds_lstm, titulo="Modelo lstm - Problema N2B",
    problem_name="N2B", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['lstm'], df_info=lstm_info,
)

# N2C ‚Äî M√∫ltiplos Pa√≠ses (seq_len=240, lead=72)


Objetivo
- Prever 72 horas com 240 horas de hist√≥rico, agrupado por pa√≠s. Esta variante √© a base para reuso em A/B.


Artefatos esperados
- Parquet (Linear/MLP): `data/N2C/linear_dataset_{split}.parquet` + meta
- Parquet (LSTM): `data/N2C/lstm_dataset_{split}.parquet` + meta { seq_len=240, lead=72, x_dim, y_dim }
- TFT (recomendado): `data/treinamento/tft_dataset_{split}.parquet` (agrupado por `_group_id`).


Modelos a comparar
- Linear, MLP, LSTM, TFT.


M√©tricas e checks
- MAE, RMSE, MAPE; compara√ß√£o por pa√≠s; checagem de time_idx e integridade por grupo.


Visualiza√ß√µes sugeridas
- Curva MAE vs horizonte; ranking de modelos por pa√≠s e global.


Notas
- Base usa seq_len=240 e lead=72; varia√ß√µes A/B podem ser avaliadas reduzindo janela no dataset sem retreino.

In [None]:
# Load the N2C test datasets (Parquet); Linear and MLP share one dataset.
ds_linear, linear_info = get_problem_df(
    model_type='linear', lag=240, lead=72, country_list=['ES'], problem_name='N2C'
)
ds_lstm, lstm_info = get_problem_df(
    model_type='lstm', lag=240, lead=72, country_list=['ES'], problem_name='N2C'
)

# Evaluate the Linear model.
avaliar_modelo_keras(
    model=linear, dataset=ds_linear, titulo="Modelo Linear - Problema N2C",
    problem_name="N2C", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['linear'], df_info=linear_info,
)

# Evaluate the MLP model (same dataset and metadata as the Linear model).
avaliar_modelo_keras(
    model=mlp, dataset=ds_linear, titulo="Modelo MLP - Problema N2C",
    problem_name="N2C", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['mlp'], df_info=linear_info,
)

# Evaluate the LSTM model.
avaliar_modelo_keras(
    model=lstm, dataset=ds_lstm, titulo="Modelo lstm - Problema N2C",
    problem_name="N2C", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['lstm'], df_info=lstm_info,
)

# N3 — Robustez a Ruído (sobre N2)

Objetivo
- Medir degradação de desempenho sob ruído gaussiano nas FEATURES (teste), mantendo rótulos intactos.

Configura√ß√£o
- Conjuntos: use os datasets do N2 (A/B/C).
- Intensidades: σ ∈ {0.00, 0.01, 0.03, 0.05, 0.10}.
- Aplica√ß√£o:
  - Keras/tf.data: `add_noise_features(ds, sigma, tipo='tfdata', pad_sentinel=-999.0)`.
  - TFT: `add_noise_features(tft_ds ou dataloader, sigma, tipo='tft', batch_size=..., train=False)`.

M√©tricas e checks
- MAE/RMSE por sigma; checar preservação de sentinela (TF) e invariância de Y.

Visualiza√ß√µes sugeridas
- Curvas MAE vs σ por modelo; heatmap de degradação por horizonte e sigma.

Notas
- Aplique ruído após normalização das features.
- Não altere o treino; apenas avaliação/benchmark.


## N3A

In [None]:
# Load the N3A test datasets (Parquet); Linear and MLP share one dataset.
ds_linear, linear_info = get_problem_df(
    model_type='linear', lag=72, lead=72, country_list=['ES'], problem_name='N3A'
)
ds_lstm, lstm_info = get_problem_df(
    model_type='lstm', lag=72, lead=72, country_list=['ES'], problem_name='N3A'
)

# Evaluate the Linear model.
avaliar_modelo_keras(
    model=linear, dataset=ds_linear, titulo="Modelo Linear - Problema N3A",
    problem_name="N3A", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['linear'], df_info=linear_info,
)

# Evaluate the MLP model (same dataset and metadata as the Linear model).
avaliar_modelo_keras(
    model=mlp, dataset=ds_linear, titulo="Modelo MLP - Problema N3A",
    problem_name="N3A", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['mlp'], df_info=linear_info,
)

# Evaluate the LSTM model.
avaliar_modelo_keras(
    model=lstm, dataset=ds_lstm, titulo="Modelo lstm - Problema N3A",
    problem_name="N3A", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['lstm'], df_info=lstm_info,
)

## N3B

In [None]:
# Load the N3B test datasets (Parquet); Linear and MLP share one dataset.
ds_linear, linear_info = get_problem_df(
    model_type='linear', lag=168, lead=72, country_list=['ES'], problem_name='N3B'
)
ds_lstm, lstm_info = get_problem_df(
    model_type='lstm', lag=168, lead=72, country_list=['ES'], problem_name='N3B'
)

# Evaluate the Linear model.
avaliar_modelo_keras(
    model=linear, dataset=ds_linear, titulo="Modelo Linear - Problema N3B",
    problem_name="N3B", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['linear'], df_info=linear_info,
)

# Evaluate the MLP model (same dataset and metadata as the Linear model).
avaliar_modelo_keras(
    model=mlp, dataset=ds_linear, titulo="Modelo MLP - Problema N3B",
    problem_name="N3B", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['mlp'], df_info=linear_info,
)

# Evaluate the LSTM model.
avaliar_modelo_keras(
    model=lstm, dataset=ds_lstm, titulo="Modelo lstm - Problema N3B",
    problem_name="N3B", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['lstm'], df_info=lstm_info,
)

## N3C

In [None]:
# Load the N3C test datasets (Parquet); Linear and MLP share one dataset.
ds_linear, linear_info = get_problem_df(
    model_type='linear', lag=240, lead=72, country_list=['ES'], problem_name='N3C'
)
ds_lstm, lstm_info = get_problem_df(
    model_type='lstm', lag=240, lead=72, country_list=['ES'], problem_name='N3C'
)

# Evaluate the Linear model.
avaliar_modelo_keras(
    model=linear, dataset=ds_linear, titulo="Modelo Linear - Problema N3C",
    problem_name="N3C", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['linear'], df_info=linear_info,
)

# Evaluate the MLP model (same dataset and metadata as the Linear model).
avaliar_modelo_keras(
    model=mlp, dataset=ds_linear, titulo="Modelo MLP - Problema N3C",
    problem_name="N3C", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['mlp'], df_info=linear_info,
)

# Evaluate the LSTM model.
avaliar_modelo_keras(
    model=lstm, dataset=ds_lstm, titulo="Modelo lstm - Problema N3C",
    problem_name="N3C", n_leads=72, max_samples=5000, show_plots=False,
    preproc=preprocessors['lstm'], df_info=lstm_info,
)