# SHAP

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import shap

from src.config import Config
from src.evaluation import SHAPExplainer
from src.train import ModelTrainer

In [2]:
# Project configuration, read from the YAML file one directory above the notebook.
CONFIG_PATH = Path("../config/config.yaml")
cfg = Config(CONFIG_PATH)

In [3]:
# Load the persisted model bundle. `ModelTrainer.load` returns four objects;
# the fourth one is not used in this notebook.
model_path = Path(cfg.model.path_dir) / cfg.model.enet_mo_best_30
model, preprocessor, y_scaler, _ = ModelTrainer.load(str(model_path))

# Raw test features and their preprocessed counterpart; the preprocessed
# matrix is what the summary plot shows next to the SHAP values.
# NOTE(review): a recorded run of this cell ended with
# `ValueError: columns are missing: {...}` naming engineered features
# (ema_*, sma_*, lag_*, macd, rsi, ...). Presumably the parquet file lacks
# columns the preprocessor was fitted on -- confirm and regenerate the file.
X_test = pd.read_parquet(Path(cfg.data.processed_dir) / "X_test_30.parquet")
X_test_proc = preprocessor.transform(X_test)

feature_names = preprocessor.get_feature_names_out()

# Cap the background sample at the number of available rows:
# `DataFrame.sample` raises when `n` exceeds the population size.
n_background = min(100, len(X_test))

explainer = SHAPExplainer(
    model=model,
    preprocessor=preprocessor,
    background_data=X_test.sample(n=n_background, random_state=cfg.runtime.seed),
    mode="linear",
)

shap_values = explainer.explain(X_test)

# Select the SHAP matrix of the first target without assuming the container
# type. Blindly indexing `[0]` on a 2-D array would pick the first *sample*.
if isinstance(shap_values, list):
    # One matrix per target.
    shap_first_target = shap_values[0]
elif getattr(shap_values, "ndim", 2) == 3:
    # 3-D array: the target axis is either last or first. Decide by comparing
    # with the preprocessed feature matrix instead of guessing the layout.
    if tuple(shap_values.shape[:2]) == tuple(X_test_proc.shape):
        shap_first_target = shap_values[:, :, 0]
    else:
        shap_first_target = shap_values[0]
else:
    # Already a single (samples x features) matrix.
    shap_first_target = shap_values

shap.summary_plot(
    shap_first_target,
    X_test_proc,
    feature_names=feature_names,
    show=True,
)

ValueError: columns are missing: {'ema_5', 'sma_10', 'q_std', 'sma_25', 'sma_5', 'l_bollinger', 'lag_10', 'q_skew', 'h_bollinger', 'lag_25', 'macd', 'q_mean', 'rsi', 'ema_10', 'ema_25'}

In [None]:
# Run the fitted preprocessor on the raw test frame and predict from the result.
# The predictions are used exactly as the model returns them.
X_proc = preprocessor.transform(X_test)
y_pred_scaled = model.predict(X_proc)

# Standard deviation of the predictions taken along axis 0.
pred_std = np.std(y_pred_scaled, axis=0)
print("Y scaled prediction std dev per target:\n", pred_std)

In [None]:
# Standard deviation of the preprocessed matrix taken along axis 0.
feature_std = X_proc.std(axis=0)
print("Feature std dev after preprocessing:\n", feature_std)

In [None]:
# A list result is reduced to its first element; anything else is used as-is.
if isinstance(shap_values, list):
    shap_array = shap_values[0]
else:
    shap_array = shap_values

# Mean absolute SHAP value taken along axis 0.
shap_abs = np.abs(shap_array)
print("SHAP mean abs:\n", np.mean(shap_abs, axis=0))