In [None]:
# --------------------------------------------------
# 0. Pacotes
# --------------------------------------------------
from sklearn.datasets import load_digits
from sklearn.model_selection import (train_test_split,
                                     StratifiedKFold,
                                     cross_validate)
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import (accuracy_score,
                             f1_score,
                             make_scorer)
from sklearn.neural_network import MLPClassifier
import numpy as np
import pandas as pd

# --------------------------------------------------
# 1. Wrapper: MLP with a selectable weight initialisation
# --------------------------------------------------
class InitMLP(MLPClassifier):
    """MLPClassifier with a selectable weight-initialisation strategy.

    Parameters
    ----------
    weight_init : {"glorot", "normal", "he_uniform"}, default="glorot"
        How the coefficient matrices are initialised before training:

        * ``"glorot"``     -- keep whatever the parent class generated;
        * ``"normal"``     -- N(0, 1/sqrt(fan_in));
        * ``"he_uniform"`` -- U(-sqrt(6/fan_in), +sqrt(6/fan_in)).

        Only the coefficients are replaced; the intercepts keep the values
        produced by the parent class.
    **kwargs
        Forwarded unchanged to ``MLPClassifier.__init__``.

    Raises
    ------
    ValueError
        At fit time (inside ``_initialize``) if ``weight_init`` is not one of
        the supported names.
    """

    _WEIGHT_INITS = ("glorot", "normal", "he_uniform")

    def __init__(self, *, weight_init="glorot", **kwargs):
        super().__init__(**kwargs)
        self.weight_init = weight_init

    @classmethod
    def _get_param_names(cls):
        """Return the parent's hyper-parameter names plus ``weight_init``.

        scikit-learn discovers hyper-parameters by inspecting the named
        arguments of ``__init__`` and skips ``**kwargs``.  Without this
        override ``get_params()`` would report only ``weight_init``, so
        ``clone()`` (used by ``cross_validate``, ``GridSearchCV``, ...) would
        rebuild the estimator with default ``hidden_layer_sizes``, ``alpha``,
        ``random_state`` and so on, silently dropping the configured values.
        """
        return sorted({*MLPClassifier._get_param_names(), "weight_init"})

    def _initialize(self, y, layer_units, dtype):
        """Run the parent initialisation, then overwrite the coefficients.

        The parent call happens first so every other attribute it sets up is
        in place; afterwards only ``self.coefs_`` is touched.
        """
        # Fail loudly on a typo instead of silently training with Glorot.
        if self.weight_init not in self._WEIGHT_INITS:
            raise ValueError(
                f"weight_init must be one of {self._WEIGHT_INITS}, "
                f"got {self.weight_init!r}"
            )

        super()._initialize(y, layer_units, dtype)
        if self.weight_init == "glorot":
            # Nothing to replace: the parent already produced these weights.
            return

        rng = self._random_state
        # Consecutive entries of layer_units give (fan_in, fan_out) per layer.
        for i, (fan_in, fan_out) in enumerate(zip(layer_units[:-1],
                                                  layer_units[1:])):
            shape = (fan_in, fan_out)
            if self.weight_init == "normal":
                scale = 1.0 / np.sqrt(fan_in)
                coef = rng.normal(0.0, scale, size=shape)
            else:  # "he_uniform"
                limit = np.sqrt(6.0 / fan_in)
                coef = rng.uniform(-limit, limit, size=shape)
            # Keep the dtype requested by the caller (the generators above
            # always return float64).
            self.coefs_[i] = coef.astype(dtype, copy=False)

# --------------------------------------------------
# 2. Data
# --------------------------------------------------
# Load the digits data and hold out 20% of it as the final test set.  The
# split is stratified on the labels and uses a fixed seed.
X, y = load_digits(return_X_y=True)
_split = train_test_split(X, y, random_state=42, stratify=y, test_size=0.20)
X_train, X_test, y_train, y_test = _split

# --------------------------------------------------
# 3. Configurations (architecture, L2, initialisation)
# --------------------------------------------------
# Maps a short config name to its settings:
#   layers      -> hidden layer sizes
#   alpha       -> L2 penalty
#   weight_init -> initialisation strategy name
configs = {
    "glo64_l2-4": {
        "layers": (64,), "alpha": 1e-4, "weight_init": "glorot",
    },
    "glo64_l2-3": {
        "layers": (64,), "alpha": 1e-3, "weight_init": "glorot",
    },
    "norm64_l2-4": {
        "layers": (64,), "alpha": 1e-4, "weight_init": "normal",
    },
    "heDeep_l2-4": {
        "layers": (128, 64), "alpha": 1e-4, "weight_init": "he_uniform",
    },
    "heDeep_l2-3": {
        "layers": (128, 64), "alpha": 1e-3, "weight_init": "he_uniform",
    },
    # ... add 5 or more further combinations here, for example:
    # "reluWide_l2-2": {"layers": (n, n),
    #                   "alpha": 1e-2,
    #                   "weight_init": "he_uniform",
    #                   "activation": "relu"},
}

# --------------------------------------------------
# 4. Cross-validation on the training split
# --------------------------------------------------
# Training settings shared by every candidate.  They live in one place so the
# model refit in section 5 is built exactly like the one that was
# cross-validated.
_FIXED_MLP_PARAMS = dict(
    max_iter=200,
    early_stopping=True,
    n_iter_no_change=5,
    learning_rate_init=1e-3,
    solver="adam",
    random_state=42,
)


def _build_pipeline(params: dict) -> Pipeline:
    """Return an unfitted StandardScaler -> InitMLP pipeline for one config.

    ``params`` is one entry of ``configs``.  ``layers``, ``alpha`` and
    ``weight_init`` are required (``KeyError`` otherwise).  Any additional
    key, e.g. ``activation``, is forwarded to ``InitMLP`` as a keyword
    argument and takes precedence over the shared defaults above.
    """
    extras = {key: value for key, value in params.items()
              if key not in ("layers", "alpha", "weight_init")}
    clf = InitMLP(
        hidden_layer_sizes=params["layers"],
        alpha=params["alpha"],
        weight_init=params["weight_init"],
        **{**_FIXED_MLP_PARAMS, **extras},
    )
    return Pipeline([("scale", StandardScaler()),
                     ("clf",   clf)])


cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = {"acc": "accuracy",
           "f1":  make_scorer(f1_score, average="macro")}

# One summary row per configuration: mean/std of each metric over the folds.
rows = []
for name, p in configs.items():
    pipe = _build_pipeline(p)

    res = cross_validate(pipe, X_train, y_train,
                         cv=cv, scoring=scoring,
                         return_train_score=False)
    f1_scores = res["test_f1"]
    acc_scores = res["test_acc"]

    rows.append({
        "config":   name,
        "layers":   p["layers"],
        "alpha":    p["alpha"],
        "init":     p["weight_init"],
        "f1_mean":  f1_scores.mean(),
        "f1_std":   f1_scores.std(),
        "acc_mean": acc_scores.mean(),
        "acc_std":  acc_scores.std(),
    })

    print(f"{name:12s} | CV macro-F1 = "
          f"{f1_scores.mean():.3f} ± {f1_scores.std():.3f}")

# Rank by mean macro-F1; the first row is the selected configuration.
summary = (pd.DataFrame(rows)
              .sort_values("f1_mean", ascending=False))

best_conf  = summary.iloc[0]
best_name  = best_conf["config"]
best_param = configs[best_name]
print("\n>> Selecionado:", best_name, dict(best_param))

# --------------------------------------------------
# 5. Refit on the whole training split + final test
# --------------------------------------------------
best_pipe = _build_pipeline(best_param)
# Keep the classifier step reachable under its own name.
best_clf = best_pipe.named_steps["clf"]
best_pipe.fit(X_train, y_train)

# The held-out test split is only touched here, after model selection.
y_pred   = best_pipe.predict(X_test)
test_acc = accuracy_score(y_test, y_pred)
test_f1  = f1_score(y_test, y_pred, average="macro")

print(f"\n>> TESTE | acc = {test_acc:.3f} | macro-F1 = {test_f1:.3f}")

# --------------------------------------------------
# 6. Summary table (for the report)
# --------------------------------------------------
# Show the cross-validation results with the columns in report order.
print("\nResumo completo:")
display(summary.loc[:, ["config", "layers", "alpha", "init",
                        "acc_mean", "acc_std", "f1_mean", "f1_std"]])
