In [1]:
import optuna
import pandas as pd
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import train_test_split

from project_ml_course.data_process import filter_columns_by_correlation_threshold

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
raw_df = pd.read_csv("../data/dados.csv", index_col="Unnamed: 0")

df = filter_columns_by_correlation_threshold(
    df=raw_df,
    ref_col="class",
    method_type="pearson",
    lower_threshold=0.001,
    higher_threshold=0.999,
)

X = df.drop(columns=["class"])
y = df["class"]

# Separar treino e teste
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [3]:
def objective(trial):
    C = trial.suggest_loguniform("C", 1e-3, 100)
    kernel = trial.suggest_categorical("kernel", ["rbf", "linear", "poly", "sigmoid"])
    # Parâmetros adicionais para poly
    degree = 3
    if kernel == "poly":
        degree = trial.suggest_int("degree", 2, 5)
    svm = SVC(
        C=C,
        kernel=kernel,
        degree=degree,
        probability=True,
        class_weight="balanced",
        random_state=42,
    )
    pipe = Pipeline(
        [
            ("scaler", StandardScaler(with_std=False)),
            ("pca", PCA(n_components=3, random_state=42)),
            ("svm", svm),
        ]
    )
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_val_score(
        pipe, X_train, y_train, cv=cv, scoring=make_scorer(f1_score)
    )
    return scores.mean()


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30, show_progress_bar=True)

print("Melhores hiperparâmetros encontrados:")
print(study.best_params)
print(f"Melhor F1 médio (CV): {study.best_value:.4f}")


[I 2025-07-15 21:48:00,712] A new study created in memory with name: no-name-f55bfbf8-4d55-4920-ac0e-74df5da97103
  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:01,142] Trial 0 finished with value: 0.5748794307449971 and parameters: {'C': 0.01707661096569344, 'kernel': 'poly', 'degree': 3}. Best is trial 0 with value: 0.5748794307449971.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:01,532] Trial 1 finished with value: 0.4377400423258053 and parameters: {'C': 24.582642407691132, 'kernel': 'sigmoid'}. Best is trial 0 with value: 0.5748794307449971.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:28,281] Trial 2 finished with value: 0.4682718665386935 and parameters: {'C': 46.84918506717303, 'kernel': 'linear'}. Best is trial 0 with value: 0.5748794307449971.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:28,929] Trial 3 finished with value: 0.5517241379310345 and parameters: {'C': 0.010081210041180146, 'kernel': 'sigmoid'}. Best is trial 0 with value: 0.5748794307449971.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:29,302] Trial 4 finished with value: 0.9472222407681269 and parameters: {'C': 0.6196201569971492, 'kernel': 'rbf'}. Best is trial 4 with value: 0.9472222407681269.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:55,744] Trial 5 finished with value: 0.4682718665386935 and parameters: {'C': 42.790300327988525, 'kernel': 'linear'}. Best is trial 4 with value: 0.9472222407681269.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:56,238] Trial 6 finished with value: 0.5719513286334719 and parameters: {'C': 0.05594519926202318, 'kernel': 'poly', 'degree': 5}. Best is trial 4 with value: 0.9472222407681269.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:56,685] Trial 7 finished with value: 0.4474469819834225 and parameters: {'C': 0.9251687983228754, 'kernel': 'sigmoid'}. Best is trial 4 with value: 0.9472222407681269.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:57,320] Trial 8 finished with value: 0.5238116733840872 and parameters: {'C': 0.12100387985951283, 'kernel': 'sigmoid'}. Best is trial 4 with value: 0.9472222407681269.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:57,739] Trial 9 finished with value: 0.711847057934293 and parameters: {'C': 0.0022106771673140336, 'kernel': 'poly', 'degree': 4}. Best is trial 4 with value: 0.9472222407681269.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:58,016] Trial 10 finished with value: 0.9651656175821899 and parameters: {'C': 1.610032058333692, 'kernel': 'rbf'}. Best is trial 10 with value: 0.9651656175821899.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:58,330] Trial 11 finished with value: 0.9579126837312859 and parameters: {'C': 1.1030907407164943, 'kernel': 'rbf'}. Best is trial 10 with value: 0.9651656175821899.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:58,551] Trial 12 finished with value: 0.9732890010273287 and parameters: {'C': 3.9185388759211035, 'kernel': 'rbf'}. Best is trial 12 with value: 0.9732890010273287.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:58,766] Trial 13 finished with value: 0.9732890010273287 and parameters: {'C': 4.442975063072061, 'kernel': 'rbf'}. Best is trial 12 with value: 0.9732890010273287.
[I 2025-07-15 21:48:58,964] Trial 14 finished with value: 0.9754767132586007 and parameters: {'C': 6.516999487755443, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9754767132586007.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:59,152] Trial 15 finished with value: 0.9769570109575352 and parameters: {'C': 7.70469630752824, 'kernel': 'rbf'}. Best is trial 15 with value: 0.9769570109575352.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:59,365] Trial 16 finished with value: 0.9784599917708607 and parameters: {'C': 10.251516094582083, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.
[I 2025-07-15 21:48:59,538] Trial 17 finished with value: 0.9784599917708607 and parameters: {'C': 18.157108597726467, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:48:59,694] Trial 18 finished with value: 0.9752880746622781 and parameters: {'C': 84.44876262570618, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:49:02,821] Trial 19 finished with value: 0.4674604086384483 and parameters: {'C': 15.358877472978646, 'kernel': 'linear'}. Best is trial 16 with value: 0.9784599917708607.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:49:03,283] Trial 20 finished with value: 0.7804361941455698 and parameters: {'C': 0.22882833881881712, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.
[I 2025-07-15 21:49:03,461] Trial 21 finished with value: 0.9776586314710534 and parameters: {'C': 12.106327315451981, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:49:03,630] Trial 22 finished with value: 0.976149190783017 and parameters: {'C': 15.78244229330851, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:49:03,870] Trial 23 finished with value: 0.9717686005964186 and parameters: {'C': 2.8213630791345228, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.
[I 2025-07-15 21:49:04,044] Trial 24 finished with value: 0.9776586314710534 and parameters: {'C': 12.543698493288383, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:49:04,199] Trial 25 finished with value: 0.9783595171608905 and parameters: {'C': 45.0792661189634, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.
[I 2025-07-15 21:49:04,353] Trial 26 finished with value: 0.9760719066648518 and parameters: {'C': 85.40575273871015, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:49:04,944] Trial 27 finished with value: 0.798942512038718 and parameters: {'C': 26.404956329062642, 'kernel': 'poly', 'degree': 2}. Best is trial 16 with value: 0.9784599917708607.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-15 21:49:05,193] Trial 28 finished with value: 0.9695565633254255 and parameters: {'C': 2.4009748448419623, 'kernel': 'rbf'}. Best is trial 16 with value: 0.9784599917708607.


Best trial: 16. Best value: 0.97846: 100%|██████████| 30/30 [01:04<00:00,  2.17s/it]

[I 2025-07-15 21:49:05,669] Trial 29 finished with value: 0.468012329996804 and parameters: {'C': 0.44706323454078883, 'kernel': 'linear'}. Best is trial 16 with value: 0.9784599917708607.
Melhores hiperparâmetros encontrados:
{'C': 10.251516094582083, 'kernel': 'rbf'}
Melhor F1 médio (CV): 0.9785



