In [None]:
import optuna
import pandas as pd
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
raw_df = pd.read_csv("../data/dados.csv", index_col="Unnamed: 0")

features = ["feat_8", "feat_17", "feat_50"]

X = raw_df[features]
y = raw_df["class"]

# Separar treino e teste
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [5]:
def objective(trial):
    C = trial.suggest_loguniform("C", 1e-3, 100)
    kernel = trial.suggest_categorical("kernel", ["rbf", "linear", "poly", "sigmoid"])
    # Parâmetros adicionais para poly
    degree = 3
    if kernel == "poly":
        degree = trial.suggest_int("degree", 2, 5)
    svm = SVC(
        C=C,
        kernel=kernel,
        degree=degree,
        probability=True,
        class_weight="balanced",
        random_state=42,
    )
    pipe = Pipeline(
        [
            ("svm", svm),
        ]
    )
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_val_score(
        pipe, X_train, y_train, cv=cv, scoring=make_scorer(f1_score)
    )
    return scores.mean()


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30, show_progress_bar=True)

print("Melhores hiperparâmetros encontrados:")
print(study.best_params)
print(f"Melhor F1 médio (CV): {study.best_value:.4f}")


[I 2025-07-16 18:07:51,992] A new study created in memory with name: no-name-d7982795-e61a-4a72-b4d0-32edb5f6d846
  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:52,647] Trial 0 finished with value: 0.4572443634772281 and parameters: {'C': 0.0024633138036785425, 'kernel': 'sigmoid'}. Best is trial 0 with value: 0.4572443634772281.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:53,114] Trial 1 finished with value: 0.5898655005116802 and parameters: {'C': 0.6718709154692556, 'kernel': 'poly', 'degree': 3}. Best is trial 1 with value: 0.5898655005116802.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:53,755] Trial 2 finished with value: 0.0 and parameters: {'C': 0.006700940578991396, 'kernel': 'sigmoid'}. Best is trial 1 with value: 0.5898655005116802.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:54,382] Trial 3 finished with value: 0.5517241379310345 and parameters: {'C': 0.004672322781473244, 'kernel': 'rbf'}. Best is trial 1 with value: 0.5898655005116802.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:54,687] Trial 4 finished with value: 0.7974575729048841 and parameters: {'C': 0.5111749163113858, 'kernel': 'poly', 'degree': 4}. Best is trial 4 with value: 0.7974575729048841.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:55,322] Trial 5 finished with value: 0.5517241379310345 and parameters: {'C': 0.0032919877243925678, 'kernel': 'sigmoid'}. Best is trial 4 with value: 0.7974575729048841.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:55,721] Trial 6 finished with value: 0.43740983089953345 and parameters: {'C': 6.743343797714176, 'kernel': 'sigmoid'}. Best is trial 4 with value: 0.7974575729048841.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:56,112] Trial 7 finished with value: 0.941981166293434 and parameters: {'C': 0.5298534831187143, 'kernel': 'rbf'}. Best is trial 7 with value: 0.941981166293434.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:56,773] Trial 8 finished with value: 0.0 and parameters: {'C': 0.003152630692077122, 'kernel': 'sigmoid'}. Best is trial 7 with value: 0.941981166293434.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:57,210] Trial 9 finished with value: 0.44083495590453126 and parameters: {'C': 1.1392214065464843, 'kernel': 'sigmoid'}. Best is trial 7 with value: 0.941981166293434.
[I 2025-07-16 18:07:57,356] Trial 10 finished with value: 0.9783763642381501 and parameters: {'C': 82.06690896391167, 'kernel': 'rbf'}. Best is trial 10 with value: 0.9783763642381501.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:07:57,502] Trial 11 finished with value: 0.9776163934210296 and parameters: {'C': 62.18137437541473, 'kernel': 'rbf'}. Best is trial 10 with value: 0.9783763642381501.
[I 2025-07-16 18:07:57,645] Trial 12 finished with value: 0.9783763642381501 and parameters: {'C': 93.76470378460532, 'kernel': 'rbf'}. Best is trial 10 with value: 0.9783763642381501.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:27,543] Trial 13 finished with value: 0.46421087619681867 and parameters: {'C': 68.48571391763952, 'kernel': 'linear'}. Best is trial 10 with value: 0.9783763642381501.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:27,761] Trial 14 finished with value: 0.9815725326540512 and parameters: {'C': 9.754981741396318, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.
[I 2025-07-16 18:08:27,941] Trial 15 finished with value: 0.9785427675791947 and parameters: {'C': 8.082325785471395, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:29,787] Trial 16 finished with value: 0.46421087619681867 and parameters: {'C': 6.808659580057067, 'kernel': 'linear'}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:30,305] Trial 17 finished with value: 0.7925769558298678 and parameters: {'C': 0.08862604580335574, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.
[I 2025-07-16 18:08:30,487] Trial 18 finished with value: 0.9808237976068165 and parameters: {'C': 8.686991441735804, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:31,049] Trial 19 finished with value: 0.7979498521964633 and parameters: {'C': 0.04664779623434175, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:37,989] Trial 20 finished with value: 0.5826230274743924 and parameters: {'C': 17.73935887426637, 'kernel': 'poly', 'degree': 5}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:38,218] Trial 21 finished with value: 0.9733353395868463 and parameters: {'C': 3.063643340875016, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.
[I 2025-07-16 18:08:38,384] Trial 22 finished with value: 0.9792212394251841 and parameters: {'C': 15.513909777698526, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:38,546] Trial 23 finished with value: 0.9792849387992284 and parameters: {'C': 18.000527217283764, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:45,138] Trial 24 finished with value: 0.46421087619681867 and parameters: {'C': 24.331715499175722, 'kernel': 'linear'}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:45,394] Trial 25 finished with value: 0.9703799981838191 and parameters: {'C': 2.08973609195578, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:45,892] Trial 26 finished with value: 0.7863498856086749 and parameters: {'C': 0.15814594453161732, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.


  C = trial.suggest_loguniform("C", 1e-3, 100)
  C = trial.suggest_loguniform("C", 1e-3, 100)


[I 2025-07-16 18:08:46,133] Trial 27 finished with value: 0.9703799981838191 and parameters: {'C': 2.561132889709658, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.
[I 2025-07-16 18:08:46,286] Trial 28 finished with value: 0.9799038271414474 and parameters: {'C': 28.65861185396658, 'kernel': 'rbf'}. Best is trial 14 with value: 0.9815725326540512.


Best trial: 14. Best value: 0.981573: 100%|██████████| 30/30 [00:55<00:00,  1.83s/it]

[I 2025-07-16 18:08:47,007] Trial 29 finished with value: 0.7989436410163391 and parameters: {'C': 32.49125009211688, 'kernel': 'poly', 'degree': 2}. Best is trial 14 with value: 0.9815725326540512.
Melhores hiperparâmetros encontrados:
{'C': 9.754981741396318, 'kernel': 'rbf'}
Melhor F1 médio (CV): 0.9816



