<a href="https://colab.research.google.com/github/vitormunnizz/audio_for/blob/main/ml_aumento.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import lightgbm as lgb
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

In [None]:
# Mount Google Drive at /content/drive (Colab only) so the dataset folders
# referenced below are reachable
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Dataset folders on Google Drive: one for real recordings, one for fake ones
file_path_real = '/content/drive/MyDrive/audios_FOR/real_augmented_all'
file_path_fake = '/content/drive/MyDrive/audios_FOR/fake_augmented_all'

In [None]:
# Summarize one audio file as a fixed-length MFCC vector
def extract_features(file_path):
    """Return the mean of 13 MFCCs over all frames of an audio file.

    The file is loaded with ``sr=None`` and 13 MFCCs are computed; the
    result is the per-coefficient average across frames.

    Args:
        file_path: Path of the audio file to process.

    Returns:
        A 1-D array with one averaged value per coefficient, or ``None``
        when loading or feature computation raises (the error is printed).
    """
    try:
        signal, sample_rate = librosa.load(file_path, sr=None)
        coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
        # Transpose to (frames, coefficients) and average over the frames
        averaged = np.mean(coefficients.T, axis=0)
        return averaged
    except Exception as e:
        # Best effort: report the failing file and let the caller skip it
        print(f"Erro ao processar {file_path}: {e}")
        return None

# Shared accumulators filled by load_data: one feature vector per file in
# `data`, the matching class label in `labels`
data = []
labels = []

In [None]:
def load_data(folder_path, label):
    """Extract features from every entry of a folder and record them with a label.

    Each entry of ``folder_path`` is passed to ``extract_features``; entries
    for which it returns ``None`` are skipped. Results are appended to the
    module-level ``data`` and ``labels`` lists.

    Args:
        folder_path: Directory whose entries are processed.
        label: Value appended to ``labels`` for every processed entry.
    """
    global data, labels
    # os.listdir returns names in arbitrary order; sorting keeps the row
    # order stable between runs and machines.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        features = extract_features(file_path)
        if features is not None:
            data.append(features)
            labels.append(label)

# Make sure both dataset folders exist (a missing folder is created empty)
os.makedirs(file_path_real, exist_ok=True)
os.makedirs(file_path_fake, exist_ok=True)

# load_data appends to the shared lists, so empty them first; otherwise
# re-running this cell would add the whole dataset a second time.
data.clear()
labels.clear()

# Extract features for both classes
load_data(file_path_fake, 0)  # label 0 = fake
load_data(file_path_real, 1)  # label 1 = real

In [None]:
# Drop entries without features, keeping data and labels aligned
data_filtered = []
labels_filtered = []
for idx, features_i in enumerate(data):
    if features_i is None:
        continue
    data_filtered.append(features_i)
    labels_filtered.append(labels[idx])

data, labels = data_filtered, labels_filtered

In [None]:
# Feature table: one row per file, one column per averaged coefficient,
# plus the class label
df = pd.DataFrame(data).assign(label=labels)

# Number of rows per class
label_counts = df['label'].value_counts()

In [None]:
# Preview the first rows of the feature table
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,label
0,-310.484985,79.58918,12.035122,21.775892,7.463613,-4.327323,1.702638,5.056441,-11.605008,-8.825641,-12.136454,-8.166679,-7.88645,0
1,-326.426239,79.519638,14.534233,20.880121,7.56072,-4.820358,-0.540747,4.899628,-12.638927,-10.141902,-13.165885,-9.67397,-8.872346,0
2,-310.484985,79.58918,12.035122,21.775892,7.463613,-4.327323,1.702638,5.056441,-11.605008,-8.825641,-12.136454,-8.166679,-7.88645,0
3,-292.760193,85.521347,-18.103905,14.233822,-1.934288,-8.300287,-15.360986,-14.333099,-0.524541,-12.45305,-2.744914,-14.212177,0.993812,0
4,-292.760193,85.521347,-18.103905,14.233822,-1.934288,-8.300287,-15.360986,-14.333099,-0.524541,-12.45305,-2.744914,-14.212177,0.993812,0


In [None]:
# (rows, columns) of the feature table, label column included
df.shape

(2445, 14)

In [None]:
# Split the table into the feature matrix (X) and the label vector (y)
X, y = df.drop(columns=['label']), df['label']

In [None]:
# UNDERSAMPLING: balance the classes with RandomUnderSampler (fixed seed).
# NOTE(review): resampling is applied to the full dataset, before the
# cross-validation splits used further down; consider resampling only the
# training part of each fold.
under = RandomUnderSampler(random_state=42)
X_under, y_under = under.fit_resample(X, y)
print("Distribuição após undersampling:", Counter(y_under))

Distribuição após undersampling: Counter({0: 695, 1: 695})


In [None]:
# OVERSAMPLING: balance the classes with SMOTE (fixed seed).
# NOTE(review): SMOTE is fitted on the full dataset, before the
# cross-validation splits used further down, so test folds can contain
# synthetic points derived from training rows; consider oversampling only
# the training part of each fold.
smote = SMOTE(random_state=42)
X_over, y_over = smote.fit_resample(X, y)
print("Distribuição após oversampling:", Counter(y_over))

Distribuição após oversampling: Counter({0: 1750, 1: 1750})


In [None]:
# Helper that computes and prints the evaluation metrics
def print_metrics(y_true, y_pred):
    """Print accuracy, precision, recall and F1 with two decimals.

    All four scores are computed before anything is printed, and a blank
    separator is printed after the report.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
    """
    accuracy, precision, recall, f1 = (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        f1_score(y_true, y_pred),
    )

    print(f"Acurácia: {accuracy:.2f}")
    print(f"Precisão: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1-Score: {f1:.2f}")
    print("\n")

### Random Forest

In [None]:
# Use plain NumPy arrays so the fold indices can be applied positionally
X_array = X.values if hasattr(X, "values") else np.array(X)
y_array = y.values if hasattr(y, "values") else np.array(y)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Per-fold metric values
accs, precs, recalls, f1s = [], [], [], []

# Train and evaluate one model per fold
for fold, (train_idx, test_idx) in enumerate(skf.split(X_array, y_array)):
    X_train, X_test = X_array[train_idx], X_array[test_idx]
    y_train, y_test = y_array[train_idx], y_array[test_idx]

    # A single tree with max_features=1 depends heavily on the random draw;
    # seeding it (like the splitter) makes the reported scores repeatable.
    rf_model = RandomForestClassifier(n_estimators=1, max_features=1, random_state=42)
    rf_model.fit(X_train, y_train)
    y_pred = rf_model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    accs.append(acc)
    precs.append(prec)
    recalls.append(rec)
    f1s.append(f1)

# Report mean and standard deviation across folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs):.2f} ± {np.std(accs):.2f}")
print(f"Precisão média: {np.mean(precs):.2f} ± {np.std(precs):.2f}")
print(f"Recall médio: {np.mean(recalls):.2f} ± {np.std(recalls):.2f}")
print(f"F1-Score médio: {np.mean(f1s):.2f} ± {np.std(f1s):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.92 ± 0.02
Precisão média: 0.95 ± 0.01
Recall médio: 0.95 ± 0.02
F1-Score médio: 0.95 ± 0.01


In [None]:
# Use plain NumPy arrays so the fold indices can be applied positionally
X_array_under = X_under.values if hasattr(X_under, "values") else np.array(X_under)
y_array_under = y_under.values if hasattr(y_under, "values") else np.array(y_under)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_under = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Per-fold metric values
accs_under, precs_under, recalls_under, f1s_under = [], [], [], []

# Train and evaluate one model per fold
for fold_under, (train_idx_under, test_idx_under) in enumerate(skf_under.split(X_array_under, y_array_under)):
    X_train_under, X_test_under = X_array_under[train_idx_under], X_array_under[test_idx_under]
    y_train_under, y_test_under = y_array_under[train_idx_under], y_array_under[test_idx_under]

    # A single tree with max_features=1 depends heavily on the random draw;
    # seeding it (like the splitter) makes the reported scores repeatable.
    rf_model_under = RandomForestClassifier(n_estimators=1, max_features=1, random_state=42)
    rf_model_under.fit(X_train_under, y_train_under)
    y_pred_under = rf_model_under.predict(X_test_under)

    acc_under = accuracy_score(y_test_under, y_pred_under)
    prec_under = precision_score(y_test_under, y_pred_under)
    rec_under = recall_score(y_test_under, y_pred_under)
    f1_under = f1_score(y_test_under, y_pred_under)

    accs_under.append(acc_under)
    precs_under.append(prec_under)
    recalls_under.append(rec_under)
    f1s_under.append(f1_under)

# Report mean and standard deviation across folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_under):.2f} ± {np.std(accs_under):.2f}")
print(f"Precisão média: {np.mean(precs_under):.2f} ± {np.std(precs_under):.2f}")
print(f"Recall médio: {np.mean(recalls_under):.2f} ± {np.std(recalls_under):.2f}")
print(f"F1-Score médio: {np.mean(f1s_under):.2f} ± {np.std(f1s_under):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.89 ± 0.03
Precisão média: 0.91 ± 0.04
Recall médio: 0.86 ± 0.05
F1-Score médio: 0.88 ± 0.03


In [None]:
# Use plain NumPy arrays so the fold indices can be applied positionally
X_array_over = X_over.values if hasattr(X_over, "values") else np.array(X_over)
y_array_over = y_over.values if hasattr(y_over, "values") else np.array(y_over)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_over = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Per-fold metric values
accs_over, precs_over, recalls_over, f1s_over = [], [], [], []

# Train and evaluate one model per fold
for fold_over, (train_idx_over, test_idx_over) in enumerate(skf_over.split(X_array_over, y_array_over)):
    X_train_over, X_test_over = X_array_over[train_idx_over], X_array_over[test_idx_over]
    y_train_over, y_test_over = y_array_over[train_idx_over], y_array_over[test_idx_over]

    # A single tree with max_features=1 depends heavily on the random draw;
    # seeding it (like the splitter) makes the reported scores repeatable.
    rf_model_over = RandomForestClassifier(n_estimators=1, max_features=1, random_state=42)
    rf_model_over.fit(X_train_over, y_train_over)
    y_pred_over = rf_model_over.predict(X_test_over)

    acc_over = accuracy_score(y_test_over, y_pred_over)
    prec_over = precision_score(y_test_over, y_pred_over)
    rec_over = recall_score(y_test_over, y_pred_over)
    f1_over = f1_score(y_test_over, y_pred_over)

    accs_over.append(acc_over)
    precs_over.append(prec_over)
    recalls_over.append(rec_over)
    f1s_over.append(f1_over)

# Report mean and standard deviation across folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_over):.2f} ± {np.std(accs_over):.2f}")
print(f"Precisão média: {np.mean(precs_over):.2f} ± {np.std(precs_over):.2f}")
print(f"Recall médio: {np.mean(recalls_over):.2f} ± {np.std(recalls_over):.2f}")
print(f"F1-Score médio: {np.mean(f1s_over):.2f} ± {np.std(f1s_over):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.95 ± 0.01
Precisão média: 0.95 ± 0.02
Recall médio: 0.94 ± 0.03
F1-Score médio: 0.95 ± 0.01


### LightGBM

In [None]:
# Plain NumPy arrays, indexed positionally by the fold splits
X_array = X.values if hasattr(X, "values") else np.array(X)
y_array = y.values if hasattr(y, "values") else np.array(y)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs, precs, recalls, f1s = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(skf.split(X_array, y_array)):
    X_train, y_train = X_array[train_idx], y_array[train_idx]
    X_test, y_test = X_array[test_idx], y_array[test_idx]

    # LightGBM classifier limited to n_estimators=1, with logging silenced
    lgb_model = lgb.LGBMClassifier(n_estimators=1, verbose=-1).fit(X_train, y_train)
    y_pred = lgb_model.predict(X_test)

    # Score the held-out fold
    acc, prec, rec, f1 = (
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
        f1_score(y_test, y_pred),
    )

    accs.append(acc)
    precs.append(prec)
    recalls.append(rec)
    f1s.append(f1)

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs):.2f} ± {np.std(accs):.2f}")
print(f"Precisão média: {np.mean(precs):.2f} ± {np.std(precs):.2f}")
print(f"Recall médio: {np.mean(recalls):.2f} ± {np.std(recalls):.2f}")
print(f"F1-Score médio: {np.mean(f1s):.2f} ± {np.std(f1s):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.72 ± 0.00
Precisão média: 0.72 ± 0.00
Recall médio: 1.00 ± 0.00
F1-Score médio: 0.83 ± 0.00




In [None]:
# Plain NumPy arrays of the undersampled data, indexed by the fold splits
X_array_under = X_under.values if hasattr(X_under, "values") else np.array(X_under)
y_array_under = y_under.values if hasattr(y_under, "values") else np.array(y_under)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_under = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs_under, precs_under, recalls_under, f1s_under = [], [], [], []

for fold_under, (train_idx_under, test_idx_under) in enumerate(skf_under.split(X_array_under, y_array_under)):
    X_train_under, y_train_under = X_array_under[train_idx_under], y_array_under[train_idx_under]
    X_test_under, y_test_under = X_array_under[test_idx_under], y_array_under[test_idx_under]

    # LightGBM classifier limited to n_estimators=1, with logging silenced
    lgb_model_under = lgb.LGBMClassifier(n_estimators=1, verbose=-1).fit(X_train_under, y_train_under)
    y_pred_under = lgb_model_under.predict(X_test_under)

    # Score the held-out fold
    acc_under, prec_under, rec_under, f1_under = (
        accuracy_score(y_test_under, y_pred_under),
        precision_score(y_test_under, y_pred_under),
        recall_score(y_test_under, y_pred_under),
        f1_score(y_test_under, y_pred_under),
    )

    accs_under.append(acc_under)
    precs_under.append(prec_under)
    recalls_under.append(rec_under)
    f1s_under.append(f1_under)

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_under):.2f} ± {np.std(accs_under):.2f}")
print(f"Precisão média: {np.mean(precs_under):.2f} ± {np.std(precs_under):.2f}")
print(f"Recall médio: {np.mean(recalls_under):.2f} ± {np.std(recalls_under):.2f}")
print(f"F1-Score médio: {np.mean(f1s_under):.2f} ± {np.std(f1s_under):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.84 ± 0.02
Precisão média: 0.84 ± 0.04
Recall médio: 0.86 ± 0.04
F1-Score médio: 0.85 ± 0.02




In [None]:
# Plain NumPy arrays of the oversampled data, indexed by the fold splits
X_array_over = X_over.values if hasattr(X_over, "values") else np.array(X_over)
y_array_over = y_over.values if hasattr(y_over, "values") else np.array(y_over)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_over = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs_over, precs_over, recalls_over, f1s_over = [], [], [], []

for fold_over, (train_idx_over, test_idx_over) in enumerate(skf_over.split(X_array_over, y_array_over)):
    X_train_over, y_train_over = X_array_over[train_idx_over], y_array_over[train_idx_over]
    X_test_over, y_test_over = X_array_over[test_idx_over], y_array_over[test_idx_over]

    # LightGBM classifier limited to n_estimators=1, with logging silenced
    lgb_model_over = lgb.LGBMClassifier(n_estimators=1, verbose=-1).fit(X_train_over, y_train_over)
    y_pred_over = lgb_model_over.predict(X_test_over)

    # Score the held-out fold
    acc_over, prec_over, rec_over, f1_over = (
        accuracy_score(y_test_over, y_pred_over),
        precision_score(y_test_over, y_pred_over),
        recall_score(y_test_over, y_pred_over),
        f1_score(y_test_over, y_pred_over),
    )

    accs_over.append(acc_over)
    precs_over.append(prec_over)
    recalls_over.append(rec_over)
    f1s_over.append(f1_over)

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_over):.2f} ± {np.std(accs_over):.2f}")
print(f"Precisão média: {np.mean(precs_over):.2f} ± {np.std(precs_over):.2f}")
print(f"Recall médio: {np.mean(recalls_over):.2f} ± {np.std(recalls_over):.2f}")
print(f"F1-Score médio: {np.mean(f1s_over):.2f} ± {np.std(f1s_over):.2f}")



MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.91 ± 0.01
Precisão média: 0.93 ± 0.02
Recall médio: 0.89 ± 0.01
F1-Score médio: 0.91 ± 0.01




### Naïve Bayes

In [None]:
# Plain NumPy arrays, indexed positionally by the fold splits
X_array = X.values if hasattr(X, "values") else np.array(X)
y_array = y.values if hasattr(y, "values") else np.array(y)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs, precs, recalls, f1s = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(skf.split(X_array, y_array)):
    X_train, y_train = X_array[train_idx], y_array[train_idx]
    X_test, y_test = X_array[test_idx], y_array[test_idx]

    # Gaussian Naive Bayes with default settings
    nb_model = GaussianNB().fit(X_train, y_train)
    y_pred = nb_model.predict(X_test)

    # Score the held-out fold
    acc, prec, rec, f1 = (
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
        f1_score(y_test, y_pred),
    )

    accs.append(acc)
    precs.append(prec)
    recalls.append(rec)
    f1s.append(f1)

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs):.2f} ± {np.std(accs):.2f}")
print(f"Precisão média: {np.mean(precs):.2f} ± {np.std(precs):.2f}")
print(f"Recall médio: {np.mean(recalls):.2f} ± {np.std(recalls):.2f}")
print(f"F1-Score médio: {np.mean(f1s):.2f} ± {np.std(f1s):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.85 ± 0.03
Precisão média: 0.89 ± 0.03
Recall médio: 0.90 ± 0.03
F1-Score médio: 0.89 ± 0.02


In [None]:
# Plain NumPy arrays of the undersampled data, indexed by the fold splits
X_array_under = X_under.values if hasattr(X_under, "values") else np.array(X_under)
y_array_under = y_under.values if hasattr(y_under, "values") else np.array(y_under)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_under = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs_under, precs_under, recalls_under, f1s_under = [], [], [], []

for fold_under, (train_idx_under, test_idx_under) in enumerate(skf_under.split(X_array_under, y_array_under)):
    X_train_under, y_train_under = X_array_under[train_idx_under], y_array_under[train_idx_under]
    X_test_under, y_test_under = X_array_under[test_idx_under], y_array_under[test_idx_under]

    # Gaussian Naive Bayes with default settings
    nb_model_under = GaussianNB().fit(X_train_under, y_train_under)
    y_pred_under = nb_model_under.predict(X_test_under)

    # Score the held-out fold
    acc_under, prec_under, rec_under, f1_under = (
        accuracy_score(y_test_under, y_pred_under),
        precision_score(y_test_under, y_pred_under),
        recall_score(y_test_under, y_pred_under),
        f1_score(y_test_under, y_pred_under),
    )

    accs_under.append(acc_under)
    precs_under.append(prec_under)
    recalls_under.append(rec_under)
    f1s_under.append(f1_under)

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_under):.2f} ± {np.std(accs_under):.2f}")
print(f"Precisão média: {np.mean(precs_under):.2f} ± {np.std(precs_under):.2f}")
print(f"Recall médio: {np.mean(recalls_under):.2f} ± {np.std(recalls_under):.2f}")
print(f"F1-Score médio: {np.mean(f1s_under):.2f} ± {np.std(f1s_under):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.83 ± 0.03
Precisão média: 0.82 ± 0.04
Recall médio: 0.85 ± 0.04
F1-Score médio: 0.83 ± 0.03


In [None]:
# Plain NumPy arrays of the oversampled data, indexed by the fold splits
X_array_over = X_over.values if hasattr(X_over, "values") else np.array(X_over)
y_array_over = y_over.values if hasattr(y_over, "values") else np.array(y_over)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_over = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs_over, precs_over, recalls_over, f1s_over = [], [], [], []

for fold_over, (train_idx_over, test_idx_over) in enumerate(skf_over.split(X_array_over, y_array_over)):
    X_train_over, y_train_over = X_array_over[train_idx_over], y_array_over[train_idx_over]
    X_test_over, y_test_over = X_array_over[test_idx_over], y_array_over[test_idx_over]

    # Gaussian Naive Bayes with default settings
    nb_model_over = GaussianNB().fit(X_train_over, y_train_over)
    y_pred_over = nb_model_over.predict(X_test_over)

    # Score the held-out fold
    acc_over, prec_over, rec_over, f1_over = (
        accuracy_score(y_test_over, y_pred_over),
        precision_score(y_test_over, y_pred_over),
        recall_score(y_test_over, y_pred_over),
        f1_score(y_test_over, y_pred_over),
    )

    accs_over.append(acc_over)
    precs_over.append(prec_over)
    recalls_over.append(rec_over)
    f1s_over.append(f1_over)

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_over):.2f} ± {np.std(accs_over):.2f}")
print(f"Precisão média: {np.mean(precs_over):.2f} ± {np.std(precs_over):.2f}")
print(f"Recall médio: {np.mean(recalls_over):.2f} ± {np.std(recalls_over):.2f}")
print(f"F1-Score médio: {np.mean(f1s_over):.2f} ± {np.std(f1s_over):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.85 ± 0.02
Precisão média: 0.83 ± 0.03
Recall médio: 0.86 ± 0.04
F1-Score médio: 0.85 ± 0.02


### LSTM

In [None]:
# Use plain NumPy arrays so the fold indices can be applied positionally
X_array_lstm = X.values if hasattr(X, "values") else np.array(X)
y_array_lstm = y.values if hasattr(y, "values") else np.array(y)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_lstm = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Per-fold metric values for this model only
accs_lstm, precs_lstm, recalls_lstm, f1s_lstm = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(skf_lstm.split(X_array_lstm, y_array_lstm)):
    X_train, X_test = X_array_lstm[train_idx], X_array_lstm[test_idx]
    y_train, y_test = y_array_lstm[train_idx], y_array_lstm[test_idx]

    # Standardize using statistics from the training fold only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Reshape to [samples, timesteps, features] with a single timestep
    X_train_lstm = X_train_scaled.reshape(-1, 1, X_train_scaled.shape[1])
    X_test_lstm = X_test_scaled.reshape(-1, 1, X_test_scaled.shape[1])

    # Build and train the model: one LSTM unit followed by a sigmoid output
    lstm_model = Sequential()
    lstm_model.add(LSTM(units=1, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))
    lstm_model.add(Dense(1, activation='sigmoid'))
    lstm_model.compile(optimizer='adam', loss='binary_crossentropy')
    lstm_model.fit(X_train_lstm, y_train, epochs=10, verbose=0)

    # Threshold the sigmoid output at 0.5 to get class labels
    y_pred = (lstm_model.predict(X_test_lstm) > 0.5).astype("int64")

    # Record the scores in the LSTM-specific lists so the summary below
    # reflects only the folds evaluated in this cell
    accs_lstm.append(accuracy_score(y_test, y_pred))
    precs_lstm.append(precision_score(y_test, y_pred))
    recalls_lstm.append(recall_score(y_test, y_pred))
    f1s_lstm.append(f1_score(y_test, y_pred))

# Report mean and standard deviation across folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_lstm):.2f} ± {np.std(accs_lstm):.2f}")
print(f"Precisão média: {np.mean(precs_lstm):.2f} ± {np.std(precs_lstm):.2f}")
print(f"Recall médio: {np.mean(recalls_lstm):.2f} ± {np.std(recalls_lstm):.2f}")
print(f"F1-Score médio: {np.mean(f1s_lstm):.2f} ± {np.std(f1s_lstm):.2f}")

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.83 ± 0.04
Precisão média: 0.85 ± 0.05
Recall médio: 0.94 ± 0.05
F1-Score médio: 0.89 ± 0.02


In [None]:
# Plain NumPy arrays of the undersampled data, indexed by the fold splits
X_array_under = X_under.values if hasattr(X_under, "values") else np.array(X_under)
y_array_under = y_under.values if hasattr(y_under, "values") else np.array(y_under)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_under = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs_under, precs_under, recalls_under, f1s_under = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(skf_under.split(X_array_under, y_array_under)):
    X_train_under, y_train_under = X_array_under[train_idx], y_array_under[train_idx]
    X_test_under, y_test_under = X_array_under[test_idx], y_array_under[test_idx]

    # Standardize using statistics from the training fold only
    scaler_under = StandardScaler()
    X_train_scaled_under = scaler_under.fit_transform(X_train_under)
    X_test_scaled_under = scaler_under.transform(X_test_under)

    # Insert a length-1 time axis: (samples, features) -> (samples, 1, features)
    X_train_under = np.expand_dims(X_train_scaled_under, axis=1)
    X_test_under = np.expand_dims(X_test_scaled_under, axis=1)

    # One LSTM unit followed by a sigmoid output, trained for 10 epochs
    lstm_model_under = Sequential([
        LSTM(units=1, input_shape=X_train_under.shape[1:]),
        Dense(1, activation='sigmoid'),
    ])
    lstm_model_under.compile(optimizer='adam', loss='binary_crossentropy')
    lstm_model_under.fit(X_train_under, y_train_under, epochs=10, verbose=0)

    # Threshold the sigmoid output at 0.5 to get class labels
    y_pred_under = (lstm_model_under.predict(X_test_under) > 0.5).astype("int64")

    # Score the held-out fold
    accs_under.append(accuracy_score(y_test_under, y_pred_under))
    precs_under.append(precision_score(y_test_under, y_pred_under))
    recalls_under.append(recall_score(y_test_under, y_pred_under))
    f1s_under.append(f1_score(y_test_under, y_pred_under))

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_under):.2f} ± {np.std(accs_under):.2f}")
print(f"Precisão média: {np.mean(precs_under):.2f} ± {np.std(precs_under):.2f}")
print(f"Recall médio: {np.mean(recalls_under):.2f} ± {np.std(recalls_under):.2f}")
print(f"F1-Score médio: {np.mean(f1s_under):.2f} ± {np.std(f1s_under):.2f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 68ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.69 ± 0.08
Precisão média: 0.70 ± 0.10
Recall médio: 0.67 ± 0.08
F1-Score médio: 0.69 ± 0.07


In [None]:
# Plain NumPy arrays of the oversampled data, indexed by the fold splits
X_array_over = X_over.values if hasattr(X_over, "values") else np.array(X_over)
y_array_over = y_over.values if hasattr(y_over, "values") else np.array(y_over)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_over = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs_over, precs_over, recalls_over, f1s_over = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(skf_over.split(X_array_over, y_array_over)):
    X_train_over, y_train_over = X_array_over[train_idx], y_array_over[train_idx]
    X_test_over, y_test_over = X_array_over[test_idx], y_array_over[test_idx]

    # Standardize using statistics from the training fold only
    scaler_over = StandardScaler()
    X_train_scaled_over = scaler_over.fit_transform(X_train_over)
    X_test_scaled_over = scaler_over.transform(X_test_over)

    # Insert a length-1 time axis: (samples, features) -> (samples, 1, features)
    X_train_over = np.expand_dims(X_train_scaled_over, axis=1)
    X_test_over = np.expand_dims(X_test_scaled_over, axis=1)

    # One LSTM unit followed by a sigmoid output, trained for 10 epochs
    lstm_model_over = Sequential([
        LSTM(units=1, input_shape=X_train_over.shape[1:]),
        Dense(1, activation='sigmoid'),
    ])
    lstm_model_over.compile(optimizer='adam', loss='binary_crossentropy')
    lstm_model_over.fit(X_train_over, y_train_over, epochs=10, verbose=0)

    # Threshold the sigmoid output at 0.5 to get class labels
    y_pred_over = (lstm_model_over.predict(X_test_over) > 0.5).astype("int64")

    # Score the held-out fold
    accs_over.append(accuracy_score(y_test_over, y_pred_over))
    precs_over.append(precision_score(y_test_over, y_pred_over))
    recalls_over.append(recall_score(y_test_over, y_pred_over))
    f1s_over.append(f1_score(y_test_over, y_pred_over))

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_over):.2f} ± {np.std(accs_over):.2f}")
print(f"Precisão média: {np.mean(precs_over):.2f} ± {np.std(precs_over):.2f}")
print(f"Recall médio: {np.mean(recalls_over):.2f} ± {np.std(recalls_over):.2f}")
print(f"F1-Score médio: {np.mean(f1s_over):.2f} ± {np.std(f1s_over):.2f}")

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.83 ± 0.03
Precisão média: 0.84 ± 0.06
Recall médio: 0.84 ± 0.07
F1-Score médio: 0.83 ± 0.03


### KNN

In [None]:
# Plain NumPy arrays, indexed positionally by the fold splits
X_array = X.values if hasattr(X, "values") else np.array(X)
y_array = y.values if hasattr(y, "values") else np.array(y)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs, precs, recalls, f1s = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(skf.split(X_array, y_array)):
    X_train, y_train = X_array[train_idx], y_array[train_idx]
    X_test, y_test = X_array[test_idx], y_array[test_idx]

    # k-nearest neighbours with k=100.
    # NOTE(review): features are used unscaled here; confirm that is
    # intended for a distance-based model.
    knn_model = KNeighborsClassifier(n_neighbors=100).fit(X_train, y_train)
    y_pred = knn_model.predict(X_test)

    # Score the held-out fold
    acc, prec, rec, f1 = (
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
        f1_score(y_test, y_pred),
    )

    accs.append(acc)
    precs.append(prec)
    recalls.append(rec)
    f1s.append(f1)

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs):.2f} ± {np.std(accs):.2f}")
print(f"Precisão média: {np.mean(precs):.2f} ± {np.std(precs):.2f}")
print(f"Recall médio: {np.mean(recalls):.2f} ± {np.std(recalls):.2f}")
print(f"F1-Score médio: {np.mean(f1s):.2f} ± {np.std(f1s):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.88 ± 0.03
Precisão média: 0.88 ± 0.02
Recall médio: 0.96 ± 0.02
F1-Score médio: 0.92 ± 0.02


In [None]:
# Plain NumPy arrays of the undersampled data, indexed by the fold splits
X_array_under = X_under.values if hasattr(X_under, "values") else np.array(X_under)
y_array_under = y_under.values if hasattr(y_under, "values") else np.array(y_under)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_under = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs_under, precs_under, recalls_under, f1s_under = [], [], [], []

for fold_under, (train_idx_under, test_idx_under) in enumerate(skf_under.split(X_array_under, y_array_under)):
    X_train_under, y_train_under = X_array_under[train_idx_under], y_array_under[train_idx_under]
    X_test_under, y_test_under = X_array_under[test_idx_under], y_array_under[test_idx_under]

    # k-nearest neighbours with k=100.
    # NOTE(review): features are used unscaled here; confirm that is
    # intended for a distance-based model.
    knn_model_under = KNeighborsClassifier(n_neighbors=100).fit(X_train_under, y_train_under)
    y_pred_under = knn_model_under.predict(X_test_under)

    # Score the held-out fold
    acc_under, prec_under, rec_under, f1_under = (
        accuracy_score(y_test_under, y_pred_under),
        precision_score(y_test_under, y_pred_under),
        recall_score(y_test_under, y_pred_under),
        f1_score(y_test_under, y_pred_under),
    )

    accs_under.append(acc_under)
    precs_under.append(prec_under)
    recalls_under.append(rec_under)
    f1s_under.append(f1_under)

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_under):.2f} ± {np.std(accs_under):.2f}")
print(f"Precisão média: {np.mean(precs_under):.2f} ± {np.std(precs_under):.2f}")
print(f"Recall médio: {np.mean(recalls_under):.2f} ± {np.std(recalls_under):.2f}")
print(f"F1-Score médio: {np.mean(f1s_under):.2f} ± {np.std(f1s_under):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.84 ± 0.05
Precisão média: 0.88 ± 0.04
Recall médio: 0.80 ± 0.08
F1-Score médio: 0.83 ± 0.06


In [None]:
# Plain NumPy arrays of the oversampled data, indexed by the fold splits
X_array_over = X_over.values if hasattr(X_over, "values") else np.array(X_over)
y_array_over = y_over.values if hasattr(y_over, "values") else np.array(y_over)

# Stratified 10-fold cross-validation with a fixed shuffle seed
skf_over = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One list of per-fold values for each metric
accs_over, precs_over, recalls_over, f1s_over = [], [], [], []

for fold_over, (train_idx_over, test_idx_over) in enumerate(skf_over.split(X_array_over, y_array_over)):
    X_train_over, y_train_over = X_array_over[train_idx_over], y_array_over[train_idx_over]
    X_test_over, y_test_over = X_array_over[test_idx_over], y_array_over[test_idx_over]

    # k-nearest neighbours with k=100.
    # NOTE(review): features are used unscaled here; confirm that is
    # intended for a distance-based model.
    knn_model_over = KNeighborsClassifier(n_neighbors=100).fit(X_train_over, y_train_over)
    y_pred_over = knn_model_over.predict(X_test_over)

    # Score the held-out fold
    acc_over, prec_over, rec_over, f1_over = (
        accuracy_score(y_test_over, y_pred_over),
        precision_score(y_test_over, y_pred_over),
        recall_score(y_test_over, y_pred_over),
        f1_score(y_test_over, y_pred_over),
    )

    accs_over.append(acc_over)
    precs_over.append(prec_over)
    recalls_over.append(rec_over)
    f1s_over.append(f1_over)

# Mean and standard deviation across the folds
print("MÉDIAS FINAIS (± desvio padrão):")
print(f"Acurácia média: {np.mean(accs_over):.2f} ± {np.std(accs_over):.2f}")
print(f"Precisão média: {np.mean(precs_over):.2f} ± {np.std(precs_over):.2f}")
print(f"Recall médio: {np.mean(recalls_over):.2f} ± {np.std(recalls_over):.2f}")
print(f"F1-Score médio: {np.mean(f1s_over):.2f} ± {np.std(f1s_over):.2f}")

MÉDIAS FINAIS (± desvio padrão):
Acurácia média: 0.87 ± 0.02
Precisão média: 0.90 ± 0.03
Recall médio: 0.85 ± 0.04
F1-Score médio: 0.87 ± 0.02
