In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.model_selection import LeaveOneOut, cross_val_score, GroupKFold, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from tabulate import tabulate
from sklearn.metrics import confusion_matrix
from sklearn.feature_selection import SelectKBest, f_classif
from scipy.fft import fft
import time
from pyentrp import entropy as ent
import warnings
from sklearn.exceptions import ConvergenceWarning
import csv
from sklearn.pipeline import Pipeline

In [2]:
# Function to reduce features using SelectKBest
def select_features(X_train, y_train, X_test, k=10):
    # Initialize and fit SelectKBest
    selector = SelectKBest(score_func=f_classif, k=k)
    selector.fit(X_train, y_train)
    
    # Transform both training and testing data
    X_train_reduced = selector.transform(X_train)
    X_test_reduced = selector.transform(X_test)
    
    return X_train_reduced, X_test_reduced

In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import ExtraTreesClassifier

def run_et_grid_search(features, labels):
    pipe = Pipeline([
        ('select_k_best', SelectKBest(score_func=f_classif)),
        ('et', ExtraTreesClassifier())
    ])

    # Definir rangos para 'k' con saltos de 3 en 3, ajusta según necesidad
    k_range = list(range(20, min(45, features.shape[1] + 1), 1))

    params = {
        'select_k_best__k': k_range,
        'et__n_estimators': [50, 100, 200],  # Número de árboles
        'et__max_depth': [None, 10, 20],     # Profundidad máxima del árbol
        'et__min_samples_split': [5, 10, 15]  # Número mínimo de muestras necesarias para dividir un nodo
    }

    grid_search = GridSearchCV(pipe, param_grid=params, cv=5, scoring='accuracy', n_jobs=-1)
    grid_search.fit(features, labels)
    
    print("Optimized parameters for ExtraTrees:", grid_search.best_params_)
    print("Optimized cross-validation score: {:.2f}".format(grid_search.best_score_))

def process_file(file_path):
    df = pd.read_csv(file_path)
    labels = df['label']
    features = df.drop(columns=['subject_id', 'label'])
    run_et_grid_search(features, labels)

def main(folder_path):
    import os
    for file_name in os.listdir(folder_path):
        if file_name.startswith("features_") and file_name.endswith(".csv"):
            file_path = os.path.join(folder_path, file_name)
            print(f"\nProcessing file: {file_name}")
            process_file(file_path)

# Cambia la ruta según corresponda
main('/home/ximo/Escritorio/ProyectoTFG/featuresExtended')



Processing file: features_1.csv
Optimized parameters for ExtraTrees: {'et__max_depth': None, 'et__min_samples_split': 15, 'et__n_estimators': 100, 'select_k_best__k': 43}
Optimized cross-validation score: 0.52

Processing file: features_4.csv
Optimized parameters for ExtraTrees: {'et__max_depth': None, 'et__min_samples_split': 10, 'et__n_estimators': 100, 'select_k_best__k': 28}
Optimized cross-validation score: 0.51

Processing file: features_3.csv
Optimized parameters for ExtraTrees: {'et__max_depth': None, 'et__min_samples_split': 15, 'et__n_estimators': 100, 'select_k_best__k': 38}
Optimized cross-validation score: 0.52

Processing file: features_2.csv
Optimized parameters for ExtraTrees: {'et__max_depth': None, 'et__min_samples_split': 15, 'et__n_estimators': 200, 'select_k_best__k': 36}
Optimized cross-validation score: 0.52
