In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.model_selection import LeaveOneOut, cross_val_score, GroupKFold, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from tabulate import tabulate
from sklearn.metrics import confusion_matrix
from sklearn.feature_selection import SelectKBest, f_classif
from scipy.fft import fft
import time
from pyentrp import entropy as ent
import warnings
from sklearn.exceptions import ConvergenceWarning
import csv
from sklearn.pipeline import Pipeline

In [2]:
# Function to reduce features using SelectKBest
def select_features(X_train, y_train, X_test, k=10):
    """Keep the ``k`` best-scoring features according to ``f_classif``.

    The selector is fitted on the training split only and the same fitted
    selector is then applied to both splits, so the test data never
    influences which features are chosen.

    Parameters
    ----------
    X_train : training feature matrix used to fit the selector.
    y_train : training targets used to score the features.
    X_test : test feature matrix, reduced with the already-fitted selector.
    k : number of features to keep (forwarded to ``SelectKBest``).

    Returns
    -------
    tuple
        ``(X_train_reduced, X_test_reduced)``.
    """
    fitted_selector = SelectKBest(score_func=f_classif, k=k).fit(X_train, y_train)
    return fitted_selector.transform(X_train), fitted_selector.transform(X_test)

In [5]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

def run_svm_grid_search(features, labels, max_iter=5000):
    """Grid-search a feature-selection + LinearSVC pipeline and report the best result.

    The pipeline is ``SelectKBest(f_classif)`` followed by ``LinearSVC``.
    The search covers ``k`` from 30 to 50 (inclusive) and four values of
    ``C``, with the penalty fixed to ``'l1'`` and the loss to
    ``'squared_hinge'``. Candidates are scored by accuracy with ``cv=5``
    and evaluated in parallel (``n_jobs=-1``). The best parameters and the
    best mean cross-validation score are printed.

    Parameters
    ----------
    features : feature matrix, passed unchanged to ``GridSearchCV.fit``.
    labels : target vector, passed unchanged to ``GridSearchCV.fit``.
    max_iter : int, default 5000
        Iteration cap forwarded to ``LinearSVC``; raise it when the solver
        does not converge instead of duplicating this function.

    Returns
    -------
    GridSearchCV
        The fitted search object, so callers can inspect ``best_params_``,
        ``best_score_``, ``cv_results_`` or reuse ``best_estimator_``.
    """
    pipe = Pipeline([
        ('select_k_best', SelectKBest(score_func=f_classif)),
        # The l1 penalty with squared_hinge loss is only supported by the
        # primal formulation, hence dual=False.
        ('svm', LinearSVC(dual=False, max_iter=max_iter)),
    ])

    # Candidate numbers of selected features: 30, 31, ..., 50.
    k_range = list(range(30, 51, 1))

    params = {
        'select_k_best__k': k_range,
        'svm__C': [100, 250, 500, 1000],  # explore values of C from 100 upwards
        'svm__penalty': ['l1'],           # restricted to l1 (best in earlier runs)
        'svm__loss': ['squared_hinge'],   # restricted to squared_hinge (best in earlier runs)
    }

    grid_search = GridSearchCV(pipe, param_grid=params, cv=5, scoring='accuracy', n_jobs=-1)
    grid_search.fit(features, labels)

    print("Optimized parameters for SVM:", grid_search.best_params_)
    print("Optimized cross-validation score: {:.2f}".format(grid_search.best_score_))

    return grid_search

def process_file(file_path):
    """Read one feature CSV and run the SVM grid search on its contents.

    The ``label`` column is used as the target; every column other than
    ``subject_id`` and ``label`` is used as a feature.

    Parameters
    ----------
    file_path : path of the CSV file to load with ``pd.read_csv``.
    """
    dataset = pd.read_csv(file_path)
    target = dataset['label']
    # Everything that is neither the subject identifier nor the target is a feature.
    run_svm_grid_search(dataset.drop(columns=['subject_id', 'label']), target)

def main(folder_path):
    """Run ``process_file`` on every ``features_*.csv`` file in a folder.

    Only directory entries whose name starts with ``"features_"`` and ends
    with ``".csv"`` are processed; a header line is printed before each one.

    Parameters
    ----------
    folder_path : directory to scan (not recursive).
    """
    # os.listdir returns entries in arbitrary order; sort them so repeated
    # runs process (and print) the files in the same sequence.
    for file_name in sorted(os.listdir(folder_path)):
        if file_name.startswith("features_") and file_name.endswith(".csv"):
            file_path = os.path.join(folder_path, file_name)
            print(f"\nProcessing file: {file_name}")
            process_file(file_path)

# Change this path to the folder that holds the features_*.csv files on your machine
main('/home/ximo/Escritorio/ProyectoTFG/featuresExtended')



Processing file: features_1.csv


KeyboardInterrupt: 

Processing file: features_1.csv
Optimized parameters for SVM: {'select_k_best__k': 37, 'svm__C': 100, 'svm__loss': 'squared_hinge', 'svm__penalty': 'l1'}
Optimized cross-validation score: 0.47

Processing file: features_4.csv
Optimized parameters for SVM: {'select_k_best__k': 30, 'svm__C': 100, 'svm__loss': 'squared_hinge', 'svm__penalty': 'l1'}
Optimized cross-validation score: 0.46

Processing file: features_3.csv
Optimized parameters for SVM: {'select_k_best__k': 42, 'svm__C': 100, 'svm__loss': 'squared_hinge', 'svm__penalty': 'l1'}
Optimized cross-validation score: 0.46

Processing file: features_2.csv
Optimized parameters for SVM: {'select_k_best__k': 39, 'svm__C': 100, 'svm__loss': 'squared_hinge', 'svm__penalty': 'l1'}
Optimized cross-validation score: 0.48

In [7]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

def run_svm_grid_search(features, labels, max_iter=10000):
    """Grid-search a feature-selection + LinearSVC pipeline and report the best result.

    The pipeline is ``SelectKBest(f_classif)`` followed by ``LinearSVC``.
    The search covers ``k`` from 30 to 50 (inclusive) and four values of
    ``C``, with the penalty fixed to ``'l1'`` and the loss to
    ``'squared_hinge'``. Candidates are scored by accuracy with ``cv=5``
    and evaluated in parallel (``n_jobs=-1``). The best parameters and the
    best mean cross-validation score are printed.

    Parameters
    ----------
    features : feature matrix, passed unchanged to ``GridSearchCV.fit``.
    labels : target vector, passed unchanged to ``GridSearchCV.fit``.
    max_iter : int, default 10000
        Iteration cap forwarded to ``LinearSVC``; adjust it here rather
        than redefining the function when the solver needs more (or fewer)
        iterations.

    Returns
    -------
    GridSearchCV
        The fitted search object, so callers can inspect ``best_params_``,
        ``best_score_``, ``cv_results_`` or reuse ``best_estimator_``.
    """
    pipe = Pipeline([
        ('select_k_best', SelectKBest(score_func=f_classif)),
        # The l1 penalty with squared_hinge loss is only supported by the
        # primal formulation, hence dual=False.
        ('svm', LinearSVC(dual=False, max_iter=max_iter)),
    ])

    # Candidate numbers of selected features: 30, 31, ..., 50.
    k_range = list(range(30, 51, 1))

    params = {
        'select_k_best__k': k_range,
        'svm__C': [100, 250, 500, 1000],  # explore values of C from 100 upwards
        'svm__penalty': ['l1'],           # restricted to l1 (best in earlier runs)
        'svm__loss': ['squared_hinge'],   # restricted to squared_hinge (best in earlier runs)
    }

    grid_search = GridSearchCV(pipe, param_grid=params, cv=5, scoring='accuracy', n_jobs=-1)
    grid_search.fit(features, labels)

    print("Optimized parameters for SVM:", grid_search.best_params_)
    print("Optimized cross-validation score: {:.2f}".format(grid_search.best_score_))

    return grid_search

def process_file(file_path):
    """Read one feature CSV and run the SVM grid search on its contents.

    The ``label`` column is used as the target; every column other than
    ``subject_id`` and ``label`` is used as a feature.

    Parameters
    ----------
    file_path : path of the CSV file to load with ``pd.read_csv``.
    """
    dataset = pd.read_csv(file_path)
    target = dataset['label']
    # Everything that is neither the subject identifier nor the target is a feature.
    run_svm_grid_search(dataset.drop(columns=['subject_id', 'label']), target)

def main(folder_path):
    """Run ``process_file`` on every ``features_*.csv`` file in a folder.

    Only directory entries whose name starts with ``"features_"`` and ends
    with ``".csv"`` are processed; a header line is printed before each one.

    Parameters
    ----------
    folder_path : directory to scan (not recursive).
    """
    # os.listdir returns entries in arbitrary order; sort them so repeated
    # runs process (and print) the files in the same sequence.
    for file_name in sorted(os.listdir(folder_path)):
        if file_name.startswith("features_") and file_name.endswith(".csv"):
            file_path = os.path.join(folder_path, file_name)
            print(f"\nProcessing file: {file_name}")
            process_file(file_path)

# Change this path to the folder that holds the features_*.csv files on your machine
main('/home/ximo/Escritorio/ProyectoTFG/featuresExtended')



Processing file: features_1.csv




Optimized parameters for SVM: {'select_k_best__k': 44, 'svm__C': 250, 'svm__loss': 'squared_hinge', 'svm__penalty': 'l1'}
Optimized cross-validation score: 0.49

Processing file: features_4.csv




Optimized parameters for SVM: {'select_k_best__k': 46, 'svm__C': 100, 'svm__loss': 'squared_hinge', 'svm__penalty': 'l1'}
Optimized cross-validation score: 0.46

Processing file: features_3.csv




Optimized parameters for SVM: {'select_k_best__k': 46, 'svm__C': 250, 'svm__loss': 'squared_hinge', 'svm__penalty': 'l1'}
Optimized cross-validation score: 0.47

Processing file: features_2.csv




Optimized parameters for SVM: {'select_k_best__k': 50, 'svm__C': 250, 'svm__loss': 'squared_hinge', 'svm__penalty': 'l1'}
Optimized cross-validation score: 0.49


