In [8]:
import pandas as pd
# Read the feature tables first, then the label tables, each from a CSV in the
# working directory. The file names suggest normalised, PCA-transformed
# features -- TODO confirm against the preprocessing step that wrote them.
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ('X_train_norm_pca.csv', 'X_test_norm_pca.csv')
)
y_train, y_test = (
    pd.read_csv(csv_path)
    for csv_path in ('y_train.csv', 'y_test.csv')
)

In [9]:
def optimize_svm_polynomial(X_train, X_test, y_train, y_test, number_of_trials=None, param_bounds=None, random_seed=8):
    """Tune a degree-2 polynomial-kernel SVC with Optuna and refit the best setting.

    Every trial samples ``C`` and ``gamma`` on a log scale, scores the candidate
    with 5-fold cross-validated accuracy on the training data (the value Optuna
    maximises), then fits the same candidate on the full training set to record
    its accuracy on the test set. The test accuracy is only logged in the
    returned table; it is never fed back to the optimiser.

    Parameters
    ----------
    X_train, X_test
        Feature tables handed to scikit-learn unchanged.
    y_train, y_test
        Label containers exposing ``.values`` (for example a one-column
        DataFrame); they are flattened with ``.values.ravel()``.
    number_of_trials : int or None
        Number of Optuna trials; ``None`` means 50.
    param_bounds : dict or None
        Optional overrides of the search bounds, mapping ``'C'`` and/or
        ``'gamma'`` to ``[low, high]``. Defaults are ``C`` in ``[0.1, 1000]``
        and ``gamma`` in ``[1e-5, 1]``. Both are sampled with ``log=True``.
    random_seed : int
        Passed to ``SVC(random_state=...)`` and used to seed the Optuna TPE
        sampler so that repeated runs explore the same sequence of candidates.

    Returns
    -------
    tuple
        ``(best_model, trials_df)``: the SVC refitted on the training data with
        the best parameters found, and a DataFrame with one row per trial and
        the columns ``trial``, ``cv_accuracy``, ``test_accuracy`` and
        ``parameters`` (a dict holding the sampled ``C`` and ``gamma``).
    """
    import numpy as np
    import pandas as pd
    from sklearn.svm import SVC
    from sklearn.model_selection import cross_val_score
    from sklearn.metrics import accuracy_score
    import optuna

    # Local copy of the defaults, so caller overrides never leak between calls.
    bounds = {
        'C': [0.1, 1000],
        'gamma': [1e-5, 1],
    }
    if param_bounds:
        bounds.update(param_bounds)
    if number_of_trials is None:
        number_of_trials = 50

    # Flatten the labels once instead of on every trial.
    y_train_flat = y_train.values.ravel()
    y_test_flat = y_test.values.ravel()

    # Per-trial log; turned into a DataFrame in one go after the search
    # rather than growing a frame row by row.
    trial_records = []

    def build_model(C, gamma):
        # Single place that fixes the kernel configuration for trials and the final refit.
        return SVC(
            kernel='poly',
            degree=2,
            C=C,
            gamma=gamma,
            random_state=random_seed
        )

    def objective(trial):
        C = trial.suggest_float('C', bounds['C'][0], bounds['C'][1], log=True)
        gamma = trial.suggest_float('gamma', bounds['gamma'][0], bounds['gamma'][1], log=True)

        model = build_model(C, gamma)

        cv_scores = cross_val_score(model, X_train, y_train_flat, cv=5, scoring='accuracy', n_jobs=-1)
        cv_accuracy = np.mean(cv_scores)

        # Extra fit on the whole training set, only to log held-out accuracy.
        model.fit(X_train, y_train_flat)
        test_accuracy = accuracy_score(y_test_flat, model.predict(X_test))

        trial_records.append({
            'trial': trial.number,
            'cv_accuracy': cv_accuracy,
            'test_accuracy': test_accuracy,
            'parameters': {
                'C': C,
                'gamma': gamma,
            }
        })

        return cv_accuracy

    # Seed the sampler: without this the search differs on every run even
    # though the caller supplied random_seed.
    sampler = optuna.samplers.TPESampler(seed=random_seed)
    study = optuna.create_study(direction='maximize', sampler=sampler)
    study.optimize(objective, n_trials=number_of_trials)

    print('Best hyperparameters:', study.best_params)
    print('Best cross-validation accuracy:', study.best_value)

    best_params = study.best_params
    best_model = build_model(best_params['C'], best_params['gamma'])
    best_model.fit(X_train, y_train_flat)

    trials_df = pd.DataFrame(
        trial_records,
        columns=['trial', 'cv_accuracy', 'test_accuracy', 'parameters']
    )

    return best_model, trials_df


In [None]:
# Run an 8-trial search and unpack the two values the function returns.
best_model, trials_df = optimize_svm_polynomial(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    number_of_trials=8,
)


In [None]:
# Show cell contents in full so the per-trial parameter dicts are not truncated.
pd.set_option('display.max_colwidth', None)

# Blended ranking score: 40% cross-validation accuracy, 60% test accuracy.
# NOTE(review): weighting test accuracy into the ranking lets the test set
# influence which configuration is preferred -- confirm this is intended.
trials_df['total_acc'] = trials_df['cv_accuracy'] * 0.4 + trials_df['test_accuracy'] * 0.6

# Place the blended score right after the first column, keeping the rest in order.
ordered_columns = [name for name in trials_df.columns if name != 'total_acc']
ordered_columns.insert(1, 'total_acc')

# Best blended score first.
trials_df = trials_df[ordered_columns].sort_values(by='total_acc', ascending=False)
trials_df