In [156]:
import numpy as np
from sklearn.svm import SVR
import torch
from sklearn.model_selection import KFold
from itertools import product

In [157]:
def mean_euclidean_error(vectors1, vectors2):
    """
    Compute the mean Euclidean error between two sets of 3D vectors.

    Parameters:
    - vectors1: array-like of shape (N, 3) representing the first set of 3D vectors
    - vectors2: array-like of shape (N, 3) representing the second set of 3D vectors

    Returns:
    - mean_error: Mean of the row-wise Euclidean distances between the two sets

    Raises:
    - ValueError: if the inputs are not two-dimensional, do not share the same
      shape, or do not have exactly 3 columns
    """
    # Accept any array-like (lists, tuples, ...) instead of requiring ndarrays
    vectors1 = np.asarray(vectors1)
    vectors2 = np.asarray(vectors2)

    # Check the rank first: reading shape[1] of a 1-D input would raise
    # IndexError rather than the ValueError callers are told to expect
    if vectors1.ndim != 2 or vectors1.shape != vectors2.shape or vectors1.shape[1] != 3:
        raise ValueError("Input arrays must be of shape (N, 3)")

    # One Euclidean distance per row (axis=1 collapses the 3 coordinates)
    euclidean_distance = np.linalg.norm(vectors1 - vectors2, axis=1)

    # Average the per-row distances over the N rows
    return np.mean(euclidean_distance)

In [158]:
from sklearn.model_selection import train_test_split
# Read the whole CSV into a single numeric matrix.
dataset = np.loadtxt('ML-CUP23-TR.csv', delimiter=',')

# Columns 1..10 become the inputs (X) and columns 11..13 the targets (y);
# column 0 is not used.
X, y = dataset[:, 1:11], dataset[:, 11:14]

# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [159]:
class MultiSVM:
    """
    Three independent support vector regressors, one per target column.

    All three regressors share the same kernel; each has its own
    regularization parameter (C) and epsilon.
    """

    def __init__(self, kernel='rbf', C0=1.0, C1=1.0, C2=1.0, epsilon0=0.1, epsilon1=0.1, epsilon2=0.1):
        """
        Parameters:
        - kernel: kernel passed to all three SVRs
        - C0, C1, C2: regularization parameter of the SVR for target column 0, 1, 2
        - epsilon0, epsilon1, epsilon2: epsilon of the SVR for target column 0, 1, 2
        """
        self.svr0 = SVR(kernel=kernel, C=C0, epsilon=epsilon0)
        self.svr1 = SVR(kernel=kernel, C=C1, epsilon=epsilon1)
        self.svr2 = SVR(kernel=kernel, C=C2, epsilon=epsilon2)

    def fit(self, X, y):
        """
        Fit each regressor on its own target column.

        Parameters:
        - X: input data, one row per sample
        - y: array-like targets with one row per sample; columns 0, 1 and 2 are used

        Returns:
        - self, so that calls can be chained (model.fit(X, y).predict(X))

        Raises:
        - ValueError: if y is not two-dimensional or has fewer than 3 columns
        """
        # Coerce so that nested lists can be sliced column-wise below
        y = np.asarray(y)
        if y.ndim != 2 or y.shape[1] < 3:
            raise ValueError(f"y must be a 2-D array with at least 3 columns, got shape {y.shape}")

        for column, svr in enumerate((self.svr0, self.svr1, self.svr2)):
            svr.fit(X, y[:, column])
        return self

    def predict(self, X):
        """
        Predict all three targets.

        Returns:
        - array with one row per sample and one column per regressor
          (svr0, svr1, svr2 in that order)
        """
        return np.column_stack((self.svr0.predict(X), self.svr1.predict(X), self.svr2.predict(X)))

In [160]:
def perform_grid_search_kfold(kernels, C0s, C1s, C2s, epsilon0s, epsilon1s, epsilon2s, k_folds, x, y):
    """
    Perform grid search with k-fold cross-validation over MultiSVM hyperparameters.

    Every combination of kernel, per-output C and per-output epsilon is scored by
    the mean Euclidean error (MEE) on each validation fold; the combination with
    the lowest MEE averaged over the folds is returned.

    Parameters:
    - kernels (iterable): Kernels to search (one kernel is shared by the three regressors).
    - C0s, C1s, C2s (iterable): C values to search for the regressor of output 0, 1, 2.
    - epsilon0s, epsilon1s, epsilon2s (iterable): Epsilon values to search for the
      regressor of output 0, 1, 2.
    - k_folds (int): Number of folds for cross-validation.
    - x (array-like): Input data, one row per sample.
    - y (array-like): Target data, one row per sample.

    Returns:
    - list: [kernel, C0, C1, C2, epsilon0, epsilon1, epsilon2] of the best
      combination, or an empty list if no combination scored below infinity
      (for example when one of the grids is empty).

    Progress and per-combination results are printed as the search runs.
    """
    # Materialize the grid once: counting it with a separate product() pass
    # would exhaust one-shot iterables and leave the search loop with nothing.
    combinations = list(product(kernels, C0s, C1s, C2s, epsilon0s, epsilon1s, epsilon2s))
    num_combinations = len(combinations)
    print('total number of grid search combinations explored:',num_combinations)

    # Index arrays from KFold are used for fancy indexing below
    x = np.asarray(x)
    y = np.asarray(y)

    # The split is shuffled with a fixed seed, so it is the same for every
    # combination: compute the fold indices once instead of once per combination.
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    folds = list(kf.split(x, y))

    best_mee = float('inf')
    best_hyperparams = []

    # NOTE(review): MultiSVM fits its three SVRs independently, so each
    # per-output (C, epsilon) pair is refit for every combination of the other
    # outputs' values. Caching per-output validation predictions would cut the
    # number of fits considerably if the search is too slow.
    for counter, (kernel, C0, C1, C2, epsilon0, epsilon1, epsilon2) in enumerate(combinations, start=1):
        print(f'{counter}/{num_combinations} Hyperparams:',kernel, C0, C1, C2, epsilon0, epsilon1, epsilon2)

        val_mees = []

        # Perform K-fold cross-validation
        for train_indices, val_indices in folds:
            # A fresh model per fold so nothing carries over between folds
            model = MultiSVM(kernel, C0, C1, C2, epsilon0, epsilon1, epsilon2)
            model.fit(x[train_indices], y[train_indices])
            val_mees.append(mean_euclidean_error(model.predict(x[val_indices]), y[val_indices]))

        mean_val_mee = np.mean(val_mees)
        print(f'Final Results: kernel={kernel}; C={C0,C1,C2}; epsilon={epsilon0,epsilon1,epsilon2} --> '
            f'val_mee = {mean_val_mee:.4} +- {np.std(val_mees):.4}')

        # Strict comparison: on ties the first combination encountered is kept
        if mean_val_mee < best_mee:
            best_mee = mean_val_mee
            best_hyperparams = [kernel, C0, C1, C2, epsilon0, epsilon1, epsilon2]

    print('Best Hp:',best_hyperparams)
    return best_hyperparams

In [161]:
# Disabled experiment kept as a bare string literal, so none of it executes:
# it would fit one MultiSVM (rbf kernel, C=100, epsilon=0.1 for every output)
# on the training split and score its predictions on the held-out test split
# with mean_euclidean_error.
'''
model = MultiSVM(kernel='rbf', C0=100, C1=100, C2=100, epsilon0=0.1, epsilon1=0.1, epsilon2=0.1)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)

mean_euclidean_error(y_pred,y_test)

'''

"\nmodel = MultiSVM(kernel='rbf', C0=100, C1=100, C2=100, epsilon0=0.1, epsilon1=0.1, epsilon2=0.1)\nmodel.fit(X_train,y_train)\ny_pred = model.predict(X_test)\n\nmean_euclidean_error(y_pred,y_test)\n\n"

In [162]:
# Search space: a single kernel, and three candidate values for each of the
# per-output C and epsilon settings (3**6 = 729 combinations in total).
kernels = ['rbf']
C0s = [100, 200, 300]
C1s = [100, 200, 300]
C2s = [100, 200, 300]
epsilon0s = [0.1, 0.2, 0.3]
epsilon1s = [0.1, 0.2, 0.3]
epsilon2s = [0.1, 0.2, 0.3]

# 5-fold cross-validated search on the training split only; the test split is
# not passed to the search.
best_hyperparams = perform_grid_search_kfold(
    kernels=kernels,
    C0s=C0s,
    C1s=C1s,
    C2s=C2s,
    epsilon0s=epsilon0s,
    epsilon1s=epsilon1s,
    epsilon2s=epsilon2s,
    k_folds=5,
    x=X_train,
    y=y_train,
)

total number of grid search combinations explored: 729
1/729 Hyperparams: rbf 100 100 100 0.1 0.1 0.1
Final Results: kernel=rbf; C=(100, 100, 100); epsilon=(0.1, 0.1, 0.1) --> val_mee = 0.8765 +- 0.07218
2/729 Hyperparams: rbf 100 100 100 0.1 0.1 0.2
Final Results: kernel=rbf; C=(100, 100, 100); epsilon=(0.1, 0.1, 0.2) --> val_mee = 0.8802 +- 0.0692
3/729 Hyperparams: rbf 100 100 100 0.1 0.1 0.3
Final Results: kernel=rbf; C=(100, 100, 100); epsilon=(0.1, 0.1, 0.3) --> val_mee = 0.8929 +- 0.06706
4/729 Hyperparams: rbf 100 100 100 0.1 0.2 0.1
Final Results: kernel=rbf; C=(100, 100, 100); epsilon=(0.1, 0.2, 0.1) --> val_mee = 0.873 +- 0.06825
5/729 Hyperparams: rbf 100 100 100 0.1 0.2 0.2
Final Results: kernel=rbf; C=(100, 100, 100); epsilon=(0.1, 0.2, 0.2) --> val_mee = 0.8766 +- 0.06548
6/729 Hyperparams: rbf 100 100 100 0.1 0.2 0.3
Final Results: kernel=rbf; C=(100, 100, 100); epsilon=(0.1, 0.2, 0.3) --> val_mee = 0.8893 +- 0.06333
7/729 Hyperparams: rbf 100 100 100 0.1 0.3 0.1
Final 

KeyboardInterrupt: 