# Exercises

In this section we have two exercises:
1. Implement the polynomial kernel.
2. Implement the multiclass C-SVM.

## Polynomial kernel

You need to extend the ``build_kernel`` function and implement the polynomial kernel if the ``kernel_type`` is set to 'poly'. The equation that needs to be implemented:
\begin{equation}
K=(X^{T}*Y)^{d}.
\end{equation}

In [None]:
from sklearn.datasets import load_iris
import numpy as np
from sklearn.model_selection import train_test_split
import cvxopt

iris = load_iris()
data_set = iris.data
labels = iris.target

data_set = data_set[labels!=2]
labels = labels[labels!=2]

train_data_set, test_data_set, train_labels, test_labels = train_test_split(
    data_set, labels, test_size=0.2, random_state=15)

train_labels[train_labels<1] = -1
test_labels[test_labels<1] = -1

objects_count = len(train_labels)

In [None]:
def build_kernel(data_set, targets, kernel_type='linear', degree = 2):
    kernel = np.dot(data_set, data_set.T)
    if kernel_type == 'rbf':
        sigma = 1.0
        objects_count = len(data_set)
        b = np.ones((len(data_set), 1))
        kernel -= 0.5 * (np.dot((np.diag(kernel)*np.ones((1, objects_count))).T, b.T)
                         + np.dot(b, (np.diag(kernel) * np.ones((1, objects_count))).T.T))
        kernel = np.exp(kernel / (2. * sigma ** 2))
    elif kernel_type == 'poly':
        kernel = np.dot(data_set.T, targets)**degree
    return kernel

In [None]:
train_data_set

In [None]:
train_labels

In [None]:
kernel = build_kernel(train_data_set, train_labels, kernel_type='poly')
kernel

## Implement a multiclass C-SVM

Use the classification method that we used in notebook 7.3 and IRIS dataset to build a multiclass C-SVM classifier. Most implementation is about a function that will return the proper data set that need to be used for the prediction. You need to implement:
- ``choose_set_for_label``
- ``get_labels_count``

In [None]:
def choose_set_for_label(data_set, labels, label, test_ratio=0.3):
    # get idx with the selected label
    label_indices = np.where(labels == label)[0]
    
    #print(label_indices)
    
    # shuffle
    np.random.shuffle(label_indices)
    test_size = int(len(label_indices) * test_ratio)
    
    # split data
    test_indices = label_indices[:test_size]
    train_indices = label_indices[test_size:]
    train_data_set = data_set[train_indices]
    test_data_set = data_set[test_indices]
    
    # split labels
    train_labels = labels[train_indices]
    test_labels = labels[test_indices]

    return train_data_set, test_data_set, train_labels, test_labels

In [None]:
choose_set_for_label(data_set, labels,  1)

In [None]:
train_labels

In [None]:
def get_labels_count(labels):
    unique_labels, counts = np.unique(labels, return_counts=True)
    labels_count = dict(zip(unique_labels, counts))
    
    return labels_count

Use the code that we have implemented earlier:

In [78]:
def train(train_data_set, train_labels, kernel_type='linear', C=10, threshold=1e-5):
    kernel = build_kernel(train_data_set, kernel_type=kernel_type)

    objects_count = len(train_labels)
    P = train_labels * train_labels.transpose() * kernel
    q = -np.ones((objects_count, 1))
    G = np.concatenate((np.eye(objects_count), -np.eye(objects_count)))
    h = np.concatenate((C * np.ones((objects_count, 1)), np.zeros((objects_count, 1))))

    A = train_labels.reshape(1, objects_count)
    A = A.astype(float)
    b = 0.0

    sol = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q), cvxopt.matrix(G), cvxopt.matrix(h), cvxopt.matrix(A), cvxopt.matrix(b))

    lambdas = np.array(sol['x'])

    support_vectors_id = np.where(lambdas > threshold)[0]
    vector_number = len(support_vectors_id)
    support_vectors = train_data_set[support_vectors_id, :]

    lambdas = lambdas[support_vectors_id]
    targets = train_labels[support_vectors_id]

    b = np.sum(targets)
    for n in range(vector_number):
        b -= np.sum(lambdas * targets * np.reshape(kernel[support_vectors_id[n], support_vectors_id], (vector_number, 1)))
    b /= len(lambdas)

    return lambdas, support_vectors, support_vectors_id, b, targets, vector_number

def build_kernel(data_set1, data_set2=None, kernel_type='linear'):
    if data_set2 is None:
        data_set2 = data_set1
    kernel = np.dot(data_set1, data_set2.T)
    if kernel_type == 'rbf':
        sigma = 1.0
        objects_count = len(data_set1)
        b = np.ones((1, len(data_set2)))
        kernel -= 0.5 * (np.dot((np.diag(kernel)*np.ones((1, objects_count))).T, b.T)
                         + np.dot(b, (np.diag(kernel) * np.ones((1, objects_count))).T.T))
        kernel = np.exp(kernel / (2. * sigma ** 2))
    return kernel        

def train_mc(train_data_set, train_labels, kernel_type='linear', C=10, threshold=1e-5):
    labels_count = get_labels_count(train_labels)
    lambdas, support_vectors, support_vectors_id, b = {}, {}, {}, {}
    
    for label in labels_count:
        # Choose the training subset for the current label
        subset_data, _, subset_labels, _ = choose_set_for_label(train_data_set, train_labels, label)
        
        # Train binary SVM for the current label using chosen kernel
        curr_lambdas, curr_sv, curr_sv_id, curr_b, curr_t, curr_vn = train(subset_data, subset_labels, kernel_type, C, threshold)
        
        # Save the trained model parameters for the current label
        lambdas[label], support_vectors[label], support_vectors_id[label], b[label] = curr_lambdas, curr_sv, curr_sv_id, curr_b
        
    return lambdas, support_vectors, support_vectors_id, b

def classify_rbf(test_data_set, train_data_set, models):
    kernel = build_kernel(test_data_set, train_data_set, kernel_type='rbf')
    labels_count = len(models)
    
    predictions = []
    for i in range(len(test_data_set)):
        scores = np.zeros(labels_count)
        for label in range(labels_count):
            # Get the trained model parameters for the current label
            lambdas, sv, sv_id, b = models[label]
            
            # Compute the kernel function between the test sample and support vectors
            curr_kernel = kernel[i, sv_id]
            c = (1. / 1.0 * np.sum(test_data_set[i] ** 2)) * np.ones((1, np.shape(sv)[0]))
            sv_diag = np.diag(np.dot(sv, sv.T))
            aa = np.dot(sv_diag[np.newaxis, :], np.ones((np.shape(kernel)[0], 1)))
            curr_kernel = curr_kernel - 0.5 * c - 0.5 * aa
            curr_kernel = np.exp(curr_kernel / (2. * 1.0 ** 2))
            
            # Compute the score for the current label
            scores[label] = np.dot(lambdas.T, curr_kernel * sv.T) + b
        
        # Assign the label with the highest score to the current test sample
        prediction = np.argmax(scores)
        predictions.append(prediction)
    
    return np.array(predictions)

In [79]:
lambdas, support_vectors, support_vectors_id, b = train_mc(train_data_set, train_labels, kernel_type='rbf')
predicted = classify_rbf(test_data_set, train_data_set, zip(lambdas.values(), support_vectors.values(), support_vectors_id.values(), b.values()))
accuracy = accuracy_score(predicted, test_labels)
print(f'Accuracy: {accuracy}')

ValueError: shapes (29,1) and (29,1) not aligned: 1 (dim 1) != 29 (dim 0)

# I don't know how to solve this error. It looks just like a simple dimension error but I spent hours trying to transpose and reshape stuff and some other methods and it doesn't work. I don't know how to fix it but the rest of the code seems to be right