In [None]:
# Imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from math import *
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_curve, auc
from sklearn import datasets
from sklearn import preprocessing
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits
import time
from numpy import linalg as LA
from sklearn import svm
from sklearn.model_selection import GridSearchCV 
from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedKFold, cross_val_score, KFold
from scipy.stats import sem
from numpy import linalg as LA
from sklearn.metrics import pairwise_distances
from copy import deepcopy
from sklearn.cluster import KMeans
from numba import jit, cuda
import warnings
warnings.filterwarnings("ignore")

In [None]:
import sys

# From this cell onwards everything written with print() is appended to
# `file_path` instead of being shown in the notebook output area.
file_path = 'output.txt'
sys.stdout = open(file_path, mode="a")

In [None]:
def L0_norm(w, threshold):
    """Count the entries of ``w`` whose absolute value is strictly above ``threshold``.

    Parameters
    ----------
    w : iterable of numbers
        Weight vector to inspect.
    threshold : number
        Magnitude an entry must exceed to be counted.

    Returns
    -------
    int
        Number of entries with ``abs(entry) > threshold``.
    """
    return sum(1 for wi in w if abs(wi) > threshold)

In [None]:
def compute_margin(X, y, w, b):
    """Return the smallest geometric margin of the hyperplane ``(w, b)`` on ``(X, y)``.

    The margin of sample ``i`` is ``y[i] * (X[i] . w + b) / ||w||_2``.

    Parameters
    ----------
    X : array-like, one row per sample
        Feature matrix; only the first ``len(y)`` rows are used.
    y : array-like
        Labels, one per sample. A column vector is flattened.
    w : array-like
        Weight vector. A column vector is flattened.
    b : number
        Bias term.

    Returns
    -------
    number
        The minimum margin when it is non-negative, i.e. every sample is on
        the correct side. ``0`` when some sample is misclassified, when there
        are no samples, or when ``w`` is the zero vector.
    """
    y = np.ravel(y)
    w = np.ravel(w)
    n_samples = y.shape[0]
    if n_samples == 0:
        # No samples: there is no margin to measure.
        return 0

    # The norm does not depend on the sample, so compute it once.
    norm_w = np.sqrt(np.dot(w, w))
    if norm_w == 0:
        # A zero weight vector does not define a hyperplane.
        return 0

    X = np.asarray(X)[:n_samples]
    margins = y * (np.dot(X, w) + b) / norm_w
    smallest = margins.min()
    if smallest >= 0:
        return smallest
    return 0

In [None]:
import random
from sklearn.base import BaseEstimator, ClassifierMixin
class RBF_IMA(BaseEstimator, ClassifierMixin):
    """RBF network whose output layer is trained by an incremental margin algorithm (IMA).

    ``fit`` builds the hidden layer (K-means centres + Gaussian activations),
    computes a pseudo-inverse output layer (``w_rbf``) and then runs
    ``IM_algorithm`` on the hidden-layer matrix to obtain ``w`` and ``b``.
    Labels are expected to be -1 / +1 (``predict`` returns those values).

    Parameters
    ----------
    n_neurons : int
        Number of hidden neurons (K-means clusters).
    eta : float
        Learning rate of the fixed-margin updates.
    lambda_param : float
        Soft-margin parameter used in the update condition.
    delta_margin : float
        ``(1 + delta_margin) * fixed_margin`` is the minimum margin requested
        on the next IMA iteration.
    IMA_iterations : int
        Maximum number of IMA (outer) iterations.
    max_updates : int
        Maximum number of updates in one run of ``FMP_algorithm``.
    p : {'1', '2', 'inf'}
        Selects the update rule and the norm of ``w`` used by the algorithm.
    """

    # Initialization of the hyper-parameters and learned state
    def __init__(self, n_neurons, eta=0.1, lambda_param=0.01, delta_margin=1e-3,
                 IMA_iterations=20, max_updates=10000, p='2'):
        # NOTE: the default used to be written `10^-3`, which is a bitwise XOR
        # in Python (it evaluates to -9), not the power 10**-3.
        self.n_neurons = n_neurons              # Neurons of the hidden layer of the RBF
        self.eta = eta                          # Learning rate
        self.lambda_param = lambda_param        # Soft-margin parameter
        self.delta_margin = delta_margin        # (1 + delta_margin) * fixed margin defines the minimum next margin of IMA
        self.IMA_iterations = IMA_iterations    # Maximum number of iterations of IMA
        self.max_updates = max_updates          # Maximum number of updates in one execution of FMP
        self.w = np.array([])                   # Output-layer weights obtained by the IMA training
        self.w_rbf = np.array([])               # Output-layer weights obtained by the pseudo-inverse (plain RBF) training
        self.H = np.array([])                   # H matrix of the RBF (computed from the training data)
        self.Z = np.array([])                   # Z matrix of the RBF (not written anywhere else in this class)
        self.b = 0                              # Bias learned by the IMA training
        self.p = p
        self.centers = []                       # RBF centres; when empty, fit() computes them with K-means

    @staticmethod
    @jit(target_backend='cuda')    
    def pdfnvar(pairwise_dist, sigma):
        """Gaussian activation ``exp(-d**2 / (2 * sigma**2))`` applied element-wise."""
        return np.exp(-pairwise_dist ** 2 / (2 * sigma ** 2))

    # Fixed Margin Algorithm
    @staticmethod
    @jit(target_backend='cuda')    
    def FMP_algorithm(X, y, w_init, b_init, fixed_margin, idx, s, lambda_param, eta, max_updates, p):
        """Run the fixed-margin updates until a full pass makes no update.

        Parameters
        ----------
        X, y : arrays
            Hidden-layer matrix and labels.
        w_init, b_init : array, number
            Starting weights and bias. ``w_init`` is copied, never modified.
        fixed_margin : number
            Margin every sample has to reach.
        idx : array
            Visiting order of the samples; reordered in place and returned.
        s : int
            Bookkeeping index for the reordering of ``idx``; returned updated.
        lambda_param, eta, max_updates, p :
            See the class parameters.

        Returns
        -------
        tuple
            ``(w, b, convergence, t, iterations, idx, s)`` where ``t`` is the
            number of updates, ``iterations`` the number of passes over the
            data and ``convergence`` is 1 when ``t <= max_updates``, else 0.
        """
        t = 0
        iterations = 0
        # Work on a copy: the p == '1' branch writes into `w` element by
        # element, which would otherwise modify the caller's weight vector
        # even when this run does not converge.
        w = w_init.copy()
        b = b_init
        # NOTE(review): p == 'inf' uses the L1 norm of w and p == '1' the
        # L-infinity norm -- presumably the dual norm is intended; confirm.
        if p == 'inf':
            norm_w = LA.norm(w, ord=1)
        elif p == '1':
            norm_w = LA.norm(w, ord=np.inf)
        else: # p == 2
            norm_w = sqrt(sum(w**2))
        
        last_t = -1
        lambda_t = 0
        alpha = np.zeros((X.shape[0]))
        while True:
            last_t = t
            e=0
            for k in range(0, y.shape[0]):
                i = int(idx[k])
                # Sample i violates the requested (soft) margin: update.
                if(y[i]*(np.dot(X[i,:], w)+b) <= fixed_margin * norm_w - lambda_param * alpha[i]):
                    if norm_w != 0:
                        lambda_t = 1 - (eta*fixed_margin)/norm_w
                    else:
                        lambda_t = 1
                    alpha = alpha * lambda_t
                    alpha[i] = alpha[i] + eta  
                    if p == 'inf':
                        w = w - eta * (fixed_margin * np.sign(w) - y[i] * X[i,:])
                        norm_w = LA.norm(w, ord=1)
                    elif p == '1':
                        for j in range(len(w)):
                            if abs(w[j]) == norm_w:
                                w[j] = w[j] - eta * (fixed_margin * np.sign(w[j])/sum(np.abs(w) == norm_w) - y[i] * X[i,j])
                            elif abs(w[j]) < norm_w:
                                w[j] = w[j] + eta * (y[i] * X[i,j])
                        norm_w = LA.norm(w, ord=np.inf)
                    else: # p == 2
                        w = w * lambda_t + eta * y[i] * X[i,:]
                        norm_w = sqrt(sum(w**2))
                    b = b + eta*y[i]
                    t += 1
                    e += 1
                    # Move the updated sample towards the front of the visiting order.
                    if k > s:
                        s += 1
                        j = s
                    else:
                        j=e
                    idx[k], idx[j] = idx[j], idx[k]
            iterations += 1
            # Stop on the update budget or after a pass without updates.
            if (t > max_updates or last_t == t):
                break
        if t<= max_updates:
            convergence=1
        else:
            convergence=0
        return w, b, convergence, t, iterations, idx, s

    # IMA Algorithm
    def IM_algorithm(self, X, y):
        """Repeat ``FMP_algorithm`` with a growing fixed margin.

        Starts from ``w = 1e-6`` and ``b = 0`` and stops when an FMP run does
        not converge or after ``IMA_iterations`` iterations. The last
        converged solution is kept in ``self.w`` / ``self.b``; all converged
        solutions are recorded in ``self.ws`` / ``self.bs``.

        Returns
        -------
        tuple
            ``(t, updates, iterations, margin)``: IMA iterations performed,
            total FMP updates, total FMP passes, and a list that only holds
            the initial fixed margin (0).
        """
        y = np.ravel(y)
        self.w = np.ones(self.w_rbf.shape[0]) * 0.000001
        # Reset the bias together with the weights so that calling fit() again
        # on the same instance does not start from the previous solution's b.
        self.b = 0
        self.ws = [self.w]
        self.bs = [self.b]
        fixed_margin = 0
        t = 0
        convergence = 1
        updates = 0
        iterations = 0
        margin = [fixed_margin]
        idx = np.linspace(0, y.shape[0]-1, y.shape[0])
        s = 0
        positive = (y == 1)
        while convergence == 1 and t < self.IMA_iterations:
            w, b, convergence, updates_, iterations_, idx, s = self.FMP_algorithm(
                X, y, self.w, self.b, fixed_margin, idx, s,
                self.lambda_param, self.eta, self.max_updates, self.p)
            if convergence == 1:
                self.w = w
                self.b = b
                self.ws.append(self.w)
                self.bs.append(self.b)
            updates += updates_
            iterations += iterations_

            # Norm of the accepted solution (same norm choice as in FMP_algorithm).
            if self.p == 'inf':
                norm_w = LA.norm(self.w, ord=1)
            elif self.p == '1':
                norm_w = LA.norm(self.w, ord=np.inf)
            else: # p == 2
                norm_w = sqrt(sum(self.w**2))

            # Per-sample margins of the accepted solution, split by class.
            sample_margins = y * (np.dot(X, self.w) + self.b) / norm_w
            gamma1 = sample_margins[positive]
            gamma2 = sample_margins[~positive]
            gamma1 = gamma1[gamma1 >= 0]
            gamma2 = gamma2[gamma2 >= 0]
            min_gamma1 = gamma1.min() if gamma1.size else 0
            min_gamma2 = gamma2.min() if gamma2.size else 0
            # Next target: the mean of the two class-wise minimum margins,
            # but at least (1 + delta_margin) times the current target.
            fixed_margin = max([(min_gamma1 + min_gamma2)/2, (1+self.delta_margin)*fixed_margin])
            t += 1
        return t, updates, iterations, margin

    # Function that manages the training of the IMA RBF
    def fit(self, X, y):
        """Build the hidden layer from ``X`` and train both output layers.

        If ``self.centers`` is empty the centres are computed with K-means;
        otherwise the existing centres are reused.

        Returns
        -------
        tuple
            The value returned by ``IM_algorithm``. Note that this is not
            ``self``.
        """
        y = np.ravel(y)

        # `len(...) == 0` works for the initial empty list and for an ndarray
        # of centres assigned by the caller; `self.centers == []` does not.
        if len(self.centers) == 0:
            # Applying K-means to separate the clusters and capture the centres:
            kmeans = KMeans(n_clusters=self.n_neurons).fit(X)
            self.centers = kmeans.cluster_centers_

        pairwise_dist = pairwise_distances(X, self.centers)

        # Single width for all neurons: mean sample-to-centre distance.
        self.sigma = np.mean(np.mean(pairwise_dist, axis=1))
        
        self.H = self.pdfnvar(pairwise_dist, self.sigma)

        # Plain RBF output layer (least squares through the pseudo-inverse).
        self.w_rbf = np.dot(np.linalg.pinv(self.H), y)
        
        iterations_IMA, updates, iterations, margin = self.IM_algorithm(self.H, y) 
        
        return iterations_IMA, updates, iterations, margin
            
    # Function to apply the IMA RBF model
    def predict(self, X, use_IMA_w=True):
        """Predict -1 / +1 labels for ``X``.

        Parameters
        ----------
        X : array
            Samples to classify.
        use_IMA_w : bool
            When true use the IMA solution (``w``, ``b``); otherwise use the
            pseudo-inverse weights ``w_rbf`` without bias.

        Returns
        -------
        array
            Sign of the network output, with 0 mapped to -1.
        """
        pairwise_dist = pairwise_distances(X, self.centers)
        H = self.pdfnvar(pairwise_dist, self.sigma)
        if use_IMA_w:
            y_predicted = np.sign(np.dot(H, self.w) + self.b)
        else:
            y_predicted = np.sign(np.dot(H,  self.w_rbf))
        y_predicted[y_predicted==0]=-1
        return y_predicted

In [None]:
def _l0_profile(w):
    """Return the L0 counts of the L2-normalised ``w`` at 20%, 10%, 1% and 0.1% of its largest magnitude."""
    w = w / LA.norm(w, ord=2)
    # Thresholds are relative to the largest magnitude; the signed maximum
    # would give a negative threshold when every weight is negative.
    peak = np.abs(w).max()
    return [L0_norm(w, fraction * peak) for fraction in (0.2, 0.1, 0.01, 0.001)]


def _print_summary(title, rows):
    """Print ``title`` followed by one 'label mean+/-std' line per ``(label, values, fmt)`` row."""
    print(title)
    for label, values, fmt in rows:
        print(label + fmt.format(values.mean()) + "+/-" + fmt.format(values.std()))


def results(X, y, n_splits, p, eta, IMA_iterations, lambda_param):    
    """Cross-validate RBF-IMA (p=2, p=inf, p=1) and the plain RBF and print a summary.

    Parameters
    ----------
    X, y : arrays
        Samples and labels.
    n_splits : int
        Number of stratified folds.
    p : int
        Number of hidden neurons passed to ``RBF_IMA`` (not the norm).
    eta : float
        Learning rate passed to ``RBF_IMA``.
    IMA_iterations : int
        Maximum number of IMA iterations passed to ``RBF_IMA``.
    lambda_param : sequence of 3 numbers
        Soft-margin parameter per variant: index 0 is used for p='1',
        index 1 for p='2' and index 2 for p='inf'.

    Returns
    -------
    list
        The ``margin`` value returned by every ``fit`` call, in training
        order (p=2, p=inf, p=1 for each fold).
    """
    print(f'Parameters: p={p}, eta={eta}, lambda={lambda_param}')

    # Stratified k-fold cross validation. The number of folds must match the
    # size of the result arrays, so it comes from `n_splits`.
    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=72)

    # (norm tag, index into lambda_param), in the order the models are trained.
    variants = (('2', 1), ('inf', 2), ('1', 0))
    ima_metrics = ('train_acc', 'test_acc', 'margin', 'updates',
                   'iterations_IMA', 'iterations_FMP',
                   'l0_20', 'l0_10', 'l0_1', 'l0_01')
    stats = {norm: {name: np.zeros(n_splits) for name in ima_metrics}
             for norm, _ in variants}
    rbf_stats = {name: np.zeros(n_splits) for name in ('train_acc', 'test_acc', 'margin')}

    margins = []

    for fold, (train_index, test_index) in enumerate(kf.split(X, y)):
        X_train, X_test = X[train_index, :], X[test_index, :]
        y_train, y_test = y[train_index], y[test_index]

        # The first variant computes the K-means centres; the others reuse
        # them so that all variants share the same hidden layer.
        centers = []
        for norm, lambda_index in variants:
            # 1e-3 is the intended value; `10^-3` is a bitwise XOR (-9).
            clf = RBF_IMA(n_neurons=p, eta=eta, lambda_param=lambda_param[lambda_index],
                          delta_margin=1e-3, IMA_iterations=IMA_iterations,
                          max_updates=10000, p=norm)
            clf.centers = centers
            fold_stats = stats[norm]
            (fold_stats['iterations_IMA'][fold],
             fold_stats['updates'][fold],
             fold_stats['iterations_FMP'][fold],
             margin) = clf.fit(X_train, y_train)
            centers = clf.centers
            margins.append(margin)

            y_hat = clf.predict(X_test, use_IMA_w=True)
            y_hat_train = clf.predict(X_train, use_IMA_w=True)
            fold_stats['margin'][fold] = compute_margin(clf.H[:, :], y_train, clf.w, clf.b)
            fold_stats['train_acc'][fold] = accuracy_score(y_train, y_hat_train)
            fold_stats['test_acc'][fold] = accuracy_score(y_test, y_hat)
            (fold_stats['l0_20'][fold],
             fold_stats['l0_10'][fold],
             fold_stats['l0_1'][fold],
             fold_stats['l0_01'][fold]) = _l0_profile(clf.w)

        # Plain RBF: pseudo-inverse weights of the last trained model.
        y_hat = clf.predict(X_test, use_IMA_w=False)
        y_hat_train = clf.predict(X_train, use_IMA_w=False)
        rbf_stats['margin'][fold] = compute_margin(clf.H[:, :], y_train, clf.w_rbf, 0)
        rbf_stats['train_acc'][fold] = accuracy_score(y_train, y_hat_train)
        rbf_stats['test_acc'][fold] = accuracy_score(y_test, y_hat)

    # Report order differs from training order: p=2, p=1, p=inf, plain RBF.
    for norm in ('2', '1', 'inf'):
        s = stats[norm]
        _print_summary(f"********* Results RBF-IMA p={norm} **************", [
            ("Acc train: ", s['train_acc'], '{:.4f}'),
            ("Acc test: ", s['test_acc'], '{:.4f}'),
            ("Iterations IMA: ", s['iterations_IMA'], '{:.4f}'),
            ("Updates: ", s['updates'], '{:.4f}'),
            ("Margin: ", s['margin'], '{:.9f}'),
            ("Norm L0 (20%): ", s['l0_20'], '{:.9f}'),
            ("Norm L0 (10%): ", s['l0_10'], '{:.9f}'),
            ("Norm L0 (1%): ", s['l0_1'], '{:.9f}'),
            ("Norm L0 (0.1%): ", s['l0_01'], '{:.9f}'),
        ])

    _print_summary("********* Results RBF **************", [
        ("Acc train: ", rbf_stats['train_acc'], '{:.4f}'),
        ("Acc test: ", rbf_stats['test_acc'], '{:.4f}'),
        ("Margin: ", rbf_stats['margin'], '{:.9f}'),
    ])
    return margins

In [None]:
def grid_l(X, y, p):
    """Grid-search ``lambda_param`` of ``RBF_IMA`` for each norm variant.

    Parameters
    ----------
    X, y : arrays
        Samples and labels used for the 10-fold search.
    p : int
        Number of hidden neurons (not the norm).

    Returns
    -------
    list
        Best ``lambda_param`` for the variants p='1', p='2' and p='inf',
        in that order.
    """
    # Only lambda is searched; eta keeps the RBF_IMA default.
    parameters = {'lambda_param':[0.01, 0.1, 1, 10, 100]}
    best_lambdas = []
    for norm in ("1", "2", "inf"):
        # 1e-3 is the intended value; `10^-3` is a bitwise XOR (-9).
        estimator = RBF_IMA(n_neurons=p, delta_margin=1e-3, IMA_iterations=20,
                            max_updates=10000, p=norm)
        search = GridSearchCV(estimator, parameters, scoring='accuracy', cv=10, verbose=0)
        search.fit(X, y)
        best_lambdas.append(search.best_params_['lambda_param'])
    return best_lambdas

In [None]:
def run(X,y):
    """Run the full experiment on one dataset for three hidden-layer sizes.

    ``X`` is normalised column-wise, then for n/3, n/5 and n/7 neurons
    (n = number of samples, capped at 1000) ``lambda_param`` is grid-searched
    with ``grid_l`` and the cross-validated report is printed by ``results``.
    """
    # Normalizing data:
    X = preprocessing.normalize(X, axis=0)
    n = min(len(X), 1000)
    for divisor in (3, 5, 7):
        p = n // divisor
        l = grid_l(X, y, p=p)
        print(f"Experimento com {p} neurônios:" )
        results(X, y, n_splits=10, p=p, eta=0.1, IMA_iterations=20, lambda_param=l)
        print("\n\n")

In [None]:
print("IRIS")
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Targets: setosa = 0, versicolor = 1, virginica = 2.
# Reduce to a two-class problem: versicolor/virginica -> +1, setosa -> -1.
np.putmask(y, y > 0, 1)
np.putmask(y, y == 0, -1)
run(X, y)

In [None]:
print("SYNTHETIC")
# Raw string: "\s" is not a valid escape sequence in a normal string literal.
synthetic_dataset = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/synthetic_dataset/synthetic_control.data', sep=r"\s+",  header=None, engine='python')
X = synthetic_dataset.to_numpy()
# Labels by row block: 100 x +1, 200 x -1, 100 x +1, 100 x -1, 100 x +1.
y = np.concatenate((np.ones(100), np.ones(200)*-1, np.ones(100), np.ones(100)*-1,np.ones(100)))
run(X, y)

In [None]:
print("IONOSPHERE")
ionosphere_dataset = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/Ionosphere/ionosphere.data', names=list(range(35)), sep=',')
# Column 34 is the label; the remaining columns are the features.
y = ionosphere_dataset[34].to_numpy()
X = ionosphere_dataset.drop(columns=[34]).to_numpy()
# 'g' -> +1, 'b' -> -1
y[y == 'g'] = 1
y[y == 'b'] = -1
# Rebuild the array so that it gets a numeric dtype.
y = np.array(y.tolist())
run(X, y)

In [None]:
print("WINE")
wine_dataset = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/wine/wine.data', names=['Class', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue', 'OD280/OD315', 'Proline'])
# Select the label as a 1-D array (single brackets); `[['Class']]` would give
# an (n, 1) column, unlike the labels of the other datasets.
y = wine_dataset['Class'].to_numpy()
X = wine_dataset.drop("Class",axis='columns').to_numpy()
# Class 3 -> +1 (class 1 already is +1), class 2 -> -1
y[y == 3] = 1
y[y == 2] = -1
run(X, y)

In [None]:
print("WDBC")
wdbc_dataset = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/WDBC/wdbc.data', names=list(range(32)))
# Column 1 is the label; columns 0 and 1 are left out of the features.
y = wdbc_dataset[1].to_numpy()
X = wdbc_dataset.drop(columns=[0, 1]).to_numpy()
# 'B' -> +1, 'M' -> -1
y[y == 'B'] = 1
y[y == 'M'] = -1
# Rebuild the array so that it gets a numeric dtype.
y = np.array(y.tolist())
run(X, y)

In [None]:
print("SONAR")
sonar_dataset = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/sonar/sonar.all-data', names=list(range(61)), sep=',')
# Column 60 is the label; the remaining columns are the features.
y = sonar_dataset[60].to_numpy()
X = sonar_dataset.drop(columns=[60]).to_numpy()
# 'R' -> +1, 'M' -> -1
y[y == 'R'] = 1
y[y == 'M'] = -1
# Rebuild the array so that it gets a numeric dtype.
y = np.array(y.tolist())
run(X, y)

In [None]:
print("DIABETES")
pima_dataset = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/diabetes/diabetes.csv', sep=",", engine='python')
# 'Outcome' is the label; the remaining columns are the features.
y = pima_dataset['Outcome'].to_numpy()
X = pima_dataset.drop(columns=['Outcome']).to_numpy()
# 0 -> -1 (1 stays +1)
y[y == 0] = -1
y = np.array(y.tolist())
run(X, y)

In [None]:
print("HEART")
statlog_dataset = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/statlog/heart.dat', sep=" ", header=None, engine='python')
# Column 13 is the label; the remaining columns are the features.
y = statlog_dataset[13].to_numpy()
X = statlog_dataset.drop(columns=[13]).to_numpy()
# 2 -> -1
y[y == 2] = -1
y = np.array(y.tolist())
run(X, y)

In [None]:
print("HABERMAN")
haberman = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/haberman/haberman.data', sep=",", header=None, engine='python')
# Column 3 is the label; the remaining columns are the features.
y = haberman[3].to_numpy()
X = haberman.drop(columns=[3]).to_numpy()
# 2 -> -1
y[y == 2] = -1
y = np.array(y.tolist())
run(X, y)

In [None]:
print("TRANSFUSION")
transfusion = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/transfusion/transfusion.data', sep=",", engine='python')
# The donation column is the label; the remaining columns are the features.
y = transfusion["whether he/she donated blood in March 2007"].to_numpy()
X = transfusion.drop(columns=["whether he/she donated blood in March 2007"]).to_numpy()
# 0 -> -1 (1 stays +1)
y[y == 0] = -1
y = np.array(y.tolist())
run(X, y)

In [None]:
print("AUSTRALIAN")
australian = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/australian_credit/australian.dat', header=None, sep=" ", engine='python')
# Rows containing a "?" entry are discarded.
australian = australian.replace("?", np.nan).dropna()
# Column 14 is the label; the remaining columns are the features.
y = australian[14].to_numpy()
X = australian.drop(columns=[14]).to_numpy()
# 0 -> -1 (1 stays +1)
y[y == 0] = -1
y = np.array(y.tolist())
run(X, y)

In [None]:
print("BREAST")
breast = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/breast/breast.data', header=None, sep=",", engine='python')
# Rows containing a "?" entry are discarded.
breast = breast.replace("?", np.nan).dropna()
# Column 10 is the label; columns 0 and 10 are left out of the features.
y = breast[10].to_numpy()
X = breast.drop(columns=[0, 10]).to_numpy()
# 4 -> +1, 2 -> -1
y[y == 4] = 1
y[y == 2] = -1
y = np.array(y.tolist())
run(X, y)

In [None]:
# Loading dataset:
print("GLASS")
headers = ["Id", "RI", "Na", "Mg", "Al", "Si", "K", "Ca", "Ba", "Fe", "Class"]
df = pd.read_csv("~/Documents/UFMG/Graduation/10/Reconhecimento de padrões/list/pattern-recognition-exercises/list_5/databases/glass.csv", names = headers)
# Features: every column except the label and the row identifier.
X = df.drop(columns=["Class", "Id"]).to_numpy()
y = df["Class"].to_numpy()
# Every class above 1 -> -1 (class 1 stays +1)
y[y > 1] = -1
run(X, y)

In [None]:
# Read in the banknote authentication set
print("BANKNOTE")
banknotes = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/banknote/data_banknote_authentication.txt', names=['variance', 'skewness', 'curtosis', 'entropy', 'class'])
# Convert to arrays. The label is selected with single brackets so that it is
# 1-D; `[['class']]` would give an (n, 1) column, unlike the other datasets.
X = banknotes[['variance', 'skewness', 'curtosis', 'entropy']].to_numpy()
y = banknotes['class'].to_numpy()
# 0 -> -1 (1 stays +1)
y[y == 0] = -1
run(X, y)

In [None]:
print("MUSHROOM")
df = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/Mushroom/agaricus-lepiota.data', delimiter =',', header=None)
# Rows containing a "?" entry are discarded.
df = df.replace("?", np.nan).dropna()
# Column 0 is the label; the remaining columns are one-hot encoded.
y = df[0].to_numpy()
X = pd.get_dummies(df.drop(columns=[0])).to_numpy()
# 'e' -> -1, 'p' -> +1
y[y == 'e'] = -1
y[y == 'p'] = 1
# Rebuild the array so that it gets a numeric dtype.
y = np.array(y.tolist())
run(X, y)

In [None]:
print("ROBOT")
robot_dataset = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/robot/lp4_data.csv', delimiter =',')
# The table is reshaped into 117 rows of 90 values each.
X = robot_dataset.to_numpy().reshape((117, 90))
# First 24 rows -> +1, remaining 93 rows -> -1
y = np.concatenate((np.ones(24), -1 * np.ones(117 - 24)))
run(X, y)

In [None]:
print("MAMMOGRAPHIC")
mammo = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/mammographic/mammographic_masses.data', sep=",", header=None, engine='python')
# Rows containing a "?" entry are discarded.
mammo = mammo.replace("?", np.nan).dropna()
# Column 5 is the label; the remaining columns are the features.
y = mammo[5].to_numpy()
X = mammo.drop(columns=[5]).to_numpy()
# 0 -> -1 (1 stays +1)
y[y == 0] = -1
y = np.array(y.tolist())
run(X, y)

In [None]:
print("SPAM")
spam = pd.read_csv('~/Documents/UFMG/Mastering/article/data sets/spam/spambase.data', header=None, sep=",", engine='python')
# Column 57 is the label; the remaining columns are the features.
y = spam[57].to_numpy()
X = spam.drop(columns=[57]).to_numpy()
# 0 -> -1 (1 stays +1)
y[y == 0] = -1
y = np.array(y.tolist())
run(X, y)