In [141]:
import numpy as np
import scipy.io as sio
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from deslib.des import METADES
from sklearn.metrics import precision_recall_fscore_support


In [89]:
# Load the abalone data matrix; the last column is used as the class label
# by the evaluation cell, every other column is a feature.
csv_path = "/Users/AnhVu/Study/Machine_learning/Data/convert/csv/abalone.csv"
D = np.loadtxt(csv_path, delimiter=',')

n_instances, n_columns = D.shape
n_features = n_columns - 1

print(D.shape)

# Load the precomputed cross-validation folds stored under the 'cv' key.
cv_path = "/Users/AnhVu/Study/Machine_learning/Data/convert/cv/cv_abalone.mat"
cv_mat = sio.loadmat(cv_path)
cv = cv_mat['cv']

print(cv.shape)

(4174, 9)
(1, 100)


In [75]:
# Experiment settings shared by the evaluation cell.
n_iters = 3             # repetitions of the cross-validation
n_folds = 10            # folds evaluated per repetition
validation_rate = 0.3   # fraction intended for the validation (dev) split
rng = np.random.RandomState(42)  # seeded generator so splits are repeatable

# Echo the data set dimensions, one value per line.
print(n_instances, n_features, sep="\n")

4174
8


In [153]:
# Repeated k-fold evaluation of META-DES on top of a pool of three base
# classifiers (LDA, Gaussian naive Bayes, 5-NN). Every run takes one
# precomputed test fold from `cv`; the remaining instances are split into a
# part used to train the pool and a dev part used to fit META-DES.
all_ids = np.arange(D.shape[0])
n_runs = n_iters * n_folds

# One slot per (iteration, fold) run for every reported metric.
errors = np.zeros(n_runs)
precisions_macro = np.zeros(n_runs)
recalls_macro = np.zeros(n_runs)
f1s_macro = np.zeros(n_runs)

precisions_micro = np.zeros(n_runs)
recalls_micro = np.zeros(n_runs)
f1s_micro = np.zeros(n_runs)

for i_iter in range(n_iters):
    base_loop = i_iter * n_folds
    for i_fold in range(n_folds):
        current_loop = base_loop + i_fold

        # The stored fold ids are 1-based (MATLAB convention), Python is
        # 0-based, hence the shift by one. Cast to a signed integer first:
        # depending on how the .mat file was saved, loadmat can return
        # unsigned or floating-point arrays, which are unsafe to subtract
        # from or cannot be used as indices.
        test_ids = cv[0, current_loop][:, 0].astype(np.int64) - 1
        train_ids = np.setdiff1d(all_ids, test_ids)

        X_train = D[train_ids, :-1]
        Y_train = D[train_ids, -1]

        X_test = D[test_ids, :-1]
        Y_test = D[test_ids, -1]

        # Hold out part of the training data as the dev set; the fraction
        # comes from the config cell instead of a duplicated literal.
        X_train, X_dev, Y_train, Y_dev = train_test_split(X_train, Y_train,
                                                    test_size=validation_rate,
                                                    random_state=rng)

        model_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)
        model_nb = GaussianNB().fit(X_train, Y_train)
        model_lda = LinearDiscriminantAnalysis().fit(X_train, Y_train)

        pool_classifiers = [model_lda, model_nb, model_knn]
        metades = METADES(pool_classifiers)

        # META-DES itself is fitted on the held-out dev split.
        metades.fit(X_dev, Y_dev)

        # Predict once and derive accuracy from the predictions; calling
        # score() and then predict() would run the selection stage twice.
        Y_pred = metades.predict(X_test)
        accuracy = float(np.mean(Y_pred == Y_test))

        print('Classification accuracy of META-DES: ', accuracy)

        # The fourth returned item (support) is not used.
        precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
            Y_test, Y_pred, average='macro')
        precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
            Y_test, Y_pred, average='micro')

        errors[current_loop] = 1 - accuracy

        precisions_macro[current_loop] = precision_macro
        recalls_macro[current_loop] = recall_macro
        f1s_macro[current_loop] = f1_macro

        precisions_micro[current_loop] = precision_micro
        recalls_micro[current_loop] = recall_micro
        f1s_micro[current_loop] = f1_micro

print(errors)
print(f1s_macro)
print(f1s_micro)

        

Classification accuracy of META-DES:  0.5323741007194245
Classification accuracy of META-DES:  0.5239234449760766
Classification accuracy of META-DES:  0.5645933014354066
Classification accuracy of META-DES:  0.5598086124401914
Classification accuracy of META-DES:  0.49521531100478466
Classification accuracy of META-DES:  0.5227817745803357
Classification accuracy of META-DES:  0.5083932853717026
Classification accuracy of META-DES:  0.5179856115107914
Classification accuracy of META-DES:  0.5155875299760192
Classification accuracy of META-DES:  0.5443645083932853
Classification accuracy of META-DES:  0.5587529976019184
Classification accuracy of META-DES:  0.5263157894736842
Classification accuracy of META-DES:  0.5167464114832536
Classification accuracy of META-DES:  0.48564593301435405
Classification accuracy of META-DES:  0.5358851674641149
Classification accuracy of META-DES:  0.5491606714628298
Classification accuracy of META-DES:  0.4940047961630695
Classification accuracy of ME

In [152]:
def write_file(array, filename):
    """Write ``array`` to ``filename`` as text, followed by its mean and variance.

    The values are written with ``np.savetxt`` (``'%0.6e'`` format, comma
    delimiter). A separator line and the mean and variance of the whole
    array (``np.mean`` / ``np.var`` with default arguments) are then appended
    to the same file. An existing file is overwritten.

    Parameters
    ----------
    array : array_like
        Numeric values to store.
    filename : str
        Path of the output file.
    """
    array_mean = np.mean(array)
    array_var = np.var(array)
    np.savetxt(filename, array, delimiter=',', fmt='%0.6e')
    # Context manager guarantees the handle is closed even if a write fails.
    with open(filename, "a") as f:
        f.write("----------\n")
        f.write("Mean:\n")
        f.write("{0:6E}\n".format(array_mean))
        f.write("Variance:\n")
        # No trailing newline, matching the established file layout.
        f.write("{0:6E}".format(array_var))

In [154]:
# Persist each per-run metric array to its own .dat file via write_file,
# which also appends the mean and variance of the array.
results_by_file = (
    ("err.dat", errors),
    ("precision_macro.dat", precisions_macro),
    ("recall_macro.dat", recalls_macro),
    ("f1_macro.dat", f1s_macro),
    ("precision_micro.dat", precisions_micro),
    ("recall_micro.dat", recalls_micro),
    ("f1_micro.dat", f1s_micro),
)

for out_name, metric_values in results_by_file:
    write_file(metric_values, out_name)