In [141]:
import numpy as np
import scipy.io as sio
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from deslib.des import METADES
from sklearn.metrics import precision_recall_fscore_support


In [89]:
# Load the abalone table from CSV. The code further down treats the last
# column as the class label and every other column as a feature.
# NOTE(review): both paths below are absolute and machine-specific; the
# notebook will not run elsewhere without editing them.
D = np.loadtxt(
    "/Users/AnhVu/Study/Machine_learning/Data/convert/csv/abalone.csv",
    delimiter=',',
)

# One row per instance; all columns but the last are features.
n_instances = len(D)
n_features = D.shape[1] - 1

print(D.shape)

# Pre-computed cross-validation partition, stored under the 'cv' key of a
# MATLAB .mat file.
cv = sio.loadmat(
    "/Users/AnhVu/Study/Machine_learning/Data/convert/cv/cv_abalone.mat"
)['cv']

print(cv.shape)

(4174, 9)
(1, 100)


In [75]:
# Experiment configuration.
n_iters = 3                       # how many times the k-fold procedure is repeated
n_folds = 10                      # number of folds in each repetition
validation_rate = 0.3             # share of each training fold held out for validation
rng = np.random.RandomState(42)   # shared seeded generator for the random splits

# Echo the dataset dimensions computed when the data was loaded.
print(n_instances, n_features, sep='\n')

4174
8


In [151]:
# Repeated k-fold evaluation of META-DES on top of a small pool of base
# classifiers (LDA, Gaussian naive Bayes, 5-NN).
#
# For every (iteration, fold) pair:
#   1. the test rows come from the pre-computed partition in `cv`;
#   2. the remaining rows are split into a training part (fits the base
#      classifiers) and a validation part (fits META-DES);
#   3. accuracy and macro/micro precision, recall and F1 are stored at
#      position `current_loop` of the result arrays.
all_ids = np.arange(D.shape[0])
n_runs = n_iters * n_folds

accuracies = np.zeros(n_runs)

precisions_macro = np.zeros(n_runs)
recalls_macro = np.zeros(n_runs)
f1s_macro = np.zeros(n_runs)

precisions_micro = np.zeros(n_runs)
recalls_micro = np.zeros(n_runs)
f1s_micro = np.zeros(n_runs)

for i_iter in range(n_iters):
    base_loop = i_iter * n_folds
    for i_fold in range(n_folds):
        # Flat index of this run, used both to pick the fold from `cv` and
        # to address the result arrays.
        current_loop = base_loop + i_fold

        # The stored ids are MATLAB row numbers (1-based); subtract one to get
        # Python's 0-based indices. Cast to an integer index dtype first so
        # the ids are always valid for indexing, whatever dtype was loaded.
        test_ids = cv[0, current_loop][:, 0].astype(np.intp) - 1
        train_ids = np.setdiff1d(all_ids, test_ids)

        X_train = D[train_ids, :-1]
        Y_train = D[train_ids, -1]

        X_test = D[test_ids, :-1]
        Y_test = D[test_ids, -1]

        # Hold out part of the training fold for fitting META-DES. The shared
        # `rng` advances on every call, so each fold gets a different split
        # while the overall sequence stays reproducible.
        X_train, X_dev, Y_train, Y_dev = train_test_split(
            X_train, Y_train,
            test_size=validation_rate,
            random_state=rng)

        model_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)
        model_nb = GaussianNB().fit(X_train, Y_train)
        model_lda = LinearDiscriminantAnalysis().fit(X_train, Y_train)

        pool_classifiers = [model_lda, model_nb, model_knn]
        metades = METADES(pool_classifiers)

        metades.fit(X_dev, Y_dev)

        # Predict once and derive accuracy from the predictions; calling
        # `score` as well would repeat the whole prediction on the same data.
        Y_pred = metades.predict(X_test)
        accuracy = float(np.mean(Y_pred == Y_test))

        print('Classification accuracy of META-DES: ', accuracy)
        support_macro = precision_recall_fscore_support(Y_test, Y_pred, average='macro')
        support_micro = precision_recall_fscore_support(Y_test, Y_pred, average='micro')

        accuracies[current_loop] = accuracy

        # The returned tuples are (precision, recall, fscore, support).
        precisions_macro[current_loop] = support_macro[0]
        recalls_macro[current_loop] = support_macro[1]
        f1s_macro[current_loop] = support_macro[2]

        precisions_micro[current_loop] = support_micro[0]
        recalls_micro[current_loop] = support_micro[1]
        f1s_micro[current_loop] = support_micro[2]

print(accuracies)
print(f1s_macro)
print(f1s_micro)

        

Classification accuracy of META-DES:  0.5419664268585132
Classification accuracy of META-DES:  0.5287081339712919
Classification accuracy of META-DES:  0.5406698564593302
Classification accuracy of META-DES:  0.5430622009569378
Classification accuracy of META-DES:  0.5167464114832536
Classification accuracy of META-DES:  0.5635491606714629
Classification accuracy of META-DES:  0.5179856115107914
Classification accuracy of META-DES:  0.5371702637889688
Classification accuracy of META-DES:  0.5371702637889688
Classification accuracy of META-DES:  0.5587529976019184
Classification accuracy of META-DES:  0.565947242206235
Classification accuracy of META-DES:  0.5191387559808612
Classification accuracy of META-DES:  0.507177033492823
Classification accuracy of META-DES:  0.49521531100478466
Classification accuracy of META-DES:  0.5334928229665071
Classification accuracy of META-DES:  0.539568345323741
Classification accuracy of META-DES:  0.5059952038369304
Classification accuracy of META-D

In [140]:
# Summary statistics over all cross-validation runs. `np.var` is called with
# its default arguments, i.e. the population variance (divides by N).
accuracies_mean = np.mean(accuracies)
accuracies_var = np.var(accuracies)

# macro
precisions_macro_mean = np.mean(precisions_macro)
precisions_macro_var = np.var(precisions_macro)

recalls_macro_mean = np.mean(recalls_macro)
recalls_macro_var = np.var(recalls_macro)

f1s_macro_mean = np.mean(f1s_macro)
f1s_macro_var = np.var(f1s_macro)

# micro
precisions_micro_mean = np.mean(precisions_micro)
precisions_micro_var = np.var(precisions_micro)

recalls_micro_mean = np.mean(recalls_micro)
recalls_micro_var = np.var(recalls_micro)

f1s_micro_mean = np.mean(f1s_micro)
f1s_micro_var = np.var(f1s_micro)


# Write the per-run accuracies (one per line), overwriting any existing file.
np.savetxt('test.dat', accuracies, delimiter=',', fmt='%0.6e')

# Append the mean and variance of the accuracies below the raw values. The
# context manager closes the file even if one of the writes fails.
with open("test.dat", "a") as f:
    f.write("----------\n")
    f.write("Mean:\n")
    f.write("{0:6E}\n".format(accuracies_mean))
    f.write("Variance:\n")
    f.write("{0:6E}".format(accuracies_var))

0.0005405719426893039
0.0005405719426893039


In [150]:
# --- Disabled scratch check: population variance computed by hand vs np.var ---
# a = np.array([4.316547e-01,4.712919e-01,4.497608e-01,4.712919e-01,4.904306e-01,4.796163e-01,5.083933e-01,4.652278e-01,4.460432e-01,4.628297e-01,3.980815e-01,4.808612e-01,4.808612e-01,4.617225e-01,4.712919e-01,4.484412e-01,4.892086e-01,4.484412e-01,4.724221e-01,4.604317e-01,4.724221e-01,4.760766e-01,5.047847e-01,4.665072e-01,5.000000e-01,4.556355e-01,4.484412e-01,4.316547e-01,4.580336e-01,4.412470e-01])
# print(a.shape)
# a_mean = np.mean(a)
# print(np.square(a - accuracies_mean))
# a_var = np.sum(np.square(a - a_mean))/30
# print(a_mean)
# print(a_var)
# print(np.var(a))

# Toy three-class example for inspecting the macro-averaged
# (precision, recall, fscore, support) tuple.
y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
a = precision_recall_fscore_support(y_true, y_pred, average='macro')
print(a)

# Harmonic mean of the macro precision and macro recall, shown as the cell's
# value for comparison with the reported fscore. The macro fscore is the mean
# of the per-class F-scores, so the two need not match in general.
macro_precision, macro_recall = a[0], a[1]
2*macro_precision*macro_recall/(macro_precision + macro_recall)


(0.2222222222222222, 0.3333333333333333, 0.26666666666666666, None)


0.26666666666666666