In [1]:
#importing modules
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
import random, arff
import os, sys, subprocess

In [2]:
file_name = os.listdir(".")
#number of labels
col = list(range(0, 6))

In [3]:
#importing the files
test = pd.read_csv('Music_a_test1.arff',sep=',',header=None).iloc[:,col]
BR   = pd.read_csv('BR-Music_a_test1.arff',sep=',',header=None).iloc[:,col]
CC   = pd.read_csv('CC-Music_a_test1.arff',sep=',',header=None).iloc[:,col]
CDN  = pd.read_csv('CDN-Music_a_test1.arff',sep=',',header=None).iloc[:,col]
CDT  = pd.read_csv('CDT-Music_a_test1.arff',sep=',',header=None).iloc[:,col]
CT   = pd.read_csv('CT-Music_a_test1.arff',sep=',',header=None).iloc[:,col]

In [4]:
majority_voting = pd.DataFrame([])
for x in range(len(test)):
    data_row = pd.DataFrame(pd.concat([BR.iloc[[x]],CC.iloc[[x]],CDN.iloc[[x]],CDT.iloc[[x]],CT.iloc[[x]]], axis = 0 ))
    vote = data_row.mode()  
    majority_voting = majority_voting.append(vote)

In [5]:
def pre_cal(y_true, y_pred, print_all = False):
    if(y_true.shape != y_pred.shape):
        print("Wrong y_preds matrics!")

    real_pos = real_neg = pred_pos = pred_neg  = true_pos = true_neg = []

    for i in range(y_true.shape[0]):
        # real values - RP and RN
        real_pos = np.asarray(np.append(real_pos,np.logical_and(y_true[i], y_true[i]).sum()), dtype=np.int64).reshape(-1,1)
        real_neg = np.asarray(np.append(real_neg,np.logical_and(np.logical_not(y_true[i]),np.logical_not(y_true[i])).sum()), dtype=np.int64).reshape(-1,1)

        # y_pred values - PP and PN
        pred_pos = np.asarray(np.append(pred_pos,np.logical_and(y_pred[i], y_pred[i]).sum()),dtype=np.int64).reshape(-1,1)
        pred_neg = np.asarray(np.append(pred_neg,np.logical_and(np.logical_not(y_pred[i]), np.logical_not(y_pred[i])).sum()),dtype=np.int64).reshape(-1,1)

        # true labels - TP and TN
        true_pos = np.asarray(np.append(true_pos,np.logical_and(y_true[i], y_pred[i]).sum()),dtype=np.int64).reshape(-1,1)
        true_neg = np.asarray(np.append(true_neg,np.logical_and(np.logical_not(y_true[i]), np.logical_not(y_pred[i])).sum()),dtype=np.int64).reshape(-1,1)

    if print_all:
		# if print_all = True - it prints RP, RN, PP, PN, TP and TN
        result = np.concatenate((real_pos, real_neg, pred_pos, pred_neg, true_pos, true_neg), axis=1)
        print(result)

    return(real_pos, real_neg, pred_pos, pred_neg, true_pos, true_neg)

#function to resolve divide by zero error and accept the value 0 when divided by 0
def divideZero( value_a, value_b):
    with np.errstate(divide='ignore', invalid='ignore'):
        result = np.true_divide( value_a, value_b )
        result[ ~ np.isfinite( result )] = 0
    return result

def accuracy(y_true, y_pred):
    #return the accuracy - example based
    real_pos, real_neg, pred_pos, pred_neg, true_pos, true_neg = pre_cal(y_true,y_pred)
    score = (true_pos + true_neg)/(pred_pos + pred_neg)
    score = np.mean(score)
    return score


def precision(y_true, y_pred):
    #return precision - example based
    real_pos, real_neg, pred_pos, pred_neg, true_pos, true_neg = pre_cal(y_true,y_pred)
    score = divideZero(true_pos, pred_pos)
    score = np.mean(score)
    return score

def recall(y_true, y_pred):
    #return precision - example based
    real_pos, real_neg, pred_pos, pred_neg, true_pos, true_neg = pre_cal(y_true,y_pred)
    score = divideZero(true_pos, real_pos)
    score = np.mean(score)
    return score


def fscore(y_true, y_pred,beta = 1):
	#return f(beta)score - example based : default beta value is 1
    prec, rec = precision(y_true, y_pred), recall(y_true, y_pred)
    beta_val = beta*beta
    score = ((1+beta_val)*(prec*rec))/(beta_val*(prec+rec))
    return score


def hamloss(y_true, y_pred):
	#return hamming loss - example based
    hamloss = list()
    for i in range(y_true.shape[0]):
        hamloss = np.asarray(np.append(hamloss,np.logical_xor(y_true[i], y_pred[i]).sum()), dtype=np.int64).reshape(-1,1)
    score = (hamloss.sum())/((y_true.shape[0])*(y_true.shape[1]))
    return score


def subset(y_true, y_pred):
	#return subset accuracy - example based
    subset_matrix = list()
    for i in range(y_true.shape[0]):
        subset_matrix = np.asarray(np.append(subset_matrix, np.array_equal(y_true[i],y_pred[i])), dtype=np.int64).reshape(-1,1)
    score = (subset_matrix.sum())/((y_true.shape[0])*(y_true.shape[1]))
    return score

def zeroloss(y_true, y_pred):
    #return new array with removed element having all zero in y_true
    condition = list()
    index = list()
    for i in range(y_true.shape[0]):
        new_true = new_pred = list()
        condition = np.logical_and(y_true[i],y_true[i]).sum()
        if (condition==0):
            index = np.asarray(np.append(index,i), dtype = np.int64)

        new_true = np.delete(y_true,index, axis = 0)
        new_pred = np.delete(y_pred,index, axis = 0)
    return new_true, new_pred

def microprecision(y_true, y_pred):
    #return micro-precision
    real_pos, real_neg, pred_pos, pred_neg, true_pos, true_neg = pre_cal(y_true,y_pred)
    score = true_pos.sum()/pred_pos.sum()
    return score

def microrecall(y_true, y_pred):
    #return micro-recall
    real_pos, real_neg, pred_pos, pred_neg, true_pos, true_neg = pre_cal(y_true,y_pred)
    score = true_pos.sum()/real_pos.sum()
    return score

def microfscore(y_true, y_pred,beta = 1):
    #return micro-fscore
    prec, rec = microprecision(y_true, y_pred), microrecall(y_true, y_pred)
    beta_val = beta*beta
    score = ((1+beta_val)*(prec*rec))/(beta_val*(prec+rec))
    return score

def macroprecision(y_true, y_pred):
    #return macro-precision
    real_pos, real_neg, pred_pos, pred_neg, true_pos, true_neg = pre_cal(y_true,y_pred)
    score = divideZero(true_pos, pred_pos)
    return score

def macrorecall(y_true, y_pred):
    #return macro-recall
    real_pos, real_neg, pred_pos, pred_neg, true_pos, true_neg = pre_cal(y_true,y_pred)
    score = divideZero(true_pos, real_pos)
    return score

def macrofscore(y_true, y_pred,beta = 1):
    #return macro-fscore
    prec, rec = macroprecision(y_true, y_pred), macrorecall(y_true, y_pred)
    beta_val = beta*beta
    score = divideZero(((1+beta_val)*(prec*rec)),(beta_val*(prec+rec)))
    score = np.mean(score)
    return score

def calculate_all(np_test, np_pred):
    print("Accuracy : {}".format(accuracy(np_test,np_pred)))
    print("Precision: {}".format(precision(np_test,np_pred)))
    print("Recall   : {}".format(recall(np_test,np_pred)))
    print("F1-Score : {}".format(fscore(np_test,np_pred)))
    print("HammingL : {}".format(hamloss(np_test,np_pred)))
    print("Subset   : {}".format(subset(np_test,np_pred)))
    print("Micro - F1-Score : {}".format(microfscore(np_test,np_pred)))
    print("Macro - F1-Score : {}".format(macrofscore(np_test,np_pred)))

In [6]:
#result form majority voting
np_test = test.values
np_pred = majority_voting.values
calculate_all(np_test, np_pred)

Accuracy : 0.7668350168350168
Precision: 0.5946969696969697
Recall   : 0.6085858585858586
F1-Score : 0.6015612579096314
HammingL : 0.23316498316498316
Subset   : 0.037037037037037035
Micro - F1-Score : 0.6231292517006803
Macro - F1-Score : 0.5715969215969215
