This code generates the Shapley-CMI valuation results (used to produce Table 2), which are then recorded in {dataset}/{dataset}_data_values.csv.
This code also includes the entropy calculation reported in Figure 5.

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.stats import entropy
from sklearn.utils import resample

In [2]:
# load dataset
def load_dataset(name, feature_num, discret_cat=5):
    """Load ``./<name>/<name>.csv`` and return its features and labels.

    The CSV must contain feature columns named ``f1`` .. ``f<feature_num>``
    and a label column named ``y``.

    Args:
        name: Dataset name; used as both the directory and the file stem.
        feature_num: Number of feature columns to read.
        discret_cat: Number of equal-width bins each feature is cut into.
            When it is not positive, the raw columns are used unchanged.

    Returns:
        A tuple ``(x, y, feature_names)``: the feature DataFrame, the label
        Series, and the list of column names making up ``x``. Discretized
        columns carry a ``_c`` suffix and hold the bin index
        ``0 .. discret_cat - 1``.
    """
    datafile = './{}/{}.csv'.format(name, name)
    data_pd = pd.read_csv(datafile)
    bin_labels = list(range(discret_cat))
    feature_names = []
    for i in range(1, feature_num + 1):
        feature_name = 'f' + str(i)
        if discret_cat > 0:  # need discretization
            binned_name = feature_name + '_c'
            data_pd[binned_name] = pd.cut(
                data_pd[feature_name], discret_cat, labels=bin_labels
            )
            feature_name = binned_name
        feature_names.append(feature_name)
    # A discarded `data_pd.head()` preview call used to sit here; it had no
    # effect outside an interactive cell and has been removed.
    y = data_pd['y']
    x = data_pd[feature_names]
    return x, y, feature_names

In [3]:
# for calculating mutual information
from collections import Counter
def our_entropy(labels): # H(A)
    """Return the empirical Shannon entropy of ``labels`` in nats.

    Each distinct (hashable) value in ``labels`` is one outcome; its
    probability is its relative frequency in the sequence.
    """
    # occurrences of every distinct label
    counts = np.array(list(Counter(labels).values()), dtype=float)
    # relative frequencies (counts divided by the total number of samples)
    probs = counts / counts.sum()
    return -np.dot(probs, np.log(probs))
def MI_(s1,s2): # mutual information
    """Return the empirical mutual information I(s1; s2) in nats.

    Computed as H(s1) + H(s2) - H(s1, s2), where the joint sample is the
    element-wise pairing of the two sequences (``zip`` stops at the shorter
    one).

    Args:
        s1: Sequence of hashable observations.
        s2: Sequence of hashable observations aligned with ``s1``.
    """
    # Pair the observations as tuples rather than concatenating their string
    # forms: "%s%s" maps distinct pairs such as (1, 12) and (11, 2) to the
    # same symbol "112", which deflates the joint entropy and inflates the MI.
    joint = list(zip(s1, s2))
    return our_entropy(s1) + our_entropy(s2) - our_entropy(joint)
def N_MI(s1,s2): # normalized mutual information
    """Return I(s1; s2) divided by the geometric mean of H(s1) and H(s2)."""
    mutual_info = MI_(s1, s2)
    # NOTE(review): if either sequence has zero entropy (e.g. it is constant),
    # the normalizer is 0 and the division below is a division by zero.
    normalizer = (our_entropy(s1) * our_entropy(s2)) ** 0.5
    return mutual_info / normalizer

In [4]:
# sample random permutations of the features and accumulate each feature's conditional mutual information with Y along them
import itertools

# Dataset selection: enable exactly one of the load_dataset calls below.
x, y, feature_names = load_dataset('wine', 13)
#x, y, feature_names = load_dataset('parkinsons', 22)
#x, y, feature_names = load_dataset('breast', 30)
#x, y, feature_names = load_dataset('spect', 22, 0)
#x, y, feature_names = load_dataset('winequality-red', 11)
#x, y, feature_names = load_dataset('winequality-white', 11)


X_train,X_test,Y_train,Y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Enumerating every permutation (itertools.permutations) is time-consuming
# when the feature number is large, so random permutations are sampled instead.

# Per feature: the marginal contribution observed in every sampled permutation.
contribution = {feature_name: [] for feature_name in feature_names}

Y_value_list = Y_train.values.tolist()


def _normalized_feature_values(contribution, feature_names):
    """Return each feature's mean sampled contribution, scaled to sum to 1."""
    mean_values = np.array([np.mean(contribution[name]) for name in feature_names])
    # normalization, not necessary, for ease of presentation
    return mean_values / np.sum(mean_values)


# MI(Y; feature subset) depends only on which features are in the subset, not
# on the order they were added, so each subset is evaluated once and reused.
mi_cache = {}

random_permutation_times = 10000
report_every = 100
# NOTE: no random seed is set in this cell, so values vary between runs.
for i in range(1, random_permutation_times + 1): # random sample permutations for certain times
    each_permutation = np.random.permutation(feature_names)
    current_feature_set = []
    current_MI = 0
    for feature_name in each_permutation:
        current_feature_set.append(feature_name)
        subset_key = frozenset(current_feature_set)
        if subset_key not in mi_cache:
            x_new = X_train[current_feature_set]
            mi_cache[subset_key] = MI_(Y_value_list, list(x_new.itertuples(index=False)))
        new_MI = mi_cache[subset_key]
        # conditional MI of the current feature given the features preceding it in this permutation
        contr = new_MI - current_MI
        contribution[feature_name].append(contr)
        current_MI = new_MI

    if i % report_every == 0:
        features_values = _normalized_feature_values(contribution, feature_names)
        print ('permutation', i, features_values, 'entropy', entropy(features_values)) #entropy to quantify the change of feature values

# Recompute from all samples so the final report does not depend on the last
# progress checkpoint (which may be stale, or never reached for small runs).
features_values = _normalized_feature_values(contribution, feature_names)
print("final Shapley-CMI after {} samplings".format(random_permutation_times))
for feature_value in features_values:
    print(feature_value)


permutation 100 [0.1058653  0.07061077 0.02918043 0.05825907 0.07412079 0.05968489
 0.09429726 0.06477699 0.06309475 0.13181919 0.06854576 0.10537096
 0.07437385] entropy 2.5102387399018364
permutation 200 [0.10604904 0.06393946 0.03508873 0.05630491 0.06066411 0.05549252
 0.10382823 0.0658469  0.0569289  0.11698094 0.08480403 0.10253606
 0.09153616] entropy 2.5138856551307143
permutation 300 [0.10028958 0.06126035 0.03297776 0.05653788 0.05988036 0.062754
 0.1140431  0.06198962 0.05162017 0.10451374 0.086037   0.11125727
 0.09683915] entropy 2.5094467758623797
permutation 400 [0.10411159 0.06412846 0.03418192 0.05737122 0.05644418 0.06369134
 0.10628099 0.06113646 0.05140823 0.10662749 0.08882708 0.10794081
 0.09785025] entropy 2.5118399991501277
permutation 500 [0.09947234 0.05999112 0.03443933 0.05843086 0.05553035 0.06594487
 0.1097468  0.06020738 0.05085592 0.11079482 0.08384402 0.10974212
 0.10100008] entropy 2.5089322382513077
permutation 600 [0.09780715 0.06109601 0.03445109 0.

permutation 4500 [0.09224299 0.05758708 0.03317551 0.05034989 0.05744218 0.0730755
 0.11791618 0.05462254 0.05377763 0.11791571 0.08553332 0.10886083
 0.09750066] entropy 2.501850944384106
permutation 4600 [0.0922259  0.05733122 0.03305082 0.05070034 0.05764856 0.07262726
 0.11815791 0.05440098 0.05404279 0.11814597 0.08518006 0.10937389
 0.0971143 ] entropy 2.50162119462113
permutation 4700 [0.09248555 0.05720379 0.03304236 0.05069386 0.05752278 0.07273374
 0.11719372 0.05429745 0.05394882 0.11801988 0.08584134 0.10975053
 0.09726619] entropy 2.5016518998390653
permutation 4800 [0.09222444 0.057181   0.0330129  0.05064385 0.05727904 0.07312363
 0.11764814 0.05413529 0.05378828 0.11831537 0.08569557 0.10970266
 0.09724984] entropy 2.5012002592085767
permutation 4900 [0.0924386  0.05732191 0.03302545 0.05065106 0.05722352 0.07317148
 0.11811165 0.05413078 0.05388339 0.11755094 0.08593222 0.10957898
 0.09698001] entropy 2.5014444118740444
permutation 5000 [0.09204692 0.05719242 0.0329251

permutation 8900 [0.0887515  0.05718127 0.03345444 0.05181283 0.058375   0.0745803
 0.11901766 0.05420517 0.05408845 0.11672811 0.08423416 0.11139046
 0.09618064] entropy 2.502974069817786
permutation 9000 [0.088653   0.05723296 0.03337818 0.05183772 0.05845528 0.07435623
 0.1190394  0.05416822 0.05412666 0.1165529  0.08436476 0.11140628
 0.09642841] entropy 2.5029549292422892
permutation 9100 [0.08884321 0.05714764 0.03335774 0.05175616 0.05855813 0.07436403
 0.11886738 0.05413493 0.0540047  0.11676062 0.08456102 0.11162117
 0.09602327] entropy 2.502807879322986
permutation 9200 [0.08887417 0.05698364 0.03335942 0.05158174 0.05870342 0.07445862
 0.11914221 0.05401724 0.05396191 0.11670857 0.08454284 0.11171527
 0.09595094] entropy 2.5025568561202287
permutation 9300 [0.08868161 0.05693041 0.03338455 0.05174173 0.05854429 0.07449853
 0.11937159 0.0539432  0.05393249 0.11647748 0.08451174 0.11201484
 0.09596753] entropy 2.502457134872425
permutation 9400 [0.08891986 0.05682361 0.0334094