# 1. Import libraries

In [1]:
#----------------------------Reproducible----------------------------------------------------------------------------------------
import numpy as np
import random as rn
import os

seed=0
os.environ['PYTHONHASHSEED'] = str(seed)

np.random.seed(seed)
rn.seed(seed)
#----------------------------Reproducible----------------------------------------------------------------------------------------

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import pandas as pd
import scipy.sparse as sparse
import scipy.io
from sklearn.linear_model import LinearRegression
import time
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

#--------------------------------------------------------------------------------------------------------------------------------
#Import ourslef defined methods
import sys
sys.path.append(r"../Defined")
import Functions as F

# 2. Loading data

In [2]:
train_data_arr=np.array(pd.read_csv('../Dataset/final_X_train.txt',header=None))
test_data_arr=np.array(pd.read_csv('../Dataset/final_X_test.txt',header=None))
train_label_arr=(np.array(pd.read_csv('../Dataset/final_y_train.txt',header=None))-1)
test_label_arr=(np.array(pd.read_csv('../Dataset/final_y_test.txt',header=None))-1)

data_arr=np.r_[train_data_arr,test_data_arr]
label_arr=np.r_[train_label_arr,test_label_arr]
label_arr_onehot=label_arr#to_categorical(label_arr)
print(data_arr.shape)
print(label_arr_onehot.shape)

data_arr=MinMaxScaler(feature_range=(0,1)).fit_transform(data_arr)

(5744, 561)
(5744, 1)


In [3]:
key_feture_number=50

# 3. Calculation

In [4]:
#--------------------------------------------------------------------------------------------------------------------------------
def IsnanAndIsinf(p_data):
    p_data=np.array(p_data)
    for i in np.arange(p_data.shape[0]):
        for j in np.arange(p_data.shape[1]):
            if np.isnan(p_data[i,j]) or np.isinf(p_data[i,j]):
                p_data[i,j]=0
    return p_data

#--------------------------------------------------------------------------------------------------------------------------------
def write_to_csv(p_data,p_path):
    dataframe = pd.DataFrame(p_data)
    dataframe.to_csv(p_path, mode='a',header=False,index=False,sep=',')
    del dataframe

#--------------------------------------------------------------------------------------------------------------------------------       
def mse_check(train, test):
    LR = LinearRegression(n_jobs = -1)
    LR.fit(train[0], train[1])
    MSELR = ((LR.predict(test[0]) - test[1]) ** 2).mean()
    return MSELR

#--------------------------------------------------------------------------------------------------------------------------------       
def InfFS(p_data_arr,p_alpha,use_specify_number=False,specify_number=50):
    df = pd.DataFrame(p_data_arr)
    corr_ij_spearman__=df.corr(method ='spearman')
    corr_ij_spearman_=IsnanAndIsinf(corr_ij_spearman__)
    corr_ij_spearman=1-np.abs(corr_ij_spearman_)
    
    STD=np.std(p_data_arr,axis=0)
    
    STDMatrix_=np.zeros((STD.shape[0],STD.shape[0]))
    for i in np.arange(STD.shape[0]):
        for j in np.arange(STD.shape[0]):
            STDMatrix_[i,j]=max(STD[i],STD[j])
            
    STDMatrix_min=STDMatrix_-np.min(STDMatrix_)
    STDMatrix_max=np.max(STDMatrix_min)
    STDMatrix__=STDMatrix_min/STDMatrix_max
    
    STDMatrix=IsnanAndIsinf(STDMatrix__)
    
    N=p_data_arr.shape[1]
    
    eps = (5e-06) * N;
    factor = 1 - eps
    
    A =  ( p_alpha*STDMatrix + (1-p_alpha)*corr_ij_spearman )

    rho = np.max(np.sum(A,axis=1))

    A = A / (rho+eps)
    
    I = np.eye(A.shape[0])
    
    r = factor/rho
    
    y = I - ( r * A )
    S=np.linalg.inv(y)
    
    WEIGHT = np.sum( S , axis=1 )
    RANKED=np.argsort(-WEIGHT)
    
    RANKED = RANKED
    WEIGHT = WEIGHT
    
    e = np.ones(N)
    t = np.dot(S, e)

    nbins = 0.5*N
    
    cnts, bins = np.histogram(t, bins=int(nbins))
    
    thr =np.mean(cnts)
    
    size_sub = np.sum(cnts>thr)
    
    if use_specify_number:
        size_sub=specify_number
    
    SUBSET = RANKED[0:size_sub]
    
    return SUBSET

#--------------------------------------------------------------------------------------------------------------------------------       
def cal(p_data_arr,\
        p_label_arr_onehot,\
        p_key_feture_number,\
        p_seed):
    C_train_x,C_test_x,C_train_y,C_test_y= train_test_split(p_data_arr,p_label_arr_onehot,test_size=0.2,random_state=p_seed)
    os.environ['PYTHONHASHSEED'] = str(p_seed)
    np.random.seed(p_seed)
    rn.seed(p_seed)
    
    #--------------------------------------------------------------------------------------------------------------------------------
    train_feature=C_train_x
    test_feature=C_test_x

    t_start = time.time()
    
    train_idx=InfFS(train_feature,p_alpha,use_specify_number=True,specify_number=p_key_feture_number)

    t_used=time.time() - t_start
    
    write_to_csv(np.array([t_used]),"./log/InfFS_time.csv")
    
    C_train_selected_x = train_feature[:, train_idx]
    
    test_idx=InfFS(test_feature,p_alpha,use_specify_number=True,specify_number=p_key_feture_number)
    C_test_selected_x = test_feature[:, test_idx]

    # Classification on original features
    train_feature=C_train_x
    train_label=C_train_y
    test_feature=C_test_x
    test_label=C_test_y
    
    orig_train_acc,orig_test_acc=F.ETree(train_feature,train_label,test_feature,test_label,0)
    
    # Classification on selected features
    train_feature=C_train_selected_x
    train_label=C_train_y
    test_feature=C_test_selected_x
    test_label=C_test_y

    selec_train_acc,selec_test_acc=F.ETree(train_feature,train_label,test_feature,test_label,0)

    # Linear reconstruction
    train_feature_tuple=(C_train_selected_x,C_train_x)
    test_feature_tuple=(C_test_selected_x,C_test_x)

    reconstruction_loss=mse_check(train_feature_tuple, test_feature_tuple)
    results=np.array([orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss])
    
    write_to_csv(results.reshape(1,len(results)),"./log/InfFS_results.csv")
    
    return orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss

In [5]:
p_data_arr=data_arr
p_alpha=0.5
p_label_arr_onehot=label_arr_onehot
p_key_feture_number=key_feture_number

In [None]:
for p_seed in np.arange(0,50):
    orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss=cal(p_data_arr,\
                                                                                        p_label_arr_onehot,\
                                                                                        p_key_feture_number,\
                                                                                        p_seed)

  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9251523063533508
Testing accuracy： 0.9251523063533508


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.18102697998259357
Testing accuracy： 0.18102697998259357


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9338555265448216
Testing accuracy： 0.9338555265448216


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.308964316797215
Testing accuracy： 0.308964316797215


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9416884247171453
Testing accuracy： 0.9416884247171453


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.18798955613577023
Testing accuracy： 0.18798955613577023


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9321148825065274
Testing accuracy： 0.9321148825065274


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.2819843342036554
Testing accuracy： 0.2819843342036554


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9216710182767625
Testing accuracy： 0.9216710182767625


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.24369016536118362
Testing accuracy： 0.24369016536118362


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9295039164490861
Testing accuracy： 0.9295039164490861


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.32724107919930373
Testing accuracy： 0.32724107919930373


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9260226283724978
Testing accuracy： 0.9260226283724978


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.18276762402088773
Testing accuracy： 0.18276762402088773


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9373368146214099
Testing accuracy： 0.9373368146214099


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.25326370757180156
Testing accuracy： 0.25326370757180156


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9425587467362925
Testing accuracy： 0.9425587467362925


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.15926892950391644
Testing accuracy： 0.15926892950391644


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9321148825065274
Testing accuracy： 0.9321148825065274


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.25935596170583114
Testing accuracy： 0.25935596170583114


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9295039164490861
Testing accuracy： 0.9295039164490861


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.30461270670147955
Testing accuracy： 0.30461270670147955


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9408181026979983
Testing accuracy： 0.9408181026979983


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.18798955613577023
Testing accuracy： 0.18798955613577023


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9303742384682332
Testing accuracy： 0.9303742384682332


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.24369016536118362
Testing accuracy： 0.24369016536118362


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9164490861618799
Testing accuracy： 0.9164490861618799


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.30374238468233244
Testing accuracy： 0.30374238468233244


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9312445604873804
Testing accuracy： 0.9312445604873804


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.16449086161879894
Testing accuracy： 0.16449086161879894


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9373368146214099
Testing accuracy： 0.9373368146214099


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.3533507397737163
Testing accuracy： 0.3533507397737163


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9260226283724978
Testing accuracy： 0.9260226283724978


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.2628372497824195
Testing accuracy： 0.2628372497824195


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9312445604873804
Testing accuracy： 0.9312445604873804


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.2793733681462141
Testing accuracy： 0.2793733681462141


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9216710182767625
Testing accuracy： 0.9216710182767625


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.1566579634464752
Testing accuracy： 0.1566579634464752


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9416884247171453
Testing accuracy： 0.9416884247171453


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.257615317667537
Testing accuracy： 0.257615317667537


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9355961705831157
Testing accuracy： 0.9355961705831157


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.16623150565709313
Testing accuracy： 0.16623150565709313


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.939077458659704
Testing accuracy： 0.939077458659704


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.18189730200174065
Testing accuracy： 0.18189730200174065


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9234116623150566
Testing accuracy： 0.9234116623150566


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.20539599651871193
Testing accuracy： 0.20539599651871193


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9338555265448216
Testing accuracy： 0.9338555265448216


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.15926892950391644
Testing accuracy： 0.15926892950391644


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9347258485639687
Testing accuracy： 0.9347258485639687


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.11836379460400348
Testing accuracy： 0.11836379460400348


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9260226283724978
Testing accuracy： 0.9260226283724978


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.2758920800696258
Testing accuracy： 0.2758920800696258


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9321148825065274
Testing accuracy： 0.9321148825065274


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.30113141862489123
Testing accuracy： 0.30113141862489123


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9242819843342036
Testing accuracy： 0.9242819843342036


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.21671018276762402
Testing accuracy： 0.21671018276762402


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9329852045256745
Testing accuracy： 0.9329852045256745


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.2750217580504787
Testing accuracy： 0.2750217580504787


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9373368146214099
Testing accuracy： 0.9373368146214099


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.2402088772845953
Testing accuracy： 0.2402088772845953


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9295039164490861
Testing accuracy： 0.9295039164490861


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.2950391644908616
Testing accuracy： 0.2950391644908616


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9286335944299391
Testing accuracy： 0.9286335944299391


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.1801566579634465
Testing accuracy： 0.1801566579634465


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9303742384682332
Testing accuracy： 0.9303742384682332


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.20800696257615317
Testing accuracy： 0.20800696257615317


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9303742384682332
Testing accuracy： 0.9303742384682332


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.1496953872932985
Testing accuracy： 0.1496953872932985


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.918189730200174
Testing accuracy： 0.918189730200174


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.11836379460400348
Testing accuracy： 0.11836379460400348


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9338555265448216
Testing accuracy： 0.9338555265448216


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.14360313315926893
Testing accuracy： 0.14360313315926893


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9268929503916449
Testing accuracy： 0.9268929503916449


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.2280243690165361
Testing accuracy： 0.2280243690165361


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9347258485639687
Testing accuracy： 0.9347258485639687


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.24281984334203655
Testing accuracy： 0.24281984334203655


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9442993907745866
Testing accuracy： 0.9442993907745866


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.14795474325500435
Testing accuracy： 0.14795474325500435


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.917319408181027
Testing accuracy： 0.917319408181027


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.1462140992167102
Testing accuracy： 0.1462140992167102
