# 1. Import libraries

In [1]:
#----------------------------Reproducible----------------------------------------------------------------------------------------
import numpy as np
import random as rn
import os

# One seed value shared by every random number generator seeded in this cell.
seed = 0

# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here probably only affects subprocesses -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed the stdlib generator and NumPy's legacy global generator.
rn.seed(seed)
np.random.seed(seed)

#----------------------------Reproducible----------------------------------------------------------------------------------------

# Set before the Keras/TensorFlow import below.
# NOTE(review): presumably this silences TensorFlow's C++ logging -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

#--------------------------------------------------------------------------------------------------------------------------------
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline
matplotlib.style.use('ggplot')

import random
import scipy.sparse as sparse
import scipy.io

from keras.utils import to_categorical
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from skfeature.utility import construct_W
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from skfeature.function.sparse_learning_based.UDFS import udfs
from skfeature.utility import construct_W
from skfeature.utility.sparse_learning import feature_ranking
import time
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer 
import pandas as pd

#--------------------------------------------------------------------------------------------------------------------------------
#Import our self-defined methods
import sys
sys.path.append(r"../Defined")
import Functions as F

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# 2. Loading data

In [2]:
# Read the worksheet that holds the samples.
data_frame = pd.read_excel('../Dataset/Data_Cortex_Nuclear.xls', sheet_name='Hoja1')

# Convert the frame to an array once, then slice out the parts used below.
frame_values = np.array(data_frame)
data_arr = frame_values[:, 1:78].copy()     # columns 1..77 -> feature matrix
label_arr = frame_values[:, 81].copy()      # column 81 -> class name of each row

# Class name -> digit string. Values that are not listed are left unchanged.
class_codes = {
    'c-CS-s': '0',
    'c-CS-m': '1',
    'c-SC-s': '2',
    'c-SC-m': '3',
    't-CS-s': '4',
    't-CS-m': '5',
    't-SC-s': '6',
    't-SC-m': '7',
}
for index_i, class_name in enumerate(label_arr):
    label_arr[index_i] = class_codes.get(class_name, class_name)

# Despite the name, the labels are NOT one-hot encoded: the conversion is
# commented out and the digit strings are passed through as they are.
label_arr_onehot = label_arr  #to_categorical(label_arr)

# Replace every NaN by the mean of its column.
imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
data_arr = imp_mean.fit_transform(data_arr)

# NOTE(review): `scaled` is not read by any cell visible here; the later cells
# work on the unscaled `data_arr` -- confirm this is intended.
scaler = StandardScaler()
scaled = scaler.fit_transform(data_arr)

In [3]:
# Number of top-ranked features kept by the selection step.
key_feture_number = 10

# 3. Calculation

In [4]:
#--------------------------------------------------------------------------------------------------------------------------------
def write_to_csv(p_data,p_path):
    """Append ``p_data`` to the CSV file at ``p_path``.

    The data is wrapped in a ``pandas.DataFrame`` and written in append mode
    without a header row and without the index column, so repeated calls add
    rows to the same file.

    Parameters
    ----------
    p_data : array-like
        Anything ``pandas.DataFrame`` accepts; a 2-D array yields one CSV row
        per array row.
    p_path : str or os.PathLike
        Destination file. Missing parent directories are created first, so the
        first write into a fresh ``./log`` folder no longer fails.
    """
    # Only path-like targets have a parent directory to create; anything else
    # is handed to pandas untouched.
    if isinstance(p_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(p_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    pd.DataFrame(p_data).to_csv(p_path, mode='a', header=False, index=False, sep=',')

#--------------------------------------------------------------------------------------------------------------------------------       
def mse_check(train, test):
    """Fit a linear regression on ``train`` and return its MSE on ``test``.

    Parameters
    ----------
    train : sequence
        ``train[0]`` holds the regression inputs, ``train[1]`` the targets.
    test : sequence
        Same layout as ``train``; used only for scoring.

    Returns
    -------
    The mean of the squared differences between the predictions for
    ``test[0]`` and ``test[1]``, taken over all elements.
    """
    fit_inputs, fit_targets = train[0], train[1]
    eval_inputs, eval_targets = test[0], test[1]

    regressor = LinearRegression(n_jobs = -1)
    regressor.fit(fit_inputs, fit_targets)

    squared_error = (regressor.predict(eval_inputs) - eval_targets) ** 2
    return squared_error.mean()

def udfs_used(train, test, K, debug = False):
    """Select ``K`` features with UDFS, choosing ``gamma`` on a hold-out split.

    Ten percent of ``train`` is held out as a validation set. For each
    candidate ``gamma`` the UDFS weight matrix is computed on the remaining
    rows, the ``K`` top-ranked feature indices are taken, and a linear
    regression from those features to the targets is scored (MSE) on the
    held-out rows. The index set with the lowest validation MSE wins.

    Parameters
    ----------
    train : sequence
        ``train[0]`` is the feature matrix, ``train[1]`` the regression
        targets used for validation.
    test : sequence
        Only ``test[0]`` is used; it is reduced to the selected columns.
    K : int
        Number of features to keep.
    debug : bool, optional
        Forwarded to ``udfs`` as ``verbose``; also prints the winning indices
        and their validation MSE.

    Returns
    -------
    tuple
        ``(train[0][:, bindices], test[0][:, bindices])``. If no candidate
        ever beats the initial sentinel MSE, ``bindices`` stays empty.
    """
    # The split draws from NumPy's global random state (no random_state given).
    x_train, x_val, y_train, y_val = train_test_split(train[0], train[1], test_size = 0.1)
    bindices = []
    bmse = 1e100  # sentinel: any real MSE below this replaces it
    # NOTE(review): the original trailing comment hints that 1e3 was once part
    # of this grid.
    for gamma in [1e-3, 1e-1, 1e0, 1e1]:#le3
        print("gamma",gamma)
        W = udfs(x_train,verbose=debug, gamma = gamma, max_iter = 100)
        indices = feature_ranking(W)[: K]
        # Fit on the training part of the split only. Fitting on the full
        # `train` set, as before, let the regressor see the very rows it was
        # validated on, which biases the validation MSE.
        mse = mse_check((x_train[:, indices], y_train), (x_val[:, indices], y_val))
        if bmse > mse:
            bmse = mse
            bindices = indices
    if debug:
        print(bindices, bmse)
    return train[0][:, bindices], test[0][:, bindices]
 
#--------------------------------------------------------------------------------------------------------------------------------       
def cal(p_data_arr,
        p_label_arr_onehot,
        p_key_feture_number,
        p_seed):
    """Run one seeded UDFS experiment and log its metrics.

    Steps:
      1. Split the data 80/20 with ``random_state=p_seed``.
      2. Re-seed the hash seed variable, NumPy and ``random`` with ``p_seed``.
      3. Run ``udfs_used`` with the features as their own targets, time it,
         and append the elapsed seconds to ``./log/UDFS_time.csv``.
      4. Call ``F.ETree`` once on all features and once on the selected ones.
      5. Measure how well the selected features linearly reconstruct all
         features on the test split (``mse_check``).
      6. Append the five metrics as one row to ``./log/UDFS_results.csv``.

    Parameters
    ----------
    p_data_arr : array-like
        Feature matrix, one row per sample.
    p_label_arr_onehot : array-like
        One label per row of ``p_data_arr``.
    p_key_feture_number : int
        Number of features to select.
    p_seed : int
        Seed for the split and the random number generators. NumPy integer
        scalars are accepted.

    Returns
    -------
    tuple
        ``(orig_train_acc, orig_test_acc, selec_train_acc, selec_test_acc,
        reconstruction_loss)``.
    """
    # `random.seed` raises TypeError for NumPy integer scalars on
    # Python >= 3.11, and the driver loop passes values from `np.arange`.
    # A plain int behaves the same for every consumer below.
    p_seed = int(p_seed)

    C_train_x, C_test_x, C_train_y, C_test_y = train_test_split(
        p_data_arr, p_label_arr_onehot, test_size=0.2, random_state=p_seed)

    os.environ['PYTHONHASHSEED'] = str(p_seed)
    np.random.seed(p_seed)
    rn.seed(p_seed)

    #--------------------------------------------------------------------------------------------------------------------------------
    # Feature selection is unsupervised here: the features are their own targets.
    train = (C_train_x, C_train_x)
    test = (C_test_x, C_test_x)

    t_start = time.time()
    C_train_selected_x, C_test_selected_x = udfs_used(train, test, p_key_feture_number)
    t_used = time.time() - t_start

    write_to_csv(np.array([t_used]),"./log/UDFS_time.csv")

    # Classification on original features
    # NOTE(review): F.ETree is defined outside this file; it is unpacked as
    # (train accuracy, test accuracy), and the trailing 0 is presumably a
    # seed -- confirm against Functions.py.
    orig_train_acc, orig_test_acc = F.ETree(C_train_x, C_train_y, C_test_x, C_test_y, 0)

    # Classification on selected features
    selec_train_acc, selec_test_acc = F.ETree(C_train_selected_x, C_train_y, C_test_selected_x, C_test_y, 0)

    # Linear reconstruction: selected features -> all features
    train_feature_tuple = (C_train_selected_x, C_train_x)
    test_feature_tuple = (C_test_selected_x, C_test_x)

    reconstruction_loss = mse_check(train_feature_tuple, test_feature_tuple)
    results = np.array([orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss])

    write_to_csv(results.reshape(1,len(results)),"./log/UDFS_results.csv")

    return orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss

In [5]:
# Bind the inputs of the experiment loop.
# NOTE(review): `data_arr` is the imputed but unscaled matrix; the
# standardised `scaled` array from the loading cell is not used here --
# confirm this is intended.
p_data_arr, p_label_arr_onehot, p_key_feture_number = (
    data_arr,
    label_arr_onehot,
    key_feture_number,
)

In [6]:
# Repeat the experiment for seeds 0..49. After the loop, the five names below
# hold the metrics of the last seed only.
for p_seed in np.arange(0, 50):
    (orig_train_acc,
     orig_test_acc,
     selec_train_acc,
     selec_test_acc,
     reconstruction_loss) = cal(p_data_arr, p_label_arr_onehot, p_key_feture_number, p_seed)

gamma 0.001
gamma 0.1
gamma 1.0
gamma 10.0
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 1.0
Testing accuracy： 1.0
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9583333333333334
Testing accuracy： 0.9583333333333334
gamma 0.001
gamma 0.1
gamma 1.0
gamma 10.0
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 1.0
Testing accuracy： 1.0
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9907407407407407
Testing accuracy： 0.9907407407407407
gamma 0.001
gamma 0.1
gamma 1.0
gamma 10.0
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9953703703703703
Testing accuracy： 0.9953703703703703
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9537037037037037
Testing accuracy： 0.9537037037037037
gamma 0.001
gamma 0.1
gamma 1.0
gamma 10.0
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9907407407407407
Testing accuracy： 0.9907407407407407
Training accuracy： 1.0
Training accuracy： 1.0
Te