# 1. Import libraries

In [1]:
#----------------------------Reproducible----------------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import random as rn
import os

seed=0
os.environ['PYTHONHASHSEED'] = str(seed)

np.random.seed(seed)
rn.seed(seed)
#session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
session_conf =tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

from keras import backend as K

#tf.set_random_seed(seed)
tf.compat.v1.set_random_seed(seed)
#sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)

K.set_session(sess)
#----------------------------Reproducible----------------------------------------------------------------------------------------

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

#--------------------------------------------------------------------------------------------------------------------------------
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline
matplotlib.style.use('ggplot')

import random
import scipy.sparse as sparse
import scipy.io

from keras.utils import to_categorical
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from skfeature.utility import construct_W
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from skfeature.utility import construct_W
from skfeature.utility.sparse_learning import feature_ranking
import time
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer 
import pandas as pd

#--------------------------------------------------------------------------------------------------------------------------------
#Import ourslef defined methods
import sys
sys.path.append(r"../Defined")
import Functions as F

Using TensorFlow backend.


# 2. Loading data

In [2]:
data_path="../Dataset/GLIOMA.mat"
Data = scipy.io.loadmat(data_path)

data_arr_=Data['X']
label_arr=Data['Y'][:, 0]

data_arr=MinMaxScaler(feature_range=(0,1)).fit_transform(data_arr_)

label_arr_onehot=label_arr

In [3]:
key_feture_number=64

# 3 .Calculation

In [4]:
#--------------------------------------------------------------------------------------------------------------------------------
def write_to_csv(p_data,p_path):
    dataframe = pd.DataFrame(p_data)
    dataframe.to_csv(p_path, mode='a',header=False,index=False,sep=',')
    del dataframe

#--------------------------------------------------------------------------------------------------------------------------------       
def mse_check(train, test):
    LR = LinearRegression(n_jobs = -1)
    LR.fit(train[0], train[1])
    MSELR = ((LR.predict(test[0]) - test[1]) ** 2).mean()
    return MSELR

def next_batch(samples, labels, num):
    # Return a total of `num` random samples and labels.
    idx = np.random.choice(len(samples), num)

    return samples[idx], labels[idx]

def standard_single_hidden_layer_autoencoder(X, units, O):
    reg_alpha = 1e-3
    D = X.shape[1]
    weights = tf.get_variable("weights", [D, units])
    biases = tf.get_variable("biases", [units])
    X = tf.matmul(X, weights) + biases
    X = tf.layers.dense(X, O, kernel_regularizer = tf.contrib.layers.l2_regularizer(reg_alpha))
    return X, weights

def aefs_subset_selector(train, K, epoch_num=1000, alpha=0.1):
    D = train[0].shape[1]
    O = train[1].shape[1]
    learning_rate = 0.001
    
    tf.reset_default_graph()
    
    X = tf.placeholder(tf.float32, (None, D))
    TY = tf.placeholder(tf.float32, (None, O))
    Y, weights = standard_single_hidden_layer_autoencoder(X, K, O)
    
    loss = tf.reduce_mean(tf.square(TY - Y)) + alpha * tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(weights), axis=1)), axis=0) + tf.losses.get_total_loss()
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    
    init = tf.global_variables_initializer()
    
    batch_size = 8
    batch_per_epoch = train[0].shape[0] // batch_size
    
    costs = []
    
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = False
    
    with tf.Session(config = session_config) as sess:
        sess.run(init)
        for ep in range(epoch_num):
            cost = 0
            for batch_n in range(batch_per_epoch):
                imgs, yimgs = next_batch(train[0], train[1], batch_size)
                _, c, p = sess.run([train_op, loss, weights], feed_dict = {X: imgs, TY: yimgs})
                cost += c / batch_per_epoch
            costs.append(cost)
            
    return list(np.argmax(np.abs(p), axis=0)), costs

def AEFS(train, test, K, debug = True):
    x_train, x_val, y_train, y_val = train_test_split(train[0], train[1], test_size = 0.1)
    print("y_train.shape",y_train.shape)
    bindices = []
    bmse = 1e100
    for alpha in [1e-3, 1e-1, 1e1, 1e3]:
        print("alpha",alpha)
        indices, _ = aefs_subset_selector(train, K)
        mse = mse_check((train[0][:, indices], train[1]), (x_val[:, indices], y_val))
        if bmse > mse:
            bmse = mse
            bindices = indices
    if debug:
        print(bindices, bmse)
    return train[0][:, bindices], test[0][:, bindices]
 
#--------------------------------------------------------------------------------------------------------------------------------       
def cal(p_data_arr,\
        p_label_arr_onehot,\
        p_key_feture_number,\
        p_seed):
    
    C_train_x,C_test_x,C_train_y,C_test_y= train_test_split(p_data_arr,p_label_arr_onehot,test_size=0.2,random_state=p_seed)

    os.environ['PYTHONHASHSEED'] = str(p_seed)
    np.random.seed(p_seed)
    rn.seed(p_seed)
    
    train=(C_train_x,C_train_x)
    test=(C_test_x,C_test_x)
    
    #--------------------------------------------------------------------------------------------------------------------------------
    t_start = time.time()

    C_train_selected_x, C_test_selected_x = AEFS((train[0], train[0]), (test[0], test[0]),  key_feture_number)

    t_used=time.time() - t_start
    
    write_to_csv(np.array([t_used]),"./log/AEFS_time.csv")
    
    # Classification on original features
    train_feature=C_train_x
    train_label=C_train_y
    test_feature=C_test_x
    test_label=C_test_y
    
    orig_train_acc,orig_test_acc=F.ETree(train_feature,train_label,test_feature,test_label,0)
    
    # Classification on selected features
    train_feature=C_train_selected_x
    train_label=C_train_y
    test_feature=C_test_selected_x
    test_label=C_test_y

    selec_train_acc,selec_test_acc=F.ETree(train_feature,train_label,test_feature,test_label,0)

    # Linear reconstruction
    train_feature_tuple=(C_train_selected_x,C_train_x)
    test_feature_tuple=(C_test_selected_x,C_test_x)

    reconstruction_loss=mse_check(train_feature_tuple, test_feature_tuple)
    results=np.array([orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss])
    
    write_to_csv(results.reshape(1,len(results)),"./log/AEFS_results.csv")
    
    return orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss

In [5]:
p_data_arr=data_arr
p_label_arr_onehot=label_arr_onehot
p_key_feture_number=key_feture_number

In [None]:
for p_seed in np.arange(0,50):
    orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss=cal(p_data_arr,\
                                                                                        p_label_arr_onehot,\
                                                                                        p_key_feture_number,\
                                                                                        p_seed)

y_train.shape (36, 4434)
alpha 0.001
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
alpha 0.1
alpha 10.0
alpha 1000.0
[1999, 1536, 1947, 3386, 1419, 2324, 2490, 694, 2991, 2091, 1415, 2905, 4296, 2046, 2998, 3762, 2841, 4046, 4234, 1118, 326, 1212, 1353, 2963, 1751, 2696, 1205, 85, 4389, 805, 1683, 3258, 4373, 43, 2345, 1218, 1118, 2516, 3187, 2671, 2913, 3696, 2248, 1385, 969, 980, 3636, 1412, 3696, 1537, 3400, 2964, 334, 856, 2309, 780, 3293, 689, 3288, 4063, 3846, 4207, 1210, 3678] 7.05450927702547e-31
Training accuracy： 1.0
Training accuracy： 