# 1. Import libraries

In [1]:
#----------------------------Reproducible----------------------------------------------------------------------------------------
import numpy as np
import random as rn
import os

seed=0
os.environ['PYTHONHASHSEED'] = str(seed)

np.random.seed(seed)
rn.seed(seed)

#----------------------------Reproducible----------------------------------------------------------------------------------------

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

#--------------------------------------------------------------------------------------------------------------------------------
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline
matplotlib.style.use('ggplot')

import random
import scipy.sparse as sparse
import scipy.io

from keras.utils import to_categorical
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from skfeature.function.similarity_based import lap_score
from skfeature.utility import construct_W
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import time
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer 
import pandas as pd

#--------------------------------------------------------------------------------------------------------------------------------
#Import ourslef defined methods
import sys
sys.path.append(r"../Defined")
import Functions as F

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# 2. Loading data

In [2]:
train_data_arr=np.array(pd.read_csv('../Dataset/final_X_train.txt',header=None))
test_data_arr=np.array(pd.read_csv('../Dataset/final_X_test.txt',header=None))
train_label_arr=(np.array(pd.read_csv('../Dataset/final_y_train.txt',header=None))-1)
test_label_arr=(np.array(pd.read_csv('../Dataset/final_y_test.txt',header=None))-1)

data_arr=np.r_[train_data_arr,test_data_arr]
label_arr=np.r_[train_label_arr,test_label_arr]
label_arr_onehot=label_arr#to_categorical(label_arr)
print(data_arr.shape)
print(label_arr_onehot.shape)

data_arr=MinMaxScaler(feature_range=(0,1)).fit_transform(data_arr)

(5744, 561)
(5744, 1)


In [3]:
key_feture_number=50

# 3 .Calculation

In [4]:
#--------------------------------------------------------------------------------------------------------------------------------
def write_to_csv(p_data,p_path):
    dataframe = pd.DataFrame(p_data)
    dataframe.to_csv(p_path, mode='a',header=False,index=False,sep=',')
    del dataframe

#--------------------------------------------------------------------------------------------------------------------------------       
def mse_check(train, test):
    LR = LinearRegression(n_jobs = -1)
    LR.fit(train[0], train[1])
    MSELR = ((LR.predict(test[0]) - test[1]) ** 2).mean()
    return MSELR
 
#--------------------------------------------------------------------------------------------------------------------------------       
def cal(p_data_arr,\
        p_label_arr_onehot,\
        p_key_feture_number,\
        p_seed):
    
    C_train_x,C_test_x,C_train_y,C_test_y= train_test_split(p_data_arr,p_label_arr_onehot,test_size=0.2,random_state=p_seed)

    os.environ['PYTHONHASHSEED'] = str(p_seed)
    np.random.seed(p_seed)
    rn.seed(p_seed)
    
    train_feature=C_train_x
    train_label=C_train_y
    test_feature=C_test_x
    test_label=C_test_y
    
    #--------------------------------------------------------------------------------------------------------------------------------
    # Construct affinity matrix
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn", "weight_mode": "heat_kernel", "k": 5, 't': 1}
    
    t_start = time.time()

    train_W = construct_W.construct_W(train_feature, **kwargs_W)

    # Obtain the scores of features, and sort the feature scores in an ascending order according to the feature scores
    train_score = lap_score.lap_score(train_feature, W=train_W)
    train_idx = lap_score.feature_ranking(train_score)
    
    t_used=time.time() - t_start
    write_to_csv(np.array([t_used]),"./log/LS_time.csv")

    # oOtain the dataset on the selected features
    C_train_selected_x = train_feature[:, train_idx[0:key_feture_number]]

    test_W = construct_W.construct_W(test_feature, **kwargs_W)

    # Obtain the scores of features, and sort the feature scores in an ascending order according to the feature scores
    test_score = lap_score.lap_score(test_feature, W=test_W)
    test_idx = lap_score.feature_ranking(test_score)

    # Obtain the dataset on the selected features
    C_test_selected_x = test_feature[:, test_idx[0:key_feture_number]]

    # Classification on original features
    train_feature=C_train_x
    train_label=C_train_y
    test_feature=C_test_x
    test_label=C_test_y
    
    orig_train_acc,orig_test_acc=F.ETree(train_feature,train_label,test_feature,test_label,0)
    
    # Classification on selected features
    train_feature=C_train_selected_x
    train_label=C_train_y
    test_feature=C_test_selected_x
    test_label=C_test_y

    selec_train_acc,selec_test_acc=F.ETree(train_feature,train_label,test_feature,test_label,0)

    # Linear reconstruction
    train_feature_tuple=(C_train_selected_x,C_train_x)
    test_feature_tuple=(C_test_selected_x,C_test_x)

    reconstruction_loss=mse_check(train_feature_tuple, test_feature_tuple)
    results=np.array([orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss])
    
    write_to_csv(results.reshape(1,len(results)),"./log/LS_results.csv")
    
    return orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss

In [5]:
p_data_arr=data_arr
p_label_arr_onehot=label_arr_onehot
p_key_feture_number=key_feture_number

In [None]:
for p_seed in np.arange(0,50):
    orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss=cal(p_data_arr,\
                                                                                        p_label_arr_onehot,\
                                                                                        p_key_feture_number,\
                                                                                        p_seed)

  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9251523063533508
Testing accuracy： 0.9251523063533508


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.2854656222802437
Testing accuracy： 0.2854656222802437


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.9338555265448216
Testing accuracy： 0.9338555265448216


  clf.fit(p_train_feature, p_train_label)


Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.31592689295039167
Testing accuracy： 0.31592689295039167
