# 1. Import libraries

In [1]:
#----------------------------Reproducible----------------------------------------------------------------------------------------
import numpy as np
import random as rn
import os

seed=0
os.environ['PYTHONHASHSEED'] = str(seed)

np.random.seed(seed)
rn.seed(seed)

#----------------------------Reproducible----------------------------------------------------------------------------------------

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

#--------------------------------------------------------------------------------------------------------------------------------
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline
matplotlib.style.use('ggplot')

import random
import scipy.sparse as sparse
import scipy.io

from keras.utils import to_categorical
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from skfeature.function.similarity_based import lap_score
from skfeature.utility import construct_W
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression
from skfeature.function.sparse_learning_based.UDFS import udfs
from skfeature.utility.sparse_learning import feature_ranking
import time

from concrete_autoencoder import ConcreteAutoencoderFeatureSelector
from keras.layers import Dense
import pandas as pd

Using TensorFlow backend.


In [2]:
#--------------------------------------------------------------------------------------------------------------------------------
def ETree(p_train_feature,p_train_label,p_test_feature,p_test_label,p_seed):
    clf = ExtraTreesClassifier(n_estimators=50, random_state=p_seed)
    
    # Training
    clf.fit(p_train_feature, p_train_label)
    
    # Training accuracy
    print('Training accuracy：',clf.score(p_train_feature, np.array(p_train_label)))
    print('Training accuracy：',accuracy_score(np.array(p_train_label),clf.predict(p_train_feature)))
    #print('Training accuracy：',np.sum(clf.predict(p_train_feature)==np.array(p_train_label))/p_train_label.shape[0])

    # Testing accuracy
    print('Testing accuracy：',clf.score(p_test_feature, np.array(p_test_label)))
    print('Testing accuracy：',accuracy_score(np.array(p_test_label),clf.predict(p_test_feature)))
    #print('Testing accuracy：',np.sum(clf.predict(p_test_feature)==np.array(p_test_label))/p_test_label.shape[0])

In [3]:
#--------------------------------------------------------------------------------------------------------------------------------
def write_to_csv(p_data,p_path):
    dataframe = pd.DataFrame(p_data)
    dataframe.to_csv(p_path, mode='a',header=False,index=False,sep=',')

# 2. Loading data

In [4]:
data_path="./Dataset/SMK_CAN_187.mat"
Data = scipy.io.loadmat(data_path)

data_arr=Data['X']
label_arr=Data['Y'][:, 0]-1

C_train_x,C_test_x,C_train_y,C_test_y= train_test_split(data_arr,label_arr,test_size=0.2,random_state=seed)

print('Shape of C_train_x: ' + str(C_train_x.shape)) 
print('Shape of C_train_y: ' + str(C_train_y.shape)) 
print('Shape of C_test_x: ' + str(C_test_x.shape)) 
print('Shape of C_test_y: ' + str(C_test_y.shape)) 

Shape of C_train_x: (149, 19993)
Shape of C_train_y: (149,)
Shape of C_test_x: (38, 19993)
Shape of C_test_y: (38,)


In [5]:
key_feture_number=64

# 3. Model

In [6]:
def decoder(x):
    x = Dense(key_feture_number)(x)
    x = Dense(data_arr.shape[1])(x)
    return x

In [7]:
start = time.clock()

selector = ConcreteAutoencoderFeatureSelector(K = key_feture_number, output_function = decoder, num_epochs = 1000)
selector.fit(C_train_x, C_train_x, C_test_x, C_test_x)

train_compressed_Data=data_arr[:, selector.get_support(indices=True)]

time_cost=time.clock() - start
write_to_csv(np.array([time_cost]),"./log/CAS_time"+str(key_feture_number)+".csv")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


  """Entry point for launching an IPython kernel.


Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 19993)             0         
_________________________________________________________________
concrete_select (ConcreteSel (None, 64)                1279553   
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 19993)             1299545   
Total params: 2,583,258
Trainable params: 2,583,257
Non-trainable params: 1
_________________________________________________________________
None

Train on 149 samples, validate on 38 samples
Epoch 1/1000
mean max of probabilities: 5.1163606e-05 - temperature 10.0
Epoch 2/1000
mean max of probabilities: 5.125959e-05 - temperature 9.940314
Epoch 3/1000
mean max of probabilities: 5.13315

  


In [8]:
train_compressed_Data.shape

(187, 64)

# 4. Classifying

### Extra Trees

In [9]:
train_feature=C_train_x
train_label=C_train_y
test_feature=C_test_x
test_label=C_test_y

print('Shape of train_feature: ' + str(train_feature.shape)) 
print('Shape of train_label: ' + str(train_label.shape)) 
print('Shape of test_feature: ' + str(test_feature.shape)) 
print('Shape of test_label: ' + str(test_label.shape)) 

p_seed=seed
ETree(train_feature,train_label,test_feature,test_label,p_seed)

Shape of train_feature: (149, 19993)
Shape of train_label: (149,)
Shape of test_feature: (38, 19993)
Shape of test_label: (38,)
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.7368421052631579
Testing accuracy： 0.7368421052631579


In [10]:
C_train_selected_x,C_test_selected_x,C_train_y,C_test_y= train_test_split(train_compressed_Data,label_arr,test_size=0.2,random_state=seed)

print('Shape of C_train_x: ' + str(C_train_x.shape)) 
print('Shape of C_train_y: ' + str(C_train_y.shape)) 
print('Shape of C_test_x: ' + str(C_test_x.shape)) 
print('Shape of C_test_y: ' + str(C_test_y.shape)) 

Shape of C_train_x: (149, 19993)
Shape of C_train_y: (149,)
Shape of C_test_x: (38, 19993)
Shape of C_test_y: (38,)


In [11]:
train_feature=C_train_selected_x
train_label=C_train_y

test_feature=C_test_selected_x
test_label=C_test_y

print('Shape of train_feature: ' + str(train_feature.shape)) 
print('Shape of train_label: ' + str(train_label.shape)) 
print('Shape of test_feature: ' + str(test_feature.shape)) 
print('Shape of test_label: ' + str(test_label.shape)) 

p_seed=seed
ETree(train_feature,train_label,test_feature,test_label,p_seed)

Shape of train_feature: (149, 64)
Shape of train_label: (149,)
Shape of test_feature: (38, 64)
Shape of test_label: (38,)
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.7368421052631579
Testing accuracy： 0.7368421052631579


# 5. Classifying 2

## Extra Trees

In [12]:
train_feature=C_train_selected_x
train_label=C_train_y

test_feature=C_test_selected_x
test_label=C_test_y

print('Shape of train_feature: ' + str(train_feature.shape)) 
print('Shape of train_label: ' + str(train_label.shape)) 
print('Shape of test_feature: ' + str(test_feature.shape)) 
print('Shape of test_label: ' + str(test_label.shape)) 

p_seed=seed
ETree(train_feature,train_label,test_feature,test_label,p_seed)

Shape of train_feature: (149, 64)
Shape of train_label: (149,)
Shape of test_feature: (38, 64)
Shape of test_label: (38,)
Training accuracy： 1.0
Training accuracy： 1.0
Testing accuracy： 0.7368421052631579
Testing accuracy： 0.7368421052631579


# 6. Reconstruction loss

In [13]:
from sklearn.linear_model import LinearRegression

def mse_check(train, test):
    LR = LinearRegression(n_jobs = -1)
    LR.fit(train[0], train[1])
    MSELR = ((LR.predict(test[0]) - test[1]) ** 2).mean()
    return MSELR

In [14]:
train_feature_tuple=(C_train_selected_x,C_train_x)
test_feature_tuple=(C_test_selected_x,C_test_x)

reconstruction_loss=mse_check(train_feature_tuple, test_feature_tuple)
print(reconstruction_loss)

0.07727934996354033
