# 1. Import libraries

In [None]:
#----------------------------Reproducible----------------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import random as rn
import os

# Single seed shared by every random number generator seeded in this cell.
seed=0
# NOTE(review): this is set at runtime, so it presumably only reaches Python
# processes started afterwards and not the hash randomisation of the kernel
# that is already running - confirm.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed NumPy's global generator and Python's built-in `random` module.
np.random.seed(seed)
rn.seed(seed)
# Session configuration restricted to one intra-op and one inter-op thread.
# The commented-out lines in this cell are the pre-`tf.compat.v1` spellings of
# the calls that follow them.
#session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
session_conf =tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

from keras import backend as K

# Seed TensorFlow's graph-level random generator.
#tf.set_random_seed(seed)
tf.compat.v1.set_random_seed(seed)
# Create a session on the default graph using the configuration above.
#sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)

# Hand that session to the Keras backend.
K.set_session(sess)
#----------------------------Reproducible----------------------------------------------------------------------------------------

# NOTE(review): TensorFlow has already been imported when this runs; the
# variable may need to be set before `import tensorflow` to take full effect -
# confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

#--------------------------------------------------------------------------------------------------------------------------------
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Dense, Input, Flatten, Activation, Dropout, Layer
from keras.layers.normalization import BatchNormalization
from keras.utils import to_categorical
from keras import optimizers,initializers,constraints,regularizers
from keras import backend as K
from keras.callbacks import LambdaCallback,ModelCheckpoint
from keras.utils import plot_model
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer 
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import pandas as pd
import time
import h5py
import math
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline
matplotlib.style.use('ggplot')

from skimage import io
from PIL import Image
from sklearn.model_selection import train_test_split
import scipy.sparse as sparse

#--------------------------------------------------------------------------------------------------------------------------------
#Import our self-defined methods
import sys
sys.path.append(r"../Defined")
import Functions as F

# The following code should be added before the keras model
#np.random.seed(seed)

In [None]:
# L1 regularisation strength. `cal` reads this module-level name and passes it
# to the feature-selection layer, which uses it in `regularizers.l1(...)`.
l1_lambda=1

# 2. Loading data

In [None]:
dataset_path='../Dataset/coil-20-proc/'

# Maps an object id (taken from the file name, kept as a string) to the list of
# that object's images, each resized to 20x20 and stored as a NumPy array.
samples={}
for dirpath, dirnames, filenames in os.walk(dataset_path):
    # Sorting in place fixes both the traversal order of os.walk and the order
    # in which images are appended, so the sample order is deterministic.
    dirnames.sort()
    filenames.sort()
    for filename in filenames:
        # Keep PNG files only and skip any file whose name contains
        # 'checkpoint' (presumably notebook checkpoint copies). A substring
        # test is used because `find(...) > 0` lets through names that
        # *start* with 'checkpoint' (find returns 0 there).
        if not filename.endswith(".png") or 'checkpoint' in filename:
            continue
        full_path = os.path.join(dirpath, filename)
        # Assumes names of the form '<3-char prefix><id>__<rest>.png': take the
        # part before '__' and drop its first three characters to get the id.
        file_identifier=filename.split('__')[0][3:]
        # Image.LANCZOS is the filter that the Image.ANTIALIAS alias pointed
        # to; the alias itself was removed in Pillow 10. The context manager
        # closes the underlying file once the resized pixels are copied out.
        with Image.open(full_path) as image_:
            image=np.asarray(image_.resize((20, 20),Image.LANCZOS))
        samples.setdefault(file_identifier, []).append(image)
        
#plt.imshow(samples['1'][0].reshape(20,20))

In [None]:
data_arr_list=[]
label_arr_list=[]
for key_i, images_i in samples.items():
    class_images=np.array(images_i)
    data_arr_list.append(class_images)
    # Zero-based integer label, repeated once per image actually loaded for
    # this object (instead of assuming exactly 72 images per object).
    label_arr_list.append(np.full(len(class_images), int(key_i)-1))

# Flatten every 20x20 image into a 400-value row. The row count comes from the
# data rather than a hard-coded 1440; reshape still raises if an image does not
# hold exactly 20*20 values. Dividing by 255 assumes 8-bit pixel values.
stacked_images=np.concatenate(data_arr_list)
data_arr=stacked_images.reshape(stacked_images.shape[0], 20*20).astype('float32') / 255.
# Despite the name, these are integer class ids: the one-hot conversion
# (to_categorical) is intentionally left disabled.
label_arr_onehot=np.concatenate(label_arr_list)#to_categorical(np.concatenate(label_arr_list))

In [None]:
# Number of input features to keep. `cal` also uses this value as the width of
# the autoencoder's hidden layer.
key_feture_number=50

# 3. Calculation

In [None]:
#--------------------------------------------------------------------------------------------------------------------------------
def write_to_csv(p_data,p_path):
    """Append ``p_data`` to the CSV file at ``p_path``.

    The data is wrapped in a ``pandas.DataFrame`` and written in append mode
    with no header row and no index column, using ',' as the separator.
    The parent directory of ``p_path`` is created first if it does not exist,
    so a missing log folder no longer makes the write fail.

    Parameters
    ----------
    p_data : anything accepted by ``pandas.DataFrame`` (e.g. a 1-D or 2-D array).
    p_path : path of the CSV file to append to.
    """
    target_dir = os.path.dirname(p_path)
    # An empty dirname means "current directory", which needs no creation.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    pd.DataFrame(p_data).to_csv(p_path, mode='a',header=False,index=False,sep=',')

#--------------------------------------------------------------------------------------------------------------------------------       
def mse_check(train, test):
    """Fit a linear regression on ``train`` and return its mean squared error on ``test``.

    Both arguments are indexable pairs: element 0 holds the regression inputs
    and element 1 the regression targets. The error is averaged over every
    element of the squared residual array.
    """
    train_inputs, train_targets = train[0], train[1]
    test_inputs, test_targets = test[0], test[1]

    # n_jobs=-1 is passed through to scikit-learn's LinearRegression.
    regressor = LinearRegression(n_jobs = -1)
    regressor.fit(train_inputs, train_targets)

    residuals = regressor.predict(test_inputs) - test_targets
    return (residuals ** 2).mean()
 
#--------------------------------------------------------------------------------------------------------------------------------       
def cal(p_data_arr,
        p_label_arr_onehot,
        p_key_feture_number,
        p_epochs_number,
        p_batch_size_value,
        p_seed):
    """Run one seeded experiment: train the feature-selection autoencoder and score the kept features.

    The data is split 80/20 into train/test, and the training part 90/10 into
    fit/validation, all with ``random_state=p_seed``. An autoencoder whose first
    layer scales every input feature by a non-negative, L1-regularised weight is
    trained to reconstruct its own input. ``F.top_k_keepWeights_1`` then picks
    ``p_key_feture_number`` features from those weights, and the selection is
    evaluated with ``F.ETree`` (original vs. selected features) and with a
    linear regression that reconstructs all features from the selected ones.

    Side effects: creates ``./log`` if needed, appends the training time to
    ``./log/AgnoSS_time.csv`` and the five result values to
    ``./log/AgnoSS_results.csv``, re-seeds the global RNGs, and prints the test
    metrics. The L1 strength is read from the module-level ``l1_lambda``.

    Parameters
    ----------
    p_data_arr : 2-D array, one row per sample and one column per feature.
    p_label_arr_onehot : labels aligned with the rows of ``p_data_arr``.
    p_key_feture_number : number of features to keep; also the hidden-layer width.
    p_epochs_number : number of training epochs passed to ``fit``.
    p_batch_size_value : batch size passed to ``fit``.
    p_seed : seed used for the splits, the global RNGs and the Dense initialisers.

    Returns
    -------
    tuple
        ``(orig_train_acc, orig_test_acc, selec_train_acc, selec_test_acc,
        reconstruction_loss)``. Every entry is -1 when no feature-selection
        weight is positive after training.
    """
    # Create the log folder up front so that a missing directory is reported
    # before the long training run instead of after it.
    os.makedirs("./log", exist_ok=True)

    C_train_x,C_test_x,C_train_y,C_test_y= train_test_split(p_data_arr,p_label_arr_onehot,test_size=0.2,random_state=p_seed)
    # Only the feature halves of this second split are used (autoencoder
    # input and validation data); the label halves are discarded.
    x_train,x_validate,_,_= train_test_split(C_train_x,C_train_y,test_size=0.1,random_state=p_seed)

    os.environ['PYTHONHASHSEED'] = str(p_seed)
    np.random.seed(p_seed)
    rn.seed(p_seed)
    tf.compat.v1.set_random_seed(p_seed)

    #--------------------------------------------------------------------------------------------------------------------------------
    class Feature_Select_Layer(Layer):
        """Layer that multiplies every input feature by its own trainable weight.

        The weight vector has one entry per input feature, starts uniformly in
        [0, 1), is constrained to stay non-negative and carries an L1 penalty
        of strength ``l1_lambda``.
        """

        def __init__(self, output_dim, l1_lambda, **kwargs):
            super(Feature_Select_Layer, self).__init__(**kwargs)
            self.output_dim = output_dim
            self.l1_lambda=l1_lambda

        def build(self, input_shape):
            self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1],),
                                      initializer=initializers.RandomUniform(minval=0., maxval=1.),
                                      trainable=True,
                                      regularizer=regularizers.l1(self.l1_lambda),
                                      constraint=constraints.NonNeg())
            super(Feature_Select_Layer, self).build(input_shape)

        def call(self, x, selection=False,k=p_key_feture_number):
            kernel=self.kernel
            if selection:
                # Zero every weight smaller than the k-th largest one.
                kernel_=K.transpose(kernel)
                print(kernel_.shape)
                kth_largest = tf.math.top_k(kernel_, k=k)[0][-1]
                kernel = tf.where(condition=K.less(kernel,kth_largest),x=K.zeros_like(kernel),y=kernel)
            # Multiplying by diag(kernel) scales column j of x by kernel[j].
            return K.dot(x, tf.linalg.tensor_diag(kernel))

        def compute_output_shape(self, input_shape):
            return (input_shape[0], self.output_dim)

    #--------------------------------------------------------------------------------------------------------------------------------
    def Identity_Autoencoder(p_data_feature,
                             p_encoding_dim,
                             p_learning_rate,
                             p_l1_lambda,
                             p_seed=p_seed):
        """Build and compile the autoencoder.

        Layout: input -> Feature_Select_Layer -> Dense(tanh) -> Dense(tanh).
        Returns ``(autoencoder, latent_encoder_score)``, where the second model
        maps the input to the hidden-layer activations.
        """
        # Seed both Dense initialisers with this run's seed. The module-level
        # `seed` used previously is always 0, which gave every run the same
        # Dense initialisation regardless of `p_seed`. `int(...)` turns NumPy
        # integers (e.g. from np.arange) into plain Python ints.
        dense_seed=int(p_seed)

        input_img = Input(shape=(p_data_feature,), name='autoencoder_input')

        feature_selection = Feature_Select_Layer(output_dim=p_data_feature,
                                             l1_lambda=p_l1_lambda,
                                             input_shape=(p_data_feature,),
                                             name='feature_selection')

        feature_selection_score=feature_selection(input_img)

        encoded = Dense(p_encoding_dim,
                    activation='tanh',
                    kernel_initializer=initializers.glorot_uniform(dense_seed),
                    name='autoencoder_hidden_layer')

        encoded_score=encoded(feature_selection_score)

        bottleneck_score=encoded_score

        decoded = Dense(p_data_feature,
                    activation='tanh',
                    kernel_initializer=initializers.glorot_uniform(dense_seed),
                    name='autoencoder_output')

        decoded_score =decoded(bottleneck_score)

        latent_encoder_score = Model(input_img, bottleneck_score)
        autoencoder = Model(input_img, decoded_score)

        autoencoder.compile(loss='mean_squared_error',
                        optimizer=optimizers.Adam(lr=p_learning_rate))

        #print('Autoencoder Structure-------------------------------------')
        #autoencoder.summary()
        return autoencoder,latent_encoder_score

    # The timer covers model construction and training only.
    t_start = time.time()

    Ide_AE,latent_encoder_score_Ide_AE=Identity_Autoencoder(p_data_feature=x_train.shape[1],
                                                            p_encoding_dim=p_key_feture_number,
                                                            p_learning_rate= 1E-2,
                                                            p_l1_lambda=l1_lambda,
                                                            p_seed=p_seed)

    Ide_AE_history = Ide_AE.fit(x_train, x_train,
                            epochs=p_epochs_number,
                            batch_size=p_batch_size_value,
                            shuffle=True,
                            verbose=0,
                            validation_data=(x_validate,x_validate))

    t_used=time.time() - t_start

    write_to_csv(np.array([t_used]),"./log/AgnoSS_time.csv")

    #--------------------------------------------------------------------------------------------------------------------------------
    # Weights of the layer at index 1, i.e. the one applied directly to the
    # model input (the feature-selection layer).
    selection_weights=Ide_AE.get_layer(index=1).get_weights()[0]

    key_number=p_key_feture_number
    # NOTE(review): presumably returns a per-feature mask/weight vector that is
    # non-zero only for the top `key_number` weights - confirm in Functions.py.
    key_features=F.top_k_keepWeights_1(selection_weights,key_number)

    selected_position_list=np.where(key_features>0)[0]

    # Evaluate only if training left at least one positive selection weight.
    if np.sum(selection_weights>0)>0:

        # Classification on original features
        train_feature=C_train_x
        train_label=C_train_y
        test_feature=C_test_x
        test_label=C_test_y
        orig_train_acc,orig_test_acc=F.ETree(train_feature,train_label,test_feature,test_label,0)

        # Classification on selected features
        train_feature_=np.multiply(C_train_x, key_features)
        train_feature=F.compress_zero_withkeystructure(train_feature_,selected_position_list)
        train_label=C_train_y

        test_feature_=np.multiply(C_test_x, key_features)
        test_feature=F.compress_zero_withkeystructure(test_feature_,selected_position_list)
        test_label=C_test_y

        selec_train_acc,selec_test_acc=F.ETree(train_feature,train_label,test_feature,test_label,0)

        # Linear reconstruction: predict all features from the selected ones.
        train_feature_=np.multiply(C_train_x, key_features)
        C_train_selected_x=F.compress_zero_withkeystructure(train_feature_,selected_position_list)

        test_feature_=np.multiply(C_test_x, key_features)
        C_test_selected_x=F.compress_zero_withkeystructure(test_feature_,selected_position_list)

        train_feature_tuple=(C_train_selected_x,C_train_x)
        test_feature_tuple=(C_test_selected_x,C_test_x)

        reconstruction_loss=mse_check(train_feature_tuple, test_feature_tuple)

        print("Classification on original data",orig_test_acc)
        print("Classification on selected features",selec_test_acc)
        print("Linear reconstruction loss",reconstruction_loss)
        print("-----------------------------------------------------------------------------")
        print("\n\n")
    else:
        # Sentinel values marking a run in which nothing could be selected.
        orig_train_acc=-1
        orig_test_acc=-1
        selec_train_acc=-1
        selec_test_acc=-1
        reconstruction_loss=-1

    results=np.array([orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss])

    # One CSV row per run.
    write_to_csv(results.reshape(1,len(results)),"./log/AgnoSS_results.csv")

    return orig_train_acc,orig_test_acc,selec_train_acc,selec_test_acc,reconstruction_loss

In [None]:
# Training hyper-parameters handed to `cal` (and from there to `fit`).
epochs_number=1000
batch_size_value=128

# Module-level aliases that carry the same names as `cal`'s parameters; the
# experiment loop passes them to `cal` positionally.
p_data_arr=data_arr
p_label_arr_onehot=label_arr_onehot
p_key_feture_number=key_feture_number
p_epochs_number=epochs_number
p_batch_size_value=batch_size_value

In [None]:
# Repeat the experiment once per seed in 0..49. After the loop, the five
# module-level result names hold the values returned for the last seed.
for p_seed in np.arange(0,50):
    (orig_train_acc,
     orig_test_acc,
     selec_train_acc,
     selec_test_acc,
     reconstruction_loss)=cal(p_data_arr,
                              p_label_arr_onehot,
                              p_key_feture_number,
                              p_epochs_number,
                              p_batch_size_value,
                              p_seed)