In [None]:
!pip install talos==0.6.4
!pip install keras==2.2.4

In [3]:
import tensorflow as tf
import keras
print(keras.__version__)

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Layer, Activation
from keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs, make_moons, make_circles
from keras import backend as K
import pandas as pd
import re

from talos.model.early_stopper import early_stopper
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard

import talos as ta
from keras import optimizers
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import StandardScaler
import timeit
from IPython.display import clear_output 
import os

from keras import losses
import keras.backend as K
import keras.backend.tensorflow_backend as tfb

%matplotlib inline
import matplotlib.ticker


class LIGHTVSigmoid(Layer):
    """Parameter-free activation layer shaped like a two-phase logistic growth curve.

    For inputs ``x <= T`` the output is the logistic growth solution

        C / (1 - (1 - C / N0) * exp(-r * x))

    with capacity ``C``, initial value ``N0`` and rate ``r``.  For inputs
    ``x > T`` the curve switches to rate ``r - E`` and capacity
    ``(r - E) * C / r``, restarted from ``NT`` at ``x = T``.

    The selection between the two phases is done element-wise.  A Python
    ``if`` on a symbolic tensor cannot do that (under TF1 graph mode the
    comparison is a plain object comparison and is always false, which made
    ``T`` and ``E`` silently ineffective), so ``K.switch`` is used instead.

    Arguments:
        T: switching point between the two phases.
        C: capacity (upper asymptote) of the first phase.
        r: growth rate of the first phase.
        E: amount subtracted from ``r`` in the second phase
            (presumably an "effort"/harvesting term -- TODO confirm).
        N0: value of the first-phase curve at input 0.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, T, C, r, E, N0, **kwargs):
        super(LIGHTVSigmoid, self).__init__(**kwargs)
        # All hyper-parameters are stored as constants in the backend float type.
        self.T = K.cast_to_floatx(T)
        self.r = K.cast_to_floatx(r)
        self.C = K.cast_to_floatx(C)
        self.E = K.cast_to_floatx(E)
        self.N0 = K.cast_to_floatx(N0)

    def call(self, inputs):
        """Apply the two-phase curve element-wise; output has the input's shape."""
        # Each phase is evaluated only on its own side of T.  Without the
        # clamping, the phase that is NOT selected for an element could still
        # overflow there and leak NaN into the gradient through the select op.
        below = K.minimum(inputs, self.T)
        above = K.maximum(inputs, self.T)

        # Phase 1 (x <= T): plain logistic growth.
        before = self.C / (1 - (1 - self.C / self.N0) * K.exp(-self.r * below))

        # Phase 2 (x > T): logistic growth with reduced rate and capacity.
        # NOTE(review): as in the original formula, NT is evaluated at the
        # current input rather than at the constant T -- confirm intended.
        NT = self.C / (1 - (1 - self.C / self.N0) * K.exp(-self.r * above))
        reduced_capacity = (self.r - self.E) * self.C / self.r
        after = reduced_capacity / (
            1 - (1 - reduced_capacity / NT)
            * K.exp(-(self.r - self.E) * (above - self.T))
        )

        return K.switch(K.greater(inputs, self.T), after, before)

    def get_config(self):
        """Return the layer config including the five curve hyper-parameters."""
        config = {'T': float(self.T), 'r': float(self.r), 'C': float(self.C),
                  'E': float(self.E), 'N0': float(self.N0)}
        base_config = super(LIGHTVSigmoid, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        """The activation is element-wise, so the shape is unchanged."""
        return input_shape
    
    
class LIGHTGSigmoid(Layer):
    """Parameter-free activation layer shaped like a two-phase Gompertz growth curve.

    For inputs ``x <= T`` the output is the Gompertz curve

        C * exp(log(N0 / C) * exp(-r * x))

    For inputs ``x > T`` the output is

        C * exp(((r * log(NT / C) + E) * exp(-r * (x - T)) - E) / r)

    where ``NT`` is the first-phase curve value.

    The selection between the two phases is done element-wise with
    ``K.switch``.  The previous Python ``if`` on a symbolic tensor never took
    the second phase under TF1 graph mode, and that phase also applied
    ``np.log`` to a tensor, which NumPy cannot evaluate.

    Arguments:
        T: switching point between the two phases.
        C: capacity (upper asymptote) of the first phase.
        r: growth rate.
        E: additive term of the second phase
            (presumably an "effort"/harvesting term -- TODO confirm).
        N0: value of the first-phase curve at input 0.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, T, C, r, E, N0, **kwargs):
        super(LIGHTGSigmoid, self).__init__(**kwargs)
        # All hyper-parameters are stored as constants in the backend float type.
        self.T = K.cast_to_floatx(T)
        self.r = K.cast_to_floatx(r)
        self.C = K.cast_to_floatx(C)
        self.E = K.cast_to_floatx(E)
        self.N0 = K.cast_to_floatx(N0)

    def call(self, inputs):
        """Apply the two-phase curve element-wise; output has the input's shape."""
        # Each phase is evaluated only on its own side of T so the phase that
        # is not selected for an element cannot overflow there and leak NaN
        # into the gradient through the select op.
        below = K.minimum(inputs, self.T)
        above = K.maximum(inputs, self.T)

        # Constant of the Gompertz curve; plain NumPy because N0 and C are scalars.
        log_ratio = np.log(self.N0 / self.C)

        # Phase 1 (x <= T).
        before = self.C * K.exp(log_ratio * K.exp(-self.r * below))

        # Phase 2 (x > T).  With NT = C * exp(log_ratio * exp(-r * x)) we have
        # log(NT / C) = log_ratio * exp(-r * x), which is used directly to
        # avoid an exp/log round trip on a tensor.
        # NOTE(review): as in the original formula, NT is evaluated at the
        # current input rather than at the constant T -- confirm intended.
        log_nt_over_c = log_ratio * K.exp(-self.r * above)
        shifted = self.r * log_nt_over_c + self.E
        after = self.C * K.exp(
            (shifted * K.exp(-self.r * (above - self.T)) - self.E) / self.r
        )

        return K.switch(K.greater(inputs, self.T), after, before)

    def get_config(self):
        """Return the layer config including the five curve hyper-parameters."""
        config = {'T': float(self.T), 'r': float(self.r), 'C': float(self.C),
                  'E': float(self.E), 'N0': float(self.N0)}
        base_config = super(LIGHTGSigmoid, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        """The activation is element-wise, so the shape is unchanged."""
        return input_shape


def binary_crossentropy_light(y_true, y_pred):
    """Keras-style loss: binary cross-entropy averaged over the last axis.

    ``y_pred`` is treated as a probability (not a logit); the element-wise
    loss comes from ``binary_crossentropy_light_tf``.
    """
    elementwise_loss = binary_crossentropy_light_tf(y_true, y_pred)
    return K.mean(elementwise_loss, axis=-1)

def binary_crossentropy_light_tf(target, output, from_logits=False):
    """Element-wise sigmoid cross-entropy between ``target`` and ``output``.

    Arguments:
        target: tensor of labels.
        output: tensor of predictions; probabilities unless ``from_logits``.
        from_logits: when False, ``output`` is clipped to
            ``[epsilon, 1 - epsilon]`` and converted to logits first.

    Returns:
        Tensor produced by ``tf.nn.sigmoid_cross_entropy_with_logits``.
    """
    logits = output
    if not from_logits:
        # Clip away exact 0/1 so the logit transform log(p / (1 - p)) stays finite.
        eps = tfb._to_tensor(tfb.epsilon(), output.dtype.base_dtype)
        probabilities = tf.clip_by_value(output, eps, 1 - eps)
        logits = tf.log(probabilities / (1 - probabilities))

    return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=logits)

    
def build_modelG(x_train, y_train, x_val, y_val, params):
    """Model-building callback for the talos scan using ``LIGHTGSigmoid``.

    Builds a single ``Dense(1)`` unit followed by the LIGHT-G activation,
    compiles it with SGD and ``binary_crossentropy_light`` and trains it.

    Note: ``batch_size`` and ``random_search_epochs`` are read from module
    scope, not from ``params``.

    Arguments:
        x_train, y_train: training data.
        x_val, y_val: validation data.
        params: mapping with the keys ``'T'``, ``'C'``, ``'r'``, ``'E'``, ``'N0'``.

    Returns:
        ``(history, model)`` as returned by ``model.fit`` and the fitted model.
    """
    # A fresh interactive session is opened (and left open) on every call.
    session = tf.InteractiveSession()
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())

    light_kwargs = {key: params[key] for key in ('T', 'C', 'r', 'E', 'N0')}
    model = Sequential([
        Dense(1, input_dim=x_train.shape[1]),
        LIGHTGSigmoid(**light_kwargs),
    ])
    model.compile(optimizer='sgd', loss=binary_crossentropy_light, metrics=['acc'])

    # save_best_only=False: the weights file is overwritten after every epoch.
    callbacks = [
        ModelCheckpoint(filepath='model.weights.bestG.hdf5', verbose=0, save_best_only=False),
        early_stopper(random_search_epochs, patience=10),
    ]

    history = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=random_search_epochs,
        callbacks=callbacks,
        verbose=0,
        validation_data=[x_val, y_val],
    )

    return history, model

def build_modelV(x_train, y_train, x_val, y_val, params):
    """Model-building callback for the talos scan using ``LIGHTVSigmoid``.

    Builds a single ``Dense(1)`` unit followed by the LIGHT-V activation,
    compiles it with SGD and ``binary_crossentropy_light`` and trains it.

    Note: ``batch_size`` and ``random_search_epochs`` are read from module
    scope, not from ``params``.

    Arguments:
        x_train, y_train: training data.
        x_val, y_val: validation data.
        params: mapping with the keys ``'T'``, ``'C'``, ``'r'``, ``'E'``, ``'N0'``.

    Returns:
        ``(history, model)`` as returned by ``model.fit`` and the fitted model.
    """
    # A fresh interactive session is opened (and left open) on every call.
    session = tf.InteractiveSession()
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())

    light_kwargs = {key: params[key] for key in ('T', 'C', 'r', 'E', 'N0')}
    model = Sequential([
        Dense(1, input_dim=x_train.shape[1]),
        LIGHTVSigmoid(**light_kwargs),
    ])
    model.compile(optimizer='sgd', loss=binary_crossentropy_light, metrics=['acc'])

    # save_best_only=False: the weights file is overwritten after every epoch.
    callbacks = [
        ModelCheckpoint(filepath='model.weights.bestV.hdf5', verbose=0, save_best_only=False),
        early_stopper(random_search_epochs, patience=10),
    ]

    history = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=random_search_epochs,
        callbacks=callbacks,
        verbose=0,
        validation_data=[x_val, y_val],
    )

    return history, model

def upload_dataset_mnist(n_samples, test_size, target):
    """Load MNIST as a one-vs-rest binary problem on a random subsample.

    Arguments:
        n_samples: number of training images to keep.
        test_size: fraction of ``n_samples`` used as the number of test images.
        target: label treated as the positive class (``label == target``).

    Returns:
        ``(x_train, x_test, y_train, y_test)`` where the images are flattened
        to 2-D, cast to float32 and divided by 255, and the labels are
        boolean arrays.
    """
    (train_images, train_labels), (test_images, test_labels) = \
        tf.keras.datasets.mnist.load_data()

    def _random_subset(images, flags, count):
        # Draw one permutation and apply it to images and labels alike.
        order = np.arange(images.shape[0])
        np.random.shuffle(order)
        return images[order][:count], flags[order][:count]

    def _flatten_and_scale(images):
        flat = images.reshape(images.shape[0], images.shape[1] * images.shape[2])
        return flat.astype('float32') / 255

    # The train split is shuffled before the test split (order of RNG draws).
    train_images, train_flags = _random_subset(
        train_images, train_labels == target, n_samples)
    test_images, test_flags = _random_subset(
        test_images, test_labels == target, round(n_samples * test_size))

    return (_flatten_and_scale(train_images), _flatten_and_scale(test_images),
            train_flags, test_flags)

def upload_dataset_fmnist(n_samples, test_size, target):
    """Load Fashion-MNIST as a one-vs-rest binary problem on a random subsample.

    Arguments:
        n_samples: number of training images to keep.
        test_size: fraction of ``n_samples`` used as the number of test images.
        target: label treated as the positive class (``label == target``).

    Returns:
        ``(x_train, x_test, y_train, y_test)`` where the images are flattened
        to 2-D, cast to float32 and divided by 255, and the labels are
        boolean arrays.
    """
    (train_images, train_labels), (test_images, test_labels) = \
        tf.keras.datasets.fashion_mnist.load_data()

    def _random_subset(images, flags, count):
        # Draw one permutation and apply it to images and labels alike.
        order = np.arange(images.shape[0])
        np.random.shuffle(order)
        return images[order][:count], flags[order][:count]

    def _flatten_and_scale(images):
        flat = images.reshape(images.shape[0], images.shape[1] * images.shape[2])
        return flat.astype('float32') / 255

    # The train split is shuffled before the test split (order of RNG draws).
    train_images, train_flags = _random_subset(
        train_images, train_labels == target, n_samples)
    test_images, test_flags = _random_subset(
        test_images, test_labels == target, round(n_samples * test_size))

    return (_flatten_and_scale(train_images), _flatten_and_scale(test_images),
            train_flags, test_flags)

def grayscale(data, dtype='float32'):
    """Collapse the three colour channels of a 4-D image batch into one.

    The channels on the last axis are combined with the weights
    0.3 / 0.59 / 0.11 (a luma-style weighted average) and a channel axis of
    length 1 is re-added at position 3.

    Arguments:
        data: array indexed as ``data[:, :, :, channel]`` with at least
            three channels.
        dtype: dtype of the three channel weights.

    Returns:
        Array with the first three dimensions of ``data`` and a trailing
        dimension of size 1.
    """
    weight_r, weight_g, weight_b = (np.asarray(w, dtype=dtype) for w in (.3, .59, .11))
    red, green, blue = (data[:, :, :, channel] for channel in range(3))
    luma = weight_r * red + weight_g * green + weight_b * blue
    # Re-insert the channel dimension dropped by the per-channel indexing.
    return luma[:, :, :, np.newaxis]


def upload_dataset_cifar10(n_samples, test_size, target):
    """Load CIFAR-10 (converted to grayscale) as a one-vs-rest binary problem.

    Arguments:
        n_samples: number of training images to keep.
        test_size: fraction of ``n_samples`` used as the number of test images.
        target: label treated as the positive class (``label == target``).

    Returns:
        ``(x_train, x_test, y_train, y_test)`` where the grayscale images are
        flattened to 2-D, cast to float32 and divided by 255, and the labels
        are boolean arrays.
    """
    (train_images, train_labels), (test_images, test_labels) = \
        tf.keras.datasets.cifar10.load_data()

    # Colour channels are merged before any sub-sampling, as in the raw data order.
    train_images = grayscale(train_images)
    test_images = grayscale(test_images)

    def _random_subset(images, flags, count):
        # Draw one permutation and apply it to images and labels alike.
        order = np.arange(images.shape[0])
        np.random.shuffle(order)
        return images[order][:count], flags[order][:count]

    def _flatten_and_scale(images):
        n_images, height, width, channels = images.shape
        flat = np.reshape(images, (n_images, height * width * channels))
        return flat.astype('float32') / 255

    # The train split is shuffled before the test split (order of RNG draws).
    train_images, train_flags = _random_subset(
        train_images, train_labels == target, n_samples)
    test_images, test_flags = _random_subset(
        test_images, test_labels == target, round(n_samples * test_size))

    return (_flatten_and_scale(train_images), _flatten_and_scale(test_images),
            train_flags, test_flags)




def extract_hyper_param_from_csv(r, s):
    """Find the result CSV inside the experiment directory ``<cwd>/<s><r>``.

    Only the experiment directory itself is inspected.  (Walking the whole
    tree, as before, crashed with ``ValueError`` as soon as any sub-directory
    contained no CSV file, even though only the top-level result was used.)

    Arguments:
        r: round identifier appended to ``s`` to form the directory name.
        s: directory name prefix.

    Returns:
        ``(file_name, directory_path)`` where ``file_name`` is the
        lexicographically greatest ``*.csv`` name in the directory and
        ``directory_path`` is the absolute directory path.

    Raises:
        FileNotFoundError: if the directory does not exist or holds no CSV file.
    """
    path = os.path.abspath(os.getcwd() + '/' + s + str(r))
    if not os.path.isdir(path):
        raise FileNotFoundError('Experiment directory not found: ' + path)

    csv_names = [
        fname for fname in os.listdir(path)
        if fname.endswith(".csv") and not os.path.isdir(os.path.join(path, fname))
    ]
    if not csv_names:
        raise FileNotFoundError('No .csv file found in: ' + path)

    return max(csv_names), path


def optimize_hyper_param(x, y, opt, model, r, fraction_limit, s):
    """Run a talos scan over the LIGHT hyper-parameters and return the best row.

    Arguments:
        x, y: data handed to ``ta.Scan``.
        opt: which parameters are searched: ``'Er'`` (fixed ``r`` and ``E``),
            ``'E'`` (search ``E``, ``r`` fixed to 1) or ``'r'`` (search ``r``,
            ``E`` fixed to 0).
        model: model-building function handed to ``ta.Scan``.
        r: round identifier; combined with ``s`` to name the experiment.
        fraction_limit: forwarded to ``ta.Scan``.
        s: experiment name prefix.

    Returns:
        ``(T, C, r, E, N0, acc)`` taken from the scan result row with the
        highest ``val_acc`` (values rounded to 4 decimals).

    Raises:
        ValueError: if ``opt`` is not one of the supported options.
    """
    # (r_range, E_range) per search mode; tuples are (min, max, steps) ranges,
    # lists are explicit value sets.
    search_spaces = {
        'Er': ([4.08], [6.4]),
        'E': ([1], (0.0, 20, 5)),
        'r': ((0.1, 20, 5), [0]),
    }
    if opt not in search_spaces:
        raise ValueError(
            'Unknown opt %r; expected one of %s' % (opt, sorted(search_spaces)))
    r_range, E_range = search_spaces[opt]

    p = {'T': (1.0, 3.0, 3),
         'C': [1],
         'r': r_range,
         'E': E_range,
         'N0': (0.2, 0.8, 5)}

    # The scan object itself is not needed: results are read back from the
    # CSV file located in the experiment directory.
    ta.Scan(x, y, params=p,
            model=model,
            experiment_name=s + str(r),
            fraction_limit=fraction_limit)

    extract_name, path = extract_hyper_param_from_csv(r, s)

    results = pd.read_csv(path + '/' + extract_name).round(4)
    best = results.sort_values('val_acc', ascending=False).iloc[0]

    # Distinct local names so the round identifier ``r`` is not overwritten.
    best_T, best_C, best_r = best['T'], best['C'], best['r']
    best_E, best_N0, best_acc = best['E'], best['N0'], best['val_acc']

    return best_T, best_C, best_r, best_E, best_N0, best_acc

def build_adam_model(num_h):
    """Build the baseline classifier compiled with the Adam optimizer.

    The network is one ``Dense(1)`` unit followed by a sigmoid, with binary
    cross-entropy loss and accuracy as metric.  The input width is taken
    from the module-level ``x_train``.

    Arguments:
        num_h: currently unused (no hidden layer is added).

    Returns:
        The compiled, untrained Keras model.
    """
    # A fresh interactive session is opened (and left open) on every call.
    session = tf.InteractiveSession()
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())

    n_features = x_train.shape[1]
    classifier = Sequential([
        Dense(1, input_dim=n_features),
        Activation('sigmoid'),
    ])
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

    return classifier

def build_adagrad_model(num_h):
    """Build the baseline classifier compiled with the Adagrad optimizer.

    The network is one ``Dense(1)`` unit followed by a sigmoid, with binary
    cross-entropy loss and accuracy as metric.  The input width is taken
    from the module-level ``x_train``.

    Arguments:
        num_h: currently unused (no hidden layer is added).

    Returns:
        The compiled, untrained Keras model.
    """
    # A fresh interactive session is opened (and left open) on every call.
    session = tf.InteractiveSession()
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())

    n_features = x_train.shape[1]
    classifier = Sequential([
        Dense(1, input_dim=n_features),
        Activation('sigmoid'),
    ])
    classifier.compile(optimizer='adagrad', loss='binary_crossentropy', metrics=['acc'])

    return classifier

def build_sgd_model(num_h):
    """Build the baseline classifier compiled with the SGD optimizer.

    The network is one ``Dense(1)`` unit followed by a sigmoid, with binary
    cross-entropy loss and accuracy as metric.  The input width is taken
    from the module-level ``x_train``.

    Arguments:
        num_h: currently unused (no hidden layer is added).

    Returns:
        The compiled, untrained Keras model.
    """
    # A fresh interactive session is opened (and left open) on every call.
    session = tf.InteractiveSession()
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())

    n_features = x_train.shape[1]
    classifier = Sequential([
        Dense(1, input_dim=n_features),
        Activation('sigmoid'),
    ])
    classifier.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['acc'])

    return classifier


def build_best_model(LIGHT_type, T, C, r, E, N0, num_h):
    """Build a classifier whose output activation is a LIGHT layer.

    The network is one ``Dense(1)`` unit followed by ``LIGHT_type``
    instantiated with the given hyper-parameters, compiled with SGD and
    ``binary_crossentropy_light``.  The input width is taken from the
    module-level ``x_train``.

    Arguments:
        LIGHT_type: layer class called as ``LIGHT_type(T=, C=, r=, E=, N0=)``.
        T, C, r, E, N0: hyper-parameters forwarded to ``LIGHT_type``.
        num_h: currently unused (no hidden layer is added).

    Returns:
        The compiled, untrained Keras model.
    """
    # A fresh interactive session is opened (and left open) on every call.
    session = tf.InteractiveSession()
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())

    n_features = x_train.shape[1]
    classifier = Sequential([
        Dense(1, input_dim=n_features),
        LIGHT_type(T=T, C=C, r=r, E=E, N0=N0),
    ])
    classifier.compile(optimizer='sgd', loss=binary_crossentropy_light, metrics=['acc'])

    return classifier

def train_light_models(x_train, y_train, x_test, y_test, batch_size, epochs, num_h, T_v, C_v, r_v, E_v, N0_v, T_g, C_g, r_g, E_g, N0_g):
    """Train the LIGHT-V and LIGHT-G models with the given hyper-parameters.

    Both models are built first and then fitted one after the other
    (V, then G) without shuffling, validating on the test split and logging
    to TensorBoard under ``./logs``.

    Arguments:
        x_train, y_train: training data.
        x_test, y_test: data used as validation set during fitting.
        batch_size: mini-batch size.
        epochs: number of training epochs.
        num_h: forwarded to ``build_best_model``.
        T_v, C_v, r_v, E_v, N0_v: hyper-parameters of the ``LIGHTVSigmoid`` model.
        T_g, C_g, r_g, E_g, N0_g: hyper-parameters of the ``LIGHTGSigmoid`` model.

    Returns:
        ``(out_bestV, out_bestG)``: the two objects returned by ``fit``.
    """
    tensorboard = TensorBoard(
        log_dir='./logs',
        histogram_freq=0,
        batch_size=batch_size,
        write_graph=False,
        write_images=False,
    )

    model_bestV = build_best_model(LIGHTVSigmoid, T_v, C_v, r_v, E_v, N0_v, num_h)
    model_bestG = build_best_model(LIGHTGSigmoid, T_g, C_g, r_g, E_g, N0_g, num_h)

    def _fit(model):
        # Identical fit settings for every model so the runs are comparable.
        return model.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            shuffle=False,
            epochs=epochs,
            validation_data=(x_test, y_test),
            callbacks=[tensorboard],
            verbose=0,
        )

    out_bestV = _fit(model_bestV)
    out_bestG = _fit(model_bestG)

    return out_bestV, out_bestG

def train_default_models(x_train, y_train, x_test, y_test, batch_size, epochs, num_h):
    """Train the three sigmoid baselines (Adam, Adagrad, SGD).

    All models are built first and then fitted one after the other
    (Adam, Adagrad, SGD) without shuffling, validating on the test split
    and logging to TensorBoard under ``./logs``.

    Arguments:
        x_train, y_train: training data.
        x_test, y_test: data used as validation set during fitting.
        batch_size: mini-batch size.
        epochs: number of training epochs.
        num_h: forwarded to the model builders.

    Returns:
        ``(out_adam, out_adagrad, out_sgd)``: the objects returned by ``fit``.
    """
    tensorboard = TensorBoard(
        log_dir='./logs',
        histogram_freq=0,
        batch_size=batch_size,
        write_graph=False,
        write_images=False,
    )

    model_adam = build_adam_model(num_h)
    model_adagrad = build_adagrad_model(num_h)
    model_sgd = build_sgd_model(num_h)

    def _fit(model):
        # Identical fit settings for every model so the runs are comparable.
        return model.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            shuffle=False,
            epochs=epochs,
            validation_data=(x_test, y_test),
            callbacks=[tensorboard],
            verbose=0,
        )

    out_adam = _fit(model_adam)
    out_adagrad = _fit(model_adagrad)
    out_sgd = _fit(model_sgd)

    return out_adam, out_adagrad, out_sgd

def train_models(x_train, y_train, x_test, y_test, batch_size, epochs, num_h, T_v, C_v, r_v, E_v, N0_v, T_g, C_g, r_g, E_g, N0_g):
    """Train both LIGHT models and the three sigmoid baselines.

    All five models are built first and then fitted one after the other
    (LIGHT-V, LIGHT-G, Adam, Adagrad, SGD) without shuffling, validating on
    the test split and logging to TensorBoard under ``./logs``.

    Arguments:
        x_train, y_train: training data.
        x_test, y_test: data used as validation set during fitting.
        batch_size: mini-batch size.
        epochs: number of training epochs.
        num_h: forwarded to the model builders.
        T_v, C_v, r_v, E_v, N0_v: hyper-parameters of the ``LIGHTVSigmoid`` model.
        T_g, C_g, r_g, E_g, N0_g: hyper-parameters of the ``LIGHTGSigmoid`` model.

    Returns:
        ``(out_bestV, out_bestG, out_adam, out_adagrad, out_sgd)``: the
        objects returned by ``fit``.
    """
    tensorboard = TensorBoard(
        log_dir='./logs',
        histogram_freq=0,
        batch_size=batch_size,
        write_graph=False,
        write_images=False,
    )

    model_bestV = build_best_model(LIGHTVSigmoid, T_v, C_v, r_v, E_v, N0_v, num_h)
    model_bestG = build_best_model(LIGHTGSigmoid, T_g, C_g, r_g, E_g, N0_g, num_h)
    model_adam = build_adam_model(num_h)
    model_adagrad = build_adagrad_model(num_h)
    model_sgd = build_sgd_model(num_h)

    def _fit(model):
        # Identical fit settings for every model so the runs are comparable.
        return model.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            shuffle=False,
            epochs=epochs,
            validation_data=(x_test, y_test),
            callbacks=[tensorboard],
            verbose=0,
        )

    out_bestV = _fit(model_bestV)
    out_bestG = _fit(model_bestG)
    out_adam = _fit(model_adam)
    out_adagrad = _fit(model_adagrad)
    out_sgd = _fit(model_sgd)

    return out_bestV, out_bestG, out_adam, out_adagrad, out_sgd

2.3.1


In [None]:
# setup
# Experiment driver: for each round, draw a dataset subsample, tune the LIGHT
# hyper-parameters for every search mode, train the tuned LIGHT models and the
# three baselines, and finally write all training histories to CSV files.
# NOTE(review): random_state is defined but never used in the visible code and
# no random seed is set anywhere -- confirm whether seeding was intended.
random_state = 41
    
# Dataset selector ('mnist', 'fmnist' or 'cifar10') and the label that the
# loaders treat as the positive class.
name = 'cifar10'
target = 5
# Number of training samples kept by the loaders.
num_samples_total = 1000

# Test set size as a fraction of num_samples_total.
test_size = 0.2
# Also read as a module-level global by build_modelV / build_modelG.
batch_size = 75

# Search modes understood by optimize_hyper_param.
opt_options = ['Er', 'E', 'r']

# Forwarded to ta.Scan by optimize_hyper_param.
fraction_limit = 0.075 

# Epochs per model during the hyper-parameter scan; read as a module-level
# global by build_modelV / build_modelG.
random_search_epochs = 1
# num_h is forwarded to the model builders; L is only used in output file names.
num_h = 0
L = 0

# Epochs for the final training runs and number of experiment repetitions.
epoch = 1500
rounds = 5

# run experiments
# One entry per (round, search mode) for the LIGHT models ...
hyper_round_V = []
hyper_round_G = []

out_round_V = []
out_round_G = []
# ... and one entry per round for the baselines.
out_round_sgd = []
out_round_adam = []
out_round_adagrad = []

start = timeit.default_timer()
for r in range(1,rounds+1):

    # generate datasets
    # x_train is also read as a module-level global by the build_*_model functions.
    if name == 'mnist':
        x_train, x_test, y_train, y_test = upload_dataset_mnist(num_samples_total, test_size, target)
    elif name == 'fmnist':
        x_train, x_test, y_train, y_test = upload_dataset_fmnist(num_samples_total, test_size, target)
    elif name == 'cifar10':
        x_train, x_test, y_train, y_test = upload_dataset_cifar10(num_samples_total, test_size, target)
    
    for pi in opt_options:
        # optimize hyperparameters
        T_v, C_v, r_v, E_v, N0_v, acc_v  = optimize_hyper_param(x_train, y_train, pi, build_modelV, r, fraction_limit, 'hypersV')    
        T_g, C_g, r_g, E_g, N0_g, acc_g = optimize_hyper_param(x_train, y_train, pi, build_modelG, r, fraction_limit, 'hypersG')
    
        # The summary frames are rebuilt from the full lists on every iteration,
        # so after the loops they hold every (round, search mode) row.
        hyper_round_V.append([T_v, C_v, r_v, E_v, N0_v, acc_v])
        hyper_round_V_dataframe = pd.DataFrame(hyper_round_V,
                                           columns =['T', 'C', 'r', 'E', 'N0', 'val_acc']) 
        hyper_round_G.append([T_g, C_g, r_g, E_g, N0_g, acc_g])
        hyper_round_G_dataframe = pd.DataFrame(hyper_round_G,
                                           columns =['T', 'C', 'r', 'E', 'N0', 'val_acc']) 

        print(hyper_round_V_dataframe)
        print(hyper_round_G_dataframe)
        
        # Full-length training with the best hyper-parameters of this search mode.
        out_V, out_G = train_light_models(x_train, y_train, x_test, y_test, batch_size, epoch, num_h, T_v, C_v, r_v, E_v, N0_v, T_g, C_g, r_g, E_g, N0_g)      
        
        out_V = pd.DataFrame.from_dict(out_V.history)
        out_G = pd.DataFrame.from_dict(out_G.history)
           
        out_round_V.append(out_V)
        out_round_G.append(out_G)
       
        
    # Baselines are trained once per round (not once per search mode).
    out_adam, out_adagrad, out_sgd = train_default_models(x_train, y_train, x_test, y_test, batch_size, epoch, num_h)      
    out_adam = pd.DataFrame.from_dict(out_adam.history)
    out_adagrad = pd.DataFrame.from_dict(out_adagrad.history)
    out_sgd = pd.DataFrame.from_dict(out_sgd.history)
    
    out_round_adam.append(out_adam)
    out_round_adagrad.append(out_adagrad)
    out_round_sgd.append(out_sgd)
    
    
    # Progress report: the total run time is extrapolated linearly from the
    # elapsed time once at least 5% of the rounds are done.
    stop = timeit.default_timer()
    if (r/rounds*100) < 5:
        expected_time = 'Calculating ...'
    else:
        time_perc = timeit.default_timer()
        expected_time = np.round( ( (time_perc-start)/(r/rounds) )/60, 2)
    print('Current progress:', np.round(r/rounds*100, 2), '%')
    print('Current run time:', np.round((stop-start)/60, 2), 'mins')
    print('Expected run time:', expected_time, 'mins')
        
# Histories are placed side by side (axis=1): one group of metric columns per run.
out_round_V = pd.concat(out_round_V, axis = 1)
out_round_G = pd.concat(out_round_G, axis = 1)
out_round_adam = pd.concat(out_round_adam, axis = 1)
out_round_adagrad = pd.concat(out_round_adagrad, axis = 1)
out_round_sgd = pd.concat(out_round_sgd, axis = 1)

# The hyper-parameter summary is appended as extra columns of the same file.
out_round_V = pd.concat([out_round_V, hyper_round_V_dataframe], axis= 1)
out_round_G = pd.concat([out_round_G, hyper_round_G_dataframe], axis= 1)
  
# Output files are named <name><target>_nt_<model>_dl<num_h>_L<L>.csv
pd.DataFrame.from_dict(out_round_V).to_csv(name+ str(target)+'_nt_V_dl'+str(num_h)+'_L'+str(L)+'.csv',index=False)
pd.DataFrame.from_dict(out_round_G).to_csv(name + str(target)+'_nt_G_dl'+str(num_h)+'_L'+str(L)+'.csv',index=False)
pd.DataFrame.from_dict(out_round_adam).to_csv(name + str(target)+'_nt_adam_dl'+str(num_h)+'_L'+str(L)+'.csv',index=False)
pd.DataFrame.from_dict(out_round_adagrad).to_csv(name + str(target)+'_nt_adagrad_dl'+str(num_h)+'_L'+str(L)+'.csv',index=False)
pd.DataFrame.from_dict(out_round_sgd).to_csv(name + str(target)+'_nt_sgd_dl'+str(num_h)+'_L'+str(L)+'.csv',index=False)
