In [1]:
import tensorflow as tf
import numpy as np
import sys
import random
from scipy.stats import norm
import os
from sklearn.model_selection import train_test_split
from ESNN_layers import *
import pickle

In [2]:
# Load the example regression data set and cast it to float32.
X = np.loadtxt('data/X_reg.txt').astype('float32')
Y = np.loadtxt('data/Y_reg.txt').astype('float32')
sample_size = X.shape[0]

# Apply one random permutation to X and Y so that their rows stay aligned.
# No seed is set in this cell, so the permutation and the split below
# differ from run to run.
all_indices = range(len(Y))
shuffled_indices = tf.random.shuffle(all_indices)
X = tf.gather(X, shuffled_indices).numpy()
Y = tf.gather(Y, shuffled_indices).numpy()

# Hold out 15% of the rows as the test set.
X_train_raw, X_test, Y_train_raw, Y_test = train_test_split(X, Y, test_size=0.15)

In [3]:
# ---- Parameter setup ----

# Model family.
model_type = 'regression'
reg_type = 'linear'

# Problem dimensions, read off the loaded data.
input_size = X.shape[1]
initial_size = X.shape[1]
output_size = 1 if Y.ndim == 1 else Y.shape[1]

# Network / training settings.
L = 10              # number of models
nsample = 100       # number of samples used to approximate the expectation in the ELBO
nepoch = 50         # training epochs per model
hidden_sizes = [5]  # list of hidden-layer sizes
lamb = 1.0          # weight parameter in the loss function
batch_size = 50
sigma = 0.0001
temperature = 0.1   # for the gumbel-softmax trick
tau = 0.3           # scales alpha inside the softmax

# MSE of predicting the per-column mean of Y_test; the training loop
# compares each model's test loss against this threshold.
mini_loss = np.mean(np.square(Y_test - Y_test.mean(axis=0))) * 1.0

# Counters advanced by the training loop.
l = 0
iteration = 0
max_iter = 15

In [4]:
# Fit a cross-validated lasso; its coefficients are used to initialise the network.
from sklearn.linear_model import LassoCV, MultiTaskLassoCV

# Single-output targets use LassoCV, multi-output targets the multi-task variant.
clf = (
    LassoCV(cv=5, random_state=0)
    if output_size == 1
    else MultiTaskLassoCV(cv=5, random_state=0, max_iter=2000)
)
clf.fit(X_train_raw, Y_train_raw)

# Mean squared error of the lasso fit on both splits.
pred_train = clf.predict(X_train_raw)
pred_test = clf.predict(X_test)
acc_train = np.mean(np.square(Y_train_raw - pred_train))
acc_test = np.mean(np.square(Y_test - pred_test))
print(f'Lasso Regression| train {acc_train:.4f} test {acc_test:.4f}')

Lasso Regression| train 0.7431 test 0.7057


In [5]:
####################################NN
def _credible_set(probs, coverage=0.95):
    """Return the indices of the top entries of `probs` whose mass exceeds `coverage`.

    Entries are accumulated from the largest probability downwards and the
    scan stops as soon as their sum is strictly greater than `coverage`.
    The smallest entry is never added, so for a non-empty input at most
    ``len(probs) - 1`` indices are returned. Indices are ordered by
    ascending probability (the order produced by ``np.argsort``).
    """
    sorted_probs = np.sort(probs)  # sorted once instead of on every step
    n_entries = probs.shape[0]
    first_kept = n_entries
    for n_taken in range(n_entries):
        first_kept = n_entries - n_taken
        if sum(sorted_probs[first_kept:]) > coverage:
            break
    return np.argsort(probs)[first_kept:]


##initializations
# Start from the absolute lasso coefficients, one value per input feature
# (averaged over the last axis of the transposed coefficients when there
# are several outputs).
if output_size == 1:
    init_val = np.transpose(abs(clf.coef_))
else:
    init_val = np.mean(np.transpose(abs(clf.coef_)), axis = -1)
init_val = np.reshape(init_val.astype('float32'), (input_size, 1))
# One identical tensor per model slot; only init_vals[0] is used below.
init_vals = [tf.convert_to_tensor(init_val) for _ in range(L)]

#model
all_model = list()
model = SNN(model_type, reg_type, sigma, input_size, output_size, hidden_sizes, temperature, tau, False, init_vals[0])
all_myloss = list()
all_prbs = list()
all_cs = list()
# remaining_cols[k] is the column of the full matrix X that column k of the
# progressively reduced X_train_raw / X_test came from. Assumes X_train_raw
# still has all of X's columns when this cell starts.
remaining_cols = np.arange(input_size)
while l<L and iteration<= max_iter:
    # Columns: 0 = train NLL + KL, 1 = train MSE, 2 = test MSE (3 is unused).
    myloss = np.zeros((nepoch, 4))
    for epoch in range(0, nepoch):
        # Exponentially decayed step size; a fresh Adam instance is created every epoch.
        learning_rate = 0.005*(0.995**epoch)
        # `learning_rate` is the supported keyword; `lr` is deprecated and warns.
        model.optimizer = tf.optimizers.Adam(learning_rate = learning_rate)
        all_indices = range(len(Y_train_raw))
        shuffled_indices = tf.random.shuffle(all_indices)
        train_bnn(model, tf.gather(X_train_raw, shuffled_indices), tf.gather(Y_train_raw, shuffled_indices), batch_size, learning_rate, True, nsample, 0.00005, 10.0)#0.00005
        # Evaluate on the training set. The last argument is presumably the
        # number of Monte-Carlo draws (pred's leading axis is averaged over
        # when residuals are formed below) -- TODO confirm against SNN.call.
        pred, nll, kl = model.call(X_train_raw, Y_train_raw, True, 100)
        # Combine the NLL and KL returned by the same (training) forward pass.
        elbo = nll+kl
        if output_size == 1:
            temp_train_acc = np.mean(tf.losses.MSE(pred[:,:,0], Y_train_raw))
        else:
            temp_train_acc = np.mean(tf.losses.MSE(pred, Y_train_raw))
        # Evaluate on the test set.
        pred, temp_test_nll, temp_test_kl = model.call(X_test, Y_test, True, 100)
        if output_size == 1:
            temp_test_acc = np.mean(tf.losses.MSE(pred[:,:,0], Y_test))
        else:
            temp_test_acc = np.mean(tf.losses.MSE(pred, Y_test))
        myloss[epoch,0] = elbo
        myloss[epoch,1] = temp_train_acc
        myloss[epoch,2] = temp_test_acc
        print("Iteration", iteration)
        print("Train loss", temp_train_acc)
        print("l", l)
        print("mini loss", mini_loss)
        print("Test loss", temp_test_acc)
        # Current per-feature selection probabilities.
        prbs = np.asarray(tf.nn.softmax(model.bnn.w_alpha[:,0]/tau))
        print('#################################################################################################################')
        print(np.where(prbs>0.1))
        print(np.where(prbs == np.max(prbs)))
        # Stop early once the test loss beats the current threshold by a margin.
        if epoch>3 and model_type == 'regression' and temp_test_acc<mini_loss-0.05:
            break
        if epoch>10 and model_type == 'regression' and temp_test_acc<mini_loss-0.01:
            break
        # After epoch 30, stop when the worst test loss of the two previous
        # epochs is no better than the worst of the two before those.
        # NOTE(review): both slices end at `epoch` (exclusive), so the value
        # just recorded for this epoch is not compared -- confirm this is intended.
        if epoch > 30:
            curr_avg = np.max(myloss[epoch-2:epoch,2])
            pre_avg = np.max(myloss[epoch-4:epoch-2,2])
            if model_type == 'regression' and curr_avg>= pre_avg:
                break
    # Keep the model only if its final test loss beats the threshold by at least 0.01.
    if model_type == 'regression' and myloss[epoch,2]<=mini_loss-0.01:
        mini_loss = myloss[epoch,2]
        l += 1
        all_myloss.append(myloss)
        all_model.append(model)
        temp_prbs = np.asarray(tf.nn.softmax(model.bnn.w_alpha[:,0]/tau))
        # Pad the probabilities back to the original feature count by
        # inserting a tiny placeholder at every previously selected index
        # (ascending order, so each insertion lands at its original position).
        temp_to_add = temp_prbs
        if temp_prbs.shape[0]<initial_size:
            toinsert = np.unique(np.concatenate(all_cs))
            for pos in toinsert:
                temp_to_add = np.insert(temp_to_add, pos, 1e-10)
        all_prbs.append(temp_to_add)
        #derive residuals
        # Residuals are (mean prediction - target) and become the targets
        # for the next model.
        pred, nll, kl = model.call(X_train_raw, Y_train_raw, True, 100)
        if output_size == 1:
            res_train = np.mean(pred[:,:,0], axis = 0) - Y_train_raw
        else:
            res_train = np.mean(pred, axis = 0) - Y_train_raw
        pred, temp_test_nll, kl = model.call(X_test, Y_test, True, 100)
        if output_size == 1:
            res_test = np.mean(pred[:,:,0], axis = 0) - Y_test
        else:
            res_test = np.mean(pred, axis = 0) - Y_test
        Y_train_raw = res_train
        Y_test = res_test
        #compute cs
        # temp_cs holds column positions in the *reduced* feature matrices.
        temp_cs = _credible_set(temp_prbs)
        if temp_cs.shape[0]>1:
            # Map the reduced positions back to X's columns before correlating;
            # indexing X with temp_cs directly picks the wrong columns once
            # any feature has been removed.
            cc = np.corrcoef(np.transpose(X[:,remaining_cols[temp_cs]]))
            # Neutralise the diagonal so self-correlation cannot affect the minimum test.
            np.fill_diagonal(cc, 0.5)
        else:
            cc = 0.5
        # Purity check: every pairwise (signed) correlation must be at least 0.5.
        if np.min(cc)>=0.5:
            #remove found variables
            X_train_raw = np.delete(X_train_raw, temp_cs, axis = 1)
            X_test = np.delete(X_test, temp_cs, axis = 1)
            remaining_cols = np.delete(remaining_cols, temp_cs)
            input_size = X_train_raw.shape[1]
            #add cs with correct idx
            # Recompute the set on the padded vector so the stored indices
            # refer to the original feature numbering.
            temp_cs = _credible_set(temp_to_add)
            all_cs.append(temp_cs)
    # Once features have been removed the lasso initialisation no longer
    # matches the input width, so fall back to a small random initialisation.
    if len(all_cs)>0:
        temp_init_val = tf.random.truncated_normal([input_size, 1], mean=0.0, stddev=0.1, dtype=tf.dtypes.float32)
    else:
        temp_init_val = init_vals[0]
    model = SNN(model_type, reg_type, sigma, input_size, output_size, hidden_sizes, temperature, tau, False, temp_init_val)
    iteration+=1

  super(Adam, self).__init__(name, **kwargs)


Iteration 0
Train loss 1.2711823
l 0
mini loss 0.9860114455223083
Test loss 1.2286781
#################################################################################################################
(array([], dtype=int64),)
(array([53]),)
Iteration 0
Train loss 1.058028
l 0
mini loss 0.9860114455223083
Test loss 1.0436525
#################################################################################################################
(array([82]),)
(array([82]),)
Iteration 0
Train loss 1.1758631
l 0
mini loss 0.9860114455223083
Test loss 1.2014663
#################################################################################################################
(array([82]),)
(array([82]),)
Iteration 0
Train loss 1.1995499
l 0
mini loss 0.9860114455223083
Test loss 1.1968491
#################################################################################################################
(array([82]),)
(array([82]),)
Iteration 0
Train loss 1.2753196
l 0
mini loss 0.9860114455223083
Test

In [22]:
# Scratch check: test MSE of the slice pred[1, :, 0] (shape (750,), see the
# shape check further down) against Y_test.
np.mean(tf.losses.MSE(pred[1,:,0], Y_test))

0.70828384

In [24]:
# Same check without dropping the trailing axis: pred[1] has shape (750, 1),
# which presumably broadcasts against a 1-D Y_test and explains why the
# value differs from the one above -- TODO confirm Y_test's shape.
np.mean(tf.losses.MSE(pred[1], Y_test))

1.2414048

In [25]:
# Shape of the slice used in the first MSE check (trailing axis dropped).
pred[1,:,0].shape

TensorShape([750])

In [26]:
# Shape of the slice used in the second MSE check (trailing axis kept).
pred[1].shape

TensorShape([750, 1])

In [32]:
# Residual computed the way the multi-output branch of the training loop does.
# NOTE(review): with pred[1].shape == (750, 1) and a presumably 1-D Y_test,
# this subtraction likely broadcasts to a 2-D array -- confirm.
a = np.mean(pred, axis = 0) - Y_test

In [33]:
# Residual computed the way the single-output branch of the training loop does.
b = np.mean(pred[:,:,0], axis = 0) - Y_test

# Mean of `b`. Written positionally: the earlier spelling passed `b` through
# np.mean's keyword `a`, which reads like a comparison of `a` and `b` but is not one.
np.mean(b)

0.016114369

In [43]:
# Shape of the single-output residual (mean over pred's leading axis minus Y_test).
np.subtract(np.mean(pred[:,:,0], axis = 0), Y_test).shape

(750,)

In [6]:
# Inspect the credible sets collected by the training loop.
all_cs

[]

In [None]:
# Gather the training results into a single dictionary.
all_myloss = np.asarray(all_myloss)
all_prbs = np.asarray(all_prbs)
import pickle
all_data = {
    'loss': all_myloss,
    'pips': all_prbs,
    'cs': all_cs,
}