In [1]:
# Enable autoreload for custom modules and apply some global settings
%load_ext autoreload
%autoreload 1
%matplotlib inline
#np.set_printoptions(suppress=True)

import logging, argparse, os, sys
import numpy as np
import tensorflow as tf
import scipy, importlib, pprint, matplotlib.pyplot as plt, warnings
import seaborn as sns
# import glmnet_python
# from glmnet import glmnet; from glmnetPlot import glmnetPlot
# from glmnetPrint import glmnetPrint; from glmnetCoef import glmnetCoef; from glmnetPredict import glmnetPredict
# from cvglmnet import cvglmnet; from cvglmnetCoef import cvglmnetCoef
# from cvglmnetPlot import cvglmnetPlot; from cvglmnetPredict import cvglmnetPredict
# from statsmodels.stats.weightstats import DescrStatsW
# from glmnetSet import glmnetSet
warnings.filterwarnings('ignore')
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

# Packages that need to be reloaded frequently
sys.path.append('./code')
%aimport data_generator, baseline
from data_generator import generate_toy_example
from data_generator import linear_model_generation
from data_generator import generate_group_data
from baseline import Lasso
from alg_baselines import f_bl_Ulasso, f_bl_IIlasso
%aimport model, utils
from model import variable_decorrelation

ModuleNotFoundError: No module named 'tensorflow'

## 1. Different Collinearity Structure

In [7]:
def generate_function_data(decay, sample_size=1000, num_features=4):
    """Generate data whose last column is a noisy sum of the other columns.

    Draws ``num_features`` independent standard-normal columns and appends one
    extra column equal to their row-wise sum plus zero-mean Gaussian noise, so
    the last column is (up to the noise) a linear function of the others.

    Args:
        decay: Standard deviation of the Gaussian noise added to the row-wise
            sum. Smaller values bring the last column closer to an exact sum.
        sample_size: Number of rows to draw. Defaults to 1000, the value that
            used to be hard-coded.
        num_features: Number of independent columns. Defaults to 4, the value
            that used to be hard-coded.

    Returns:
        The ``(sample_size, num_features + 1)`` array after it has been passed
        through ``preprocessing.scale``.
    """
    X = np.random.normal(0, 1, (sample_size, num_features))
    # Draw the noise after X so that seeded runs consume the random stream in
    # the same order as before.
    noise = np.random.normal(0, decay, (sample_size, 1))
    f = np.sum(X, axis=1, keepdims=True) + noise
    return preprocessing.scale(np.hstack((X, f)))

In [9]:
# Compare an independent design with two collinear designs at each noise level.
sample_size, dimension = 1000, 5
decay_list = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]

# Reference point: condition number of the correlation matrix of i.i.d. columns.
X_ind = np.random.normal(0, 1, (sample_size, dimension))
print(np.linalg.cond(np.corrcoef(X_ind.T)))

# The loop index was never used, so iterate over the values directly.
for decay in decay_list:
    X_pair = generate_group_data(sample_size, decay, group_size=dimension, group_num=1)
    # generate_function_data(decay) yields a 1000 x 5 array, which matches
    # (sample_size, dimension) as set above.
    X_func = generate_function_data(decay)
    print(decay)
    print(utils.weighted_corrcoef(X_pair))
    print(utils.weighted_corrcoef(X_func))

1.1916849762321629
0.001
(array([[1.        , 0.9999985 , 0.99999718, 0.99999594, 0.99999468],
       [0.9999985 , 1.        , 0.99999853, 0.99999719, 0.99999568],
       [0.99999718, 0.99999853, 1.        , 0.99999855, 0.99999706],
       [0.99999594, 0.99999719, 0.99999855, 1.        , 0.99999845],
       [0.99999468, 0.99999568, 0.99999706, 0.99999845, 1.        ]]), 0.9999977398529771, 5763658.202747467)
(array([[ 1.        , -0.04013495,  0.025675  ,  0.0098658 ,  0.47591353],
       [-0.04013495,  1.        ,  0.03132502,  0.02686719,  0.50194908],
       [ 0.025675  ,  0.03132502,  1.        ,  0.00572748,  0.53907014],
       [ 0.0098658 ,  0.02686719,  0.00572748,  1.        ,  0.51204141],
       [ 0.47591353,  0.50194908,  0.53907014,  0.51204141,  1.        ]]), 0.3670639764918985, 16173845.95724764)
0.005
(array([[1.        , 0.99996255, 0.99992946, 0.99989849, 0.99986691],
       [0.99996255, 1.        , 0.99996315, 0.99992967, 0.99989191],
       [0.99992946, 0.99996315,

In [15]:
# Train the decorrelation model on a single collinear group, once per L2 weight.
decay = 0.05
X_group = generate_group_data(sample_size=1000, decay=decay, group_num=1)
Y_group = None

# Settings shared by every run of the sweep.
learning_rate, weight_upper = 1e-4, 0
data_description = 'Group data (5 var) with decay %.0e' % (decay)

for weight in [2e-3, 5e-3, 2e-2, 5e-2, 1e-1]:
    weight_l2 = weight
    # One log name per L2 weight; later cells load checkpoints from
    # 'model/<log_name>/', so presumably this also names the checkpoint folder.
    # Alternative naming scheme kept for reference:
    #   'group_decay_%.0e_l2_%.0e_lr_%.0e' % (decay, weight_l2, learning_rate)
    log_name = 'collinearity_analysis_l2_%.0e' % (weight_l2)
    _ = variable_decorrelation(
        x=X_group,
        y=Y_group,
        log_name=log_name,
        data_description=data_description,
        learning_rate=learning_rate,
        weight_l2=weight_l2,
        weight_upper=weight_upper,
        max_iter=15000,
        display_iter=300,
        save_iter=750,
        mode='rotation',
    )

In [14]:
# Baseline: correlation statistics of X_group with every row given weight 1.
uniform_weights = np.ones((X_group.shape[0], 1))
utils.weighted_corrcoef(X_group, uniform_weights)

(array([[1.        , 0.99627017, 0.99296328, 0.98998909, 0.98689714],
        [0.99627017, 1.        , 0.99633114, 0.99307509, 0.98937346],
        [0.99296328, 0.99633114, 1.        , 0.99644472, 0.99277208],
        [0.98998909, 0.99307509, 0.99644472, 1.        , 0.99620223],
        [0.98689714, 0.98937346, 0.99277208, 0.99620223, 1.        ]]),
 0.994425471185246,
 2316.871695054794)

In [19]:
decay = 0.05
# Use the `decay` variable rather than repeating the literal, so the two cannot
# drift apart.
# NOTE(review): this draws a new X_group before loading the weights. If
# generate_group_data is not seeded and the loaded weights are per-sample, they
# were fitted to a different draw than the one evaluated here -- confirm.
X_group = generate_group_data(sample_size=1000, decay=decay, group_num=1)
model_path = 'model/collinearity_analysis/model_iters15000.ckpt'
w_opt = model.load_weight(X_group, None, model_path)
print(utils.weighted_corrcoef(X_group, w_opt))

INFO:tensorflow:Restoring parameters from model/collinearity_analysis/model_iters15000.ckpt
(array([[1.        , 0.99302146, 0.98862177, 0.98568818, 0.97990144],
       [0.99302146, 1.        , 0.99397133, 0.99053145, 0.98386579],
       [0.98862177, 0.99397133, 1.        , 0.99469257, 0.98802667],
       [0.98568818, 0.99053145, 0.99469257, 1.        , 0.99256711],
       [0.97990144, 0.98386579, 0.98802667, 0.99256711, 1.        ]]), 0.9912710216731685, 1261.9708653557225)


In [21]:
# Correlation statistics of X_group under the weights restored from the
# checkpoint whose folder name matches the weight_l2 = 2e-3 run.
model_path = 'model/collinearity_analysis_l2_2e-03/model_iters15000.ckpt'
w_opt = model.load_weight(X_group, None, model_path)
reweighted_stats = utils.weighted_corrcoef(X_group, w_opt)
print(reweighted_stats)

INFO:tensorflow:Restoring parameters from model/collinearity_analysis_l2_2e-03/model_iters15000.ckpt
(array([[1.        , 0.98636704, 0.97998684, 0.97715479, 0.96144329],
       [0.98636704, 1.        , 0.98788643, 0.98431726, 0.96652277],
       [0.97998684, 0.98788643, 1.        , 0.99045679, 0.97284525],
       [0.97715479, 0.98431726, 0.99045679, 1.        , 0.98101527],
       [0.96144329, 0.96652277, 0.97284525, 0.98101527, 1.        ]]), 0.9830396589738831, 602.3889413057892)


In [22]:
# Correlation statistics of X_group under the weights restored from the
# checkpoint whose folder name matches the weight_l2 = 5e-3 run.
model_path = 'model/collinearity_analysis_l2_5e-03/model_iters15000.ckpt'
w_opt = model.load_weight(X_group, None, model_path)
reweighted_stats = utils.weighted_corrcoef(X_group, w_opt)
print(reweighted_stats)

INFO:tensorflow:Restoring parameters from model/collinearity_analysis_l2_5e-03/model_iters15000.ckpt
(array([[1.        , 0.99040111, 0.98505065, 0.98207988, 0.9738069 ],
       [0.99040111, 1.        , 0.99193723, 0.98823935, 0.97879181],
       [0.98505065, 0.99193723, 1.        , 0.99326009, 0.983797  ],
       [0.98207988, 0.98823935, 0.99326009, 1.        , 0.98944347],
       [0.9738069 , 0.97879181, 0.983797  , 0.98944347, 1.        ]]), 0.9885445996208548, 931.6542319680349)


In [23]:
# Correlation statistics of X_group under the weights restored from the
# checkpoint whose folder name matches the weight_l2 = 5e-2 run.
model_path = 'model/collinearity_analysis_l2_5e-02/model_iters15000.ckpt'
w_opt = model.load_weight(X_group, None, model_path)
reweighted_stats = utils.weighted_corrcoef(X_group, w_opt)
print(reweighted_stats)

INFO:tensorflow:Restoring parameters from model/collinearity_analysis_l2_5e-02/model_iters15000.ckpt
(array([[1.        , 0.99559669, 0.99215545, 0.98928517, 0.98565044],
       [0.99559669, 1.        , 0.99586515, 0.99268721, 0.98841232],
       [0.99215545, 0.99586515, 1.        , 0.99613811, 0.99189331],
       [0.98928517, 0.99268721, 0.99613811, 1.        , 0.99544556],
       [0.98565044, 0.98841232, 0.99189331, 0.99544556, 1.        ]]), 0.9938503529299425, 1962.8968523259398)


In [24]:
# Correlation statistics of X_group under the weights restored from the
# checkpoint whose folder name matches the weight_l2 = 1e-1 run.
model_path = 'model/collinearity_analysis_l2_1e-01/model_iters15000.ckpt'
w_opt = model.load_weight(X_group, None, model_path)
reweighted_stats = utils.weighted_corrcoef(X_group, w_opt)
print(reweighted_stats)

INFO:tensorflow:Restoring parameters from model/collinearity_analysis_l2_1e-01/model_iters15000.ckpt
(array([[1.        , 0.9959316 , 0.99257001, 0.98965638, 0.9862916 ],
       [0.9959316 , 1.        , 0.99609889, 0.99289507, 0.98890954],
       [0.99257001, 0.99609889, 1.        , 0.99629749, 0.9923433 ],
       [0.98965638, 0.99289507, 0.99629749, 1.        , 0.99582083],
       [0.9862916 , 0.98890954, 0.9923433 , 0.99582083, 1.        ]]), 0.9941451765828936, 2121.806970348623)
