In [1]:
# Re-import edited modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

import sys
# Extend the import search path with the parent directory
# (presumably where the local `maxjoshua` package lives — confirm).
sys.path.append('..')

import maxjoshua as mh
import tensorflow as tf
import sklearn.preprocessing

## Load dataset

In [2]:
import sklearn.datasets

# Synthetic regression problem: 1000 samples, 100 features (20 informative),
# 3 targets. A fixed random_state makes Restart & Run All regenerate the
# exact same dataset, so every downstream result is reproducible.
X, y = sklearn.datasets.make_regression(
    n_samples=1000,
    n_features=100,
    n_informative=20,
    n_targets=3,
    random_state=42,
)

In [3]:
# Sanity check: array shapes and the overall value range of the targets.
X.shape, y.shape, y.max(), y.min()

((1000, 100), (1000, 3), 826.7673065383296, -911.2601431867922)

## Feature selection

In [4]:
%%time 
indices, values, num_in, num_out = mh.pretrain_submodels(
    sklearn.preprocessing.scale(X), 
    sklearn.preprocessing.scale(y), 
    num_out=64, n_select=3)

0 (64, 86, 95) [-0.030078640349444463, 0.221922822008917, 0.35182983749492336] 0.8855170277435118
1 (35, 38, 99) [0.3507773720369648, 0.2914976540278422, -0.06878462564374627] 0.8589709506577199
2 (10, 97, 99) [0.3967556573434297, 0.38971838784704843, -0.10468899034162711] 0.796321553934515
CPU times: user 15 s, sys: 373 ms, total: 15.4 s
Wall time: 14.1 s


## Training

In [5]:
# Seed Python, NumPy and TensorFlow in one call so that weight initialisation
# and batch shuffling are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Derive the output width from the targets instead of repeating a literal.
# Requires a 2-D `y` (true here: the dataset is generated with n_targets=3).
num_targets = y.shape[1]

model = tf.keras.models.Sequential([
    # Sparse layer built from the pretrained indices/values.
    # sp_trainable=False presumably keeps those sparse weights frozen — confirm.
    mh.SparseLayerAsEnsemble(
        num_in=num_in,
        num_out=num_out,
        sp_indices=indices,
        sp_values=values,
        sp_trainable=False
    ),
    # Bias-free dense layer combining the `num_out` outputs of the sparse
    # layer; its kernel is constrained to non-negative values.
    tf.keras.layers.Dense(
        units=num_targets, use_bias=False,
        kernel_constraint=tf.keras.constraints.NonNeg()
    ),
    # NOTE(review): y.mean() / y.std() are scalars taken over all targets,
    # whereas pretraining standardised y per column — confirm this is intended.
    mh.InverseTransformer(
        units=num_targets,
        init_bias=y.mean(),
        init_scale=y.std()
    )
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=3e-4, beta_1=.9, beta_2=.999, epsilon=1e-7, amsgrad=True),
    loss='mean_squared_error'
)

# NOTE(review): the model is fit on raw X although the sparse layer was
# pretrained on standardised X — confirm that this mismatch is acceptable.
history = model.fit(X, y, epochs=3)

Epoch 1/3


2022-06-21 14:09:58.017632: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 2/3
Epoch 3/3


In [6]:
# Print the layer-by-layer overview with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sparse_layer_as_ensemble (S  (None, 64)               592       
 parseLayerAsEnsemble)                                           
                                                                 
 dense (Dense)               (None, 3)                 192       
                                                                 
 inverse_transformer (Invers  (None, 3)                6         
 eTransformer)                                                   
                                                                 
Total params: 790
Trainable params: 398
Non-trainable params: 392
_________________________________________________________________


In [7]:
# In-sample predictions: the same X that was passed to model.fit.
y_pred = model.predict(X)



In [8]:
# Display the predicted targets (bare expression renders the array repr).
y_pred

array([[  32.41129  ,   86.500626 ,  -88.62109  ],
       [-115.02759  ,  -44.41147  , -128.52892  ],
       [ -80.20841  ,  -86.36415  , -127.54527  ],
       ...,
       [ -38.8014   ,   60.870422 ,   13.4240265],
       [  58.531338 ,   59.005497 ,   36.656044 ],
       [-105.05849  ,  -69.640564 , -129.83876  ]], dtype=float32)

In [9]:
# Display the true targets for a visual comparison with the predictions.
y

array([[ 189.63176047,   85.18177731,  119.37223354],
       [ -99.49483064,   79.56510184, -382.50919779],
       [   4.34269488,  148.16108064,    4.13984965],
       ...,
       [-207.1902419 ,  -32.02099973,    4.98155606],
       [ -47.49634833,   -8.58288168,   44.97482191],
       [-448.19171217, -483.55431877, -225.8641919 ]])