In [1]:
import tensorflow as tf
import numpy as np

In [18]:
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [92]:
X_train.shape

(60000, 28, 28)

In [152]:
class SSRegularizer(tf.keras.regularizers.Regularizer):
    def __init__(self, l1):
        self.l1 = l1

    def __call__(self, x):
        scaling_matrix = tf.cumsum(tf.constant(self.l1, shape=x.shape), axis=1) - self.l1
        return self.l1 * tf.reduce_sum(tf.multiply(scaling_matrix, tf.abs(x)))

    def get_config(self):
        return {'l1': float(self.l1)}


class SSDense(tf.keras.layers.Layer):
    def __init__(self, units, activation=None, kernel_initializer='glorot_uniform', bias_initializer='zeros', l1=0.1):
        super().__init__()
        self.units = units
        self.activation = tf.keras.activations.get(activation)
        self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
        self.bias_initializer = tf.keras.initializers.get(bias_initializer)
        self.kernel_regularizer = SSRegularizer(l1=l1)

    def build(self, input_shape):
        self.kernel = self.add_weight(
            'kernel',
            shape=(input_shape[-1], self.units),
            dtype='float32',
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer)
        
        self.bias = self.add_weight(
            'bias',
            shape=(self.units,),
            dtype='float32',
            initializer=self.bias_initializer)

    def call(self, inputs):
        return self.activation(tf.matmul(inputs, self.kernel) + self.bias)

In [153]:
input_layer = tf.keras.layers.Input(shape=[28, 28])
hidden = SSDense(50, activation='relu')

x = tf.keras.layers.Flatten()(input_layer)
x = hidden(x)
output = tf.keras.layers.Dense(10, activation='softmax')(x)

model = tf.keras.Model(inputs=[input_layer], outputs=[output])

In [154]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics='accuracy')

In [155]:
model.fit(X_train, y_train, epochs=5, validation_data=((X_test), (y_test)))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f0d157f1fd0>

In [156]:
np.amax(hidden.kernel, 0)

array([0.08386122, 0.05713188, 0.00056058, 0.01311588, 0.03093693,
       0.02525304, 0.0245307 , 0.01927312, 0.00046184, 0.0014831 ,
       0.03961012, 0.00049501, 0.00051186, 0.00044549, 0.02408647,
       0.00044314, 0.0165571 , 0.00048739, 0.00180285, 0.00060964,
       0.00080721, 0.00047223, 0.00036902, 0.00044238, 0.00118394,
       0.00061396, 0.00081335, 0.00083071, 0.00081291, 0.00052855,
       0.00059931, 0.00070067, 0.00050654, 0.00055363, 0.00052984,
       0.00059063, 0.00067837, 0.00064678, 0.00059073, 0.00085646,
       0.00046051, 0.01424969, 0.00044373, 0.00052758, 0.00062995,
       0.00061023, 0.00045959, 0.00046145, 0.00049477, 0.0006274 ],
      dtype=float32)

In [128]:
hidden.kernel

<tf.Variable 'ss_dense_13/kernel:0' shape=(784, 50) dtype=float32, numpy=
array([[ 2.19782889e-02, -8.38522101e-06,  1.75410969e-06, ...,
        -2.57352440e-05, -9.40302416e-05, -3.54066688e-05],
       [ 4.62642983e-02, -3.22027790e-06, -1.28006614e-05, ...,
        -1.63810124e-04, -8.04495357e-06,  8.38647102e-05],
       [ 4.94136587e-02, -1.51565837e-06, -2.69689117e-05, ...,
         3.41140549e-05, -1.01994199e-04,  2.84221722e-04],
       ...,
       [-6.54428005e-02,  9.53386552e-06,  3.28887909e-05, ...,
         7.01069439e-05,  1.14341339e-04, -2.56687868e-04],
       [ 5.99011704e-02, -1.09144585e-05, -5.63615067e-05, ...,
         4.51343221e-05, -1.92771317e-04, -1.97840709e-04],
       [-4.54486720e-02, -1.79859289e-05, -1.64482790e-05, ...,
        -3.69285408e-06, -8.67230483e-05, -1.00684047e-04]], dtype=float32)>

In [37]:
hidden.weights

[<tf.Variable 'dense_14/kernel:0' shape=(784, 50) dtype=float32, numpy=
 array([[-0.01770425,  0.00407095,  0.07000803, ...,  0.08013842,
         -0.07911243,  0.01101536],
        [-0.01970632,  0.00456458,  0.05945834, ...,  0.08379038,
          0.07810465, -0.08296434],
        [ 0.056421  ,  0.07724804,  0.00417263, ...,  0.0619384 ,
          0.03587858, -0.04258172],
        ...,
        [-0.06198139, -0.07481034,  0.07102262, ...,  0.00600304,
         -0.04394611,  0.03386247],
        [ 0.0122844 ,  0.0843849 ,  0.05133539, ...,  0.03520263,
         -0.05640824,  0.07732203],
        [ 0.08064725,  0.07204448,  0.05897743, ..., -0.04463573,
          0.03161991,  0.06805336]], dtype=float32)>,
 <tf.Variable 'dense_14/bias:0' shape=(50,) dtype=float32, numpy=
 array([ 0.01858092, -0.00567263, -0.07340959, -0.03671593, -0.05480817,
        -0.15721531, -0.08629031, -0.04539269, -0.03571798, -0.15483859,
         0.05694963, -0.11505955, -0.06954357, -0.0231346 , -0.12895995,
