In [58]:
import tensorflow as tf
import numpy as np

In [59]:
dtype = 'float32'
tf.keras.backend.set_floatx(dtype)

In [61]:
# fashion_mnist = tf.keras.datasets.fashion_mnist
# (X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# X_train = X_train.astype(dtype) / 255.0
# y_train = y_train.astype(dtype)
# X_test = X_test.astype(dtype)  / 255.0
# y_test = y_test.astype(dtype)

# X_train = np.reshape(X_train, (-1, 784))
# X_test = np.reshape(X_test, (-1, 784))

In [64]:
cifar10 = tf.keras.datasets.cifar10
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

X_train = X_train.astype(dtype) / 255.0
y_train = y_train.astype(dtype)
X_test = X_test.astype(dtype)  / 255.0
y_test = y_test.astype(dtype)

X_train = np.reshape(X_train, (-1, 3072))
X_test = np.reshape(X_test, (-1, 3072))

In [97]:
class SSRegularizer(tf.keras.regularizers.Regularizer):
    def __init__(self, l1):
        self.l1 = l1

    def __call__(self, x):
        scaling_vector = tf.cumsum(tf.constant(self.l1, shape=(x.shape[-1],), dtype=dtype), axis=0) - self.l1
        return tf.reduce_sum(scaling_vector * tf.abs(x))

    def get_config(self):
        return {'l1': float(self.l1)}


class SSModel(tf.keras.Model):
    def __init__(self, input_units, units, activation=None, l1=0.01, kernel_initializer='glorot_uniform', bias_initializer='zeros'):
        super().__init__()
        self.activation1 = tf.keras.activations.get(activation)
        self.activation2 = tf.keras.activations.get('softmax')
        self.l1 = l1
        self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
        self.bias_initializer = tf.keras.initializers.get(bias_initializer)
        self.regularizer = SSRegularizer(self.l1)
        
        self.W1 = tf.Variable(
            name='W1',
            initial_value=self.kernel_initializer(shape=(input_units, units), dtype=dtype),
            trainable=True)
        
        self.b1 = tf.Variable(
            name='b1',
            initial_value=self.bias_initializer(shape=(units,), dtype=dtype),
            trainable=True)
        
        self.W2 = tf.Variable(
            name='W2',
            initial_value=self.kernel_initializer(shape=(units, 10), dtype=dtype),
            trainable=True)
        
        self.b2 = tf.Variable(
            name='b2',
            initial_value=self.bias_initializer(shape=(10,), dtype=dtype),
            trainable=True)
        
        self.add_loss(lambda: self.regularizer(self.W1))
        self.add_loss(lambda: self.regularizer(self.b1))

    def call(self, inputs):
        S1 = tf.matmul(inputs, self.W1)
        A1 = self.activation1(S1 + self.b1)
        A2 = self.activation2(tf.matmul(A1, self.W2) + self.b2)

        return A2
    
    def prune(self, threshold=0.001):
        W1 = self.W1.value()
        b1 = self.b1.value()
        W2 = self.W2.value()
        
        weights_with_biases = tf.concat([W1, tf.reshape(b1, (1, -1))], axis=0)
        neurons_are_active = tf.math.reduce_max(weights_with_biases, axis=0) >= threshold
        active_neurons_indices = tf.reshape(tf.where(neurons_are_active), (-1,))
        
        new_W1 = tf.gather(W1, active_neurons_indices, axis=1)
        new_b1 = tf.gather(b1, active_neurons_indices, axis=0)
        new_W2 = tf.gather(W2, active_neurons_indices, axis=0)
        
        self.W1 = tf.Variable(name='W1', initial_value=new_W1, trainable=True)
        self.b1 = tf.Variable(name='b1', initial_value=new_b1, trainable=True)
        self.W2 = tf.Variable(name='W2', initial_value=new_W2, trainable=True)
    
    def grow(self, min_new_neurons=5, scaling_factor=0.001):      
        W1 = self.W1.value()
        b1 = self.b1.value()
        W2 = self.W2.value()
        
        n_new_neurons = max(min_new_neurons, int(W1.shape[1] * 0.2))
        
        W1_growth = self.kernel_initializer(shape=(W1.shape[0], W1.shape[1] + n_new_neurons), dtype=dtype)[:, -n_new_neurons:] * scaling_factor
        b1_growth = self.kernel_initializer(shape=(n_new_neurons,), dtype=dtype)
        W2_growth = self.kernel_initializer(shape=(W2.shape[0] + n_new_neurons, W2.shape[1]), dtype=dtype)[-n_new_neurons:, :]
        
        new_W1 = tf.concat([W1, W1_growth], axis=1)
        new_b1 = tf.concat([b1, b1_growth], axis=0)
        new_W2 = tf.concat([W2, W2_growth], axis=0)
        
        self.W1 = tf.Variable(name='W1', initial_value=new_W1, trainable=True)
        self.b1 = tf.Variable(name='b1', initial_value=new_b1, trainable=True)
        self.W2 = tf.Variable(name='W2', initial_value=new_W2, trainable=True)

In [107]:
def get_param_string(weights):
    param_string = ""
    max_parameters = tf.math.reduce_max(tf.abs(weights), axis=0).numpy()
    magnitudes = np.floor(np.log10(max_parameters))
    for m in magnitudes:
        if m > 0:
            m = 0
        param_string += str(int(-m))
    return param_string


def print_epoch_statistics(model):
    y_pred = model(X_train)
    loss = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(y_train, y_pred))
    accuracy = tf.reduce_mean(tf.keras.metrics.sparse_categorical_accuracy(y_train, y_pred))
    
    y_pred_val = model(X_test)
    val_loss = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(y_test, y_pred_val))
    val_accuracy = tf.reduce_mean(tf.keras.metrics.sparse_categorical_accuracy(y_test, y_pred_val))
    print(f"loss: {loss} - accuracy: {accuracy} - val_loss: {val_loss} - val_accuracy: {val_accuracy}")
    print(f"units: {model.W1.shape[1]} - {get_param_string(model.W1)}")
    

def train_model(model, optimizer, epochs, batch_size, train_dataset):
    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")
        
        print("Before growing:")
        print_epoch_statistics(model)
        model.grow(min_new_neurons=20, scaling_factor=0.001)
        print("After growing:")
        print_epoch_statistics(model)

        for step, (x_batch, y_batch) in enumerate(train_dataset):
            with tf.GradientTape() as tape:
                y_pred = model(x_batch, training=True)
                loss_value = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(y_batch, y_pred))
                loss_value += sum(model.losses)

            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        
        print("Before pruning:")
        print_epoch_statistics(model)
        model.prune(threshold=0.001)
        print("After pruning:")
        print_epoch_statistics(model)

In [79]:
epochs = 20
batch_size = 32

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

model = SSModel(input_units=3072, units=200, activation='relu', l1=0.000001, kernel_initializer='he_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

train_model(model, optimizer, epochs, batch_size, train_dataset)

Epoch 1/20
Before growing:
loss: 2.3859381675720215 - accuracy: 0.10491999983787537 - val_loss: 2.3854541778564453 - val_accuracy: 0.1080000028014183
units: 200 - 22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222
After growing:
loss: 2.381791830062866 - accuracy: 0.09839999675750732 - val_loss: 2.3812601566314697 - val_accuracy: 0.10130000114440918
units: 240 - 222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222225555555555555555555555555555555555555555
Before pruning:
loss: 1.9336285591125488 - accuracy: 0.3240399956703186 - val_loss: 1.9357273578643799 - val_accuracy: 0.31940001249313354
units: 240 - 11222221222121222222222212222212222222241122322222223222322222223122233232232323122222322122

loss: 1.6416640281677246 - accuracy: 0.4109399914741516 - val_loss: 1.6657218933105469 - val_accuracy: 0.40470001101493835
units: 28 - 1111111111222222222223222222
Epoch 11/20
Before growing:
loss: 1.6416640281677246 - accuracy: 0.4109399914741516 - val_loss: 1.6657218933105469 - val_accuracy: 0.40470001101493835
units: 28 - 1111111111222222222223222222
After growing:
loss: 1.6598081588745117 - accuracy: 0.4021199941635132 - val_loss: 1.6843949556350708 - val_accuracy: 0.3921000063419342
units: 33 - 111111111122222222222322222255555
Before pruning:
loss: 1.6390280723571777 - accuracy: 0.41600000858306885 - val_loss: 1.666709542274475 - val_accuracy: 0.40880000591278076
units: 33 - 111111111122222222222222222222222
After pruning:
loss: 1.6390280723571777 - accuracy: 0.41600000858306885 - val_loss: 1.666709542274475 - val_accuracy: 0.40880000591278076
units: 28 - 1111111111222222222222222222
Epoch 12/20
Before growing:
loss: 1.6390280723571777 - accuracy: 0.41600000858306885 - val_loss: 

In [86]:
epochs = 20
batch_size = 32

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

model = SSModel(input_units=3072, units=50, activation='relu', l1=0.000001, kernel_initializer='he_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

train_model(model, optimizer, epochs, batch_size, train_dataset)

Epoch 1/20
Before growing:
loss: 2.640031337738037 - accuracy: 0.10174000263214111 - val_loss: 2.6367788314819336 - val_accuracy: 0.09929999709129333
units: 50 - 22222222222222222222222222222222222222222222222222
After growing:
loss: 2.62612247467041 - accuracy: 0.1012599989771843 - val_loss: 2.6229114532470703 - val_accuracy: 0.0989999994635582
units: 60 - 222222222222222222222222222222222222222222222222225555555555
Before pruning:
loss: 1.9433270692825317 - accuracy: 0.29401999711990356 - val_loss: 1.9443687200546265 - val_accuracy: 0.29409998655319214
units: 60 - 212222122222222122212222112211231222322212222223222332232232
After pruning:
loss: 1.9433270692825317 - accuracy: 0.29401999711990356 - val_loss: 1.9443687200546265 - val_accuracy: 0.29409998655319214
units: 53 - 21222212222222212212222112211231222221222222222322222
Epoch 2/20
Before growing:
loss: 1.9433270692825317 - accuracy: 0.29401999711990356 - val_loss: 1.9443687200546265 - val_accuracy: 0.29409998655319214
units: 53 

loss: 1.9583690166473389 - accuracy: 0.27379998564720154 - val_loss: 1.9747564792633057 - val_accuracy: 0.2653000056743622
units: 22 - 2111222222222222255555
Before pruning:
loss: 1.8539695739746094 - accuracy: 0.31224000453948975 - val_loss: 1.8709502220153809 - val_accuracy: 0.30000001192092896
units: 22 - 2111222222222222222222
After pruning:
loss: 1.8539695739746094 - accuracy: 0.31224000453948975 - val_loss: 1.8709502220153809 - val_accuracy: 0.30000001192092896
units: 22 - 2111222222222222222222
Epoch 13/20
Before growing:
loss: 1.8539695739746094 - accuracy: 0.31224000453948975 - val_loss: 1.8709502220153809 - val_accuracy: 0.30000001192092896
units: 22 - 2111222222222222222222
After growing:
loss: 1.8569411039352417 - accuracy: 0.30946001410484314 - val_loss: 1.8739542961120605 - val_accuracy: 0.30160000920295715
units: 27 - 211122222222222222222255555
Before pruning:
loss: 1.8289430141448975 - accuracy: 0.32458001375198364 - val_loss: 1.8461039066314697 - val_accuracy: 0.31029

In [87]:
epochs = 20
batch_size = 32

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

model = SSModel(input_units=3072, units=500, activation='relu', l1=0.000001, kernel_initializer='he_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

train_model(model, optimizer, epochs, batch_size, train_dataset)

Epoch 1/20
Before growing:
loss: 2.4856791496276855 - accuracy: 0.09743999689817429 - val_loss: 2.4817655086517334 - val_accuracy: 0.09830000251531601
units: 500 - 22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222
After growing:
loss: 2.4845969676971436 - accuracy: 0.0980599969625473 - val_loss: 2.480679988861084 - val_accuracy: 0.09889999777078629
units: 600 - 22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222

Before pruning:
loss: 1.694265365600586 - accuracy: 0.3941600024700165 - val_loss: 1.7167106866836548 - val_accuracy: 0.38600000739097595
units: 23 - 21111111122222222243322
After pruning:
loss: 1.694265365600586 - accuracy: 0.3941600024700165 - val_loss: 1.7167106866836548 - val_accuracy: 0.38600000739097595
units: 18 - 211111111222222332
Epoch 9/20
Before growing:
loss: 1.694265365600586 - accuracy: 0.3941600024700165 - val_loss: 1.7167106866836548 - val_accuracy: 0.38600000739097595
units: 18 - 211111111222222332
After growing:
loss: 1.7245169878005981 - accuracy: 0.37700000405311584 - val_loss: 1.7452995777130127 - val_accuracy: 0.3643999993801117
units: 23 - 21111111122222233255555
Before pruning:
loss: 1.6822009086608887 - accuracy: 0.39816001057624817 - val_loss: 1.7084351778030396 - val_accuracy: 0.38940000534057617
units: 23 - 21111111122222222222232
After pruning:
loss: 1.6822009086608887 - accuracy: 0.39816001057624817 - val_loss: 1.7084351778030396 - val_accuracy: 0.3894000

loss: 1.6970937252044678 - accuracy: 0.39236000180244446 - val_loss: 1.735567569732666 - val_accuracy: 0.3720000088214874
units: 27 - 211100111222222222222355555
Before pruning:
loss: 1.6508679389953613 - accuracy: 0.4022800028324127 - val_loss: 1.695644497871399 - val_accuracy: 0.38339999318122864
units: 27 - 211100111222222222222222222
After pruning:
loss: 1.6508679389953613 - accuracy: 0.4022800028324127 - val_loss: 1.695644497871399 - val_accuracy: 0.38339999318122864
units: 25 - 2111001112222222222222222


In [91]:
epochs = 20
batch_size = 32

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

model = SSModel(input_units=3072, units=25, activation='relu', l1=0.000001, kernel_initializer='he_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

train_model(model, optimizer, epochs, batch_size, train_dataset)

Epoch 1/20
Before growing:
loss: 2.617313861846924 - accuracy: 0.09611999988555908 - val_loss: 2.6195871829986572 - val_accuracy: 0.09650000184774399
units: 25 - 2222222222222222222222222
After growing:
loss: 2.6243700981140137 - accuracy: 0.09600000083446503 - val_loss: 2.6267757415771484 - val_accuracy: 0.09719999879598618
units: 30 - 222222222222222222222222255555
Before pruning:
loss: 2.0064220428466797 - accuracy: 0.24442000687122345 - val_loss: 2.0065321922302246 - val_accuracy: 0.24120000004768372
units: 30 - 221122222222222222222222235222
After pruning:
loss: 2.0064220428466797 - accuracy: 0.24442000687122345 - val_loss: 2.0065321922302246 - val_accuracy: 0.24120000004768372
units: 23 - 22112222222222222222222
Epoch 2/20
Before growing:
loss: 2.0064220428466797 - accuracy: 0.24442000687122345 - val_loss: 2.0065321922302246 - val_accuracy: 0.24120000004768372
units: 23 - 22112222222222222222222
After growing:
loss: 2.111586332321167 - accuracy: 0.19701999425888062 - val_loss: 2.

loss: 1.9087944030761719 - accuracy: 0.2717599868774414 - val_loss: 1.911544919013977 - val_accuracy: 0.2702000141143799
units: 18 - 211222222222222222
After growing:
loss: 1.9398106336593628 - accuracy: 0.2578200101852417 - val_loss: 1.942226529121399 - val_accuracy: 0.2556999921798706
units: 23 - 21122222222222222255555
Before pruning:
loss: 1.9127790927886963 - accuracy: 0.2662599980831146 - val_loss: 1.9164273738861084 - val_accuracy: 0.26350000500679016
units: 23 - 21122222222222222222222
After pruning:
loss: 1.9127790927886963 - accuracy: 0.2662599980831146 - val_loss: 1.9164273738861084 - val_accuracy: 0.26350000500679016
units: 20 - 21122222222222222222
Epoch 14/20
Before growing:
loss: 1.9127790927886963 - accuracy: 0.2662599980831146 - val_loss: 1.9164273738861084 - val_accuracy: 0.26350000500679016
units: 20 - 21122222222222222222
After growing:
loss: 1.9519459009170532 - accuracy: 0.24383999407291412 - val_loss: 1.9553859233856201 - val_accuracy: 0.23989999294281006
units: 

In [99]:
epochs = 20
batch_size = 32

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

model = SSModel(input_units=3072, units=25, activation='relu', l1=0.000001, kernel_initializer='he_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

train_model(model, optimizer, epochs, batch_size, train_dataset)

Epoch 1/20
Before growing:
loss: 2.475613832473755 - accuracy: 0.10493999719619751 - val_loss: 2.4778172969818115 - val_accuracy: 0.10530000180006027
units: 25 - 2222222222222222222222222
After growing:
loss: 2.522550344467163 - accuracy: 0.10698000341653824 - val_loss: 2.5250914096832275 - val_accuracy: 0.10840000212192535
units: 75 - 222222222222222222222222255555555555555555555555555555555555555555555555555
Before pruning:
loss: 1.9480037689208984 - accuracy: 0.282260000705719 - val_loss: 1.9536405801773071 - val_accuracy: 0.27810001373291016
units: 75 - 222222222222222222223222222325326222233232221222252332233123322322222133242
After pruning:
loss: 1.9480037689208984 - accuracy: 0.282260000705719 - val_loss: 1.9536405801773071 - val_accuracy: 0.27810001373291016
units: 51 - 222222222222222222222222322222221222223132322221322
Epoch 2/20
Before growing:
loss: 1.9480037689208984 - accuracy: 0.282260000705719 - val_loss: 1.9536405801773071 - val_accuracy: 0.27810001373291016
units: 51 

loss: 1.7588304281234741 - accuracy: 0.36215999722480774 - val_loss: 1.770375370979309 - val_accuracy: 0.3596999943256378
units: 108 - 211121222222221222222322222322222322233222222232222332223355555555555555555555555555555555555555555555555555
Before pruning:
loss: 1.744492530822754 - accuracy: 0.36607998609542847 - val_loss: 1.757424235343933 - val_accuracy: 0.3626999855041504
units: 108 - 211121222222231222232222222232322223323233322222222332223323452525542223333325222334554253353223333223422253
After pruning:
loss: 1.744492530822754 - accuracy: 0.36607998609542847 - val_loss: 1.757424235343933 - val_accuracy: 0.3626999855041504
units: 49 - 2111122221222232222322232322222323222232222323222
Epoch 11/20
Before growing:
loss: 1.744492530822754 - accuracy: 0.36607998609542847 - val_loss: 1.757424235343933 - val_accuracy: 0.3626999855041504
units: 49 - 2111122221222232222322232322222323222232222323222
After growing:
loss: 1.7588422298431396 - accuracy: 0.3574199974536896 - val_loss: 1.771

Before pruning:
loss: 1.7375706434249878 - accuracy: 0.36687999963760376 - val_loss: 1.7597156763076782 - val_accuracy: 0.3529999852180481
units: 99 - 211111222322222223222222222222522212533323522222232333322223222222324223332222222222325222332332255
After pruning:
loss: 1.7375706434249878 - accuracy: 0.36687999963760376 - val_loss: 1.7597156763076782 - val_accuracy: 0.3529999852180481
units: 55 - 2111112222223222222222221232222223222222222222222322322
Epoch 20/20
Before growing:
loss: 1.7375706434249878 - accuracy: 0.36687999963760376 - val_loss: 1.7597156763076782 - val_accuracy: 0.3529999852180481
units: 55 - 2111112222223222222222221232222223222222222222222322322
After growing:
loss: 1.7363173961639404 - accuracy: 0.36796000599861145 - val_loss: 1.7585843801498413 - val_accuracy: 0.3513000011444092
units: 105 - 211111222222322222222222123222222322222222222222232232255555555555555555555555555555555555555555555555555
Before pruning:
loss: 1.7085301876068115 - accuracy: 0.377079993486

In [108]:
epochs = 20
batch_size = 32

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

model = SSModel(input_units=3072, units=25, activation='relu', l1=0.000001, kernel_initializer='he_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

train_model(model, optimizer, epochs, batch_size, train_dataset)

Epoch 1/20
Before growing:
loss: 2.506462574005127 - accuracy: 0.07035999745130539 - val_loss: 2.5072879791259766 - val_accuracy: 0.06800000369548798
units: 25 - 2222222222222222222222222
After growing:
loss: 2.5550343990325928 - accuracy: 0.0809599980711937 - val_loss: 2.5560426712036133 - val_accuracy: 0.07760000228881836
units: 45 - 222222222222222222222222255555555555555555555
Before pruning:
loss: 2.0944952964782715 - accuracy: 0.18459999561309814 - val_loss: 2.0965957641601562 - val_accuracy: 0.1809999942779541
units: 45 - 222222222222222222222222232232223132222222322
After pruning:
loss: 2.0944952964782715 - accuracy: 0.18459999561309814 - val_loss: 2.0965957641601562 - val_accuracy: 0.1809999942779541
units: 32 - 22222222222222222222222212222222
Epoch 2/20
Before growing:
loss: 2.0944952964782715 - accuracy: 0.18459999561309814 - val_loss: 2.0965957641601562 - val_accuracy: 0.1809999942779541
units: 32 - 22222222222222222222222212222222
After growing:
loss: 2.1181905269622803 -

loss: 1.94565749168396 - accuracy: 0.24427999556064606 - val_loss: 1.9494240283966064 - val_accuracy: 0.2409999966621399
units: 28 - 2212122222222222222222222222
Epoch 12/20
Before growing:
loss: 1.94565749168396 - accuracy: 0.24427999556064606 - val_loss: 1.9494240283966064 - val_accuracy: 0.2409999966621399
units: 28 - 2212122222222222222222222222
After growing:
loss: 1.9485472440719604 - accuracy: 0.24026000499725342 - val_loss: 1.9525275230407715 - val_accuracy: 0.24130000174045563
units: 48 - 221212222222222222222222222255555555555555555555
Before pruning:
loss: 1.8469734191894531 - accuracy: 0.3240000009536743 - val_loss: 1.8547630310058594 - val_accuracy: 0.3131999969482422
units: 48 - 221212222122322222222222222222422222244233222222
After pruning:
loss: 1.8469734191894531 - accuracy: 0.3240000009536743 - val_loss: 1.8547630310058594 - val_accuracy: 0.3131999969482422
units: 34 - 2121222123222222222222222222322222
Epoch 13/20
Before growing:
loss: 1.8469734191894531 - accuracy: 

In [109]:
epochs = 20
batch_size = 32

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

model = SSModel(input_units=3072, units=1000, activation='relu', l1=0.000001, kernel_initializer='he_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

train_model(model, optimizer, epochs, batch_size, train_dataset)

Epoch 1/20
Before growing:
loss: 2.5419509410858154 - accuracy: 0.09911999851465225 - val_loss: 2.54484224319458 - val_accuracy: 0.09870000183582306
units: 1000 - 222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222

loss: 1.6703693866729736 - accuracy: 0.4007200002670288 - val_loss: 1.684753179550171 - val_accuracy: 0.3937000036239624
units: 31 - 2111111112211222222222222322232
After growing:
loss: 1.6901593208312988 - accuracy: 0.3840000033378601 - val_loss: 1.7043949365615845 - val_accuracy: 0.3846000134944916
units: 51 - 211111111221122222222222232223255555555555555555555
Before pruning:
loss: 1.6928611993789673 - accuracy: 0.387580007314682 - val_loss: 1.7058708667755127 - val_accuracy: 0.38280001282691956
units: 51 - 211111111222122222232222222322222352222242235232222
After pruning:
loss: 1.6928611993789673 - accuracy: 0.387580007314682 - val_loss: 1.7058708667755127 - val_accuracy: 0.38280001282691956
units: 30 - 211111111122222222222222223232
Epoch 7/20
Before growing:
loss: 1.6928611993789673 - accuracy: 0.387580007314682 - val_loss: 1.7058708667755127 - val_accuracy: 0.38280001282691956
units: 30 - 211111111122222222222222223232
After growing:
loss: 1.695809245109558 - accuracy: 0.3872599

loss: 1.6451581716537476 - accuracy: 0.40338000655174255 - val_loss: 1.6765892505645752 - val_accuracy: 0.39559999108314514
units: 31 - 2111111112222222222222222222222
Epoch 17/20
Before growing:
loss: 1.6451581716537476 - accuracy: 0.40338000655174255 - val_loss: 1.6765892505645752 - val_accuracy: 0.39559999108314514
units: 31 - 2111111112222222222222222222222
After growing:
loss: 1.6524584293365479 - accuracy: 0.399399995803833 - val_loss: 1.6840482950210571 - val_accuracy: 0.387800008058548
units: 51 - 211111111222222222222222222222255555555555555555555
Before pruning:
loss: 1.6445962190628052 - accuracy: 0.40154001116752625 - val_loss: 1.6789602041244507 - val_accuracy: 0.38370001316070557
units: 51 - 211111111222222222222223222222222322422523533223522
After pruning:
loss: 1.6445962190628052 - accuracy: 0.40154001116752625 - val_loss: 1.6789602041244507 - val_accuracy: 0.38370001316070557
units: 37 - 2111111112222222222222222222222222222
Epoch 18/20
Before growing:
loss: 1.64459621

In [83]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(22, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_normal')
])

In [84]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])

In [85]:
model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f5315e2fd60>

In [88]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(25, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_normal')
])

In [89]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])

In [90]:
model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f53175c7070>

In [92]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(19, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_normal')
])

In [93]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])

In [94]:
model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f5313c4cb20>

In [103]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(59, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_normal')
])

In [104]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])

In [106]:
model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f5317736a60>

In [110]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(35, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_normal')
])

In [111]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])

In [112]:
model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f5311a0b910>

In [125]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(50, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_normal')
])

In [126]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])

In [127]:
model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f530ed1cca0>