In [5]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Fri Aug 20 19:55:06 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    40W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn

%matplotlib inline

In [7]:
# Floating-point precision used throughout the notebook: it is set as the Keras
# default here and reused below when casting the data and creating variables.
dtype = 'float32'
tf.keras.backend.set_floatx(dtype)

In [8]:
# Load CIFAR-10 with its predefined train/test split.
cifar10 = tf.keras.datasets.cifar10
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Cast to the notebook dtype, divide pixel values by 255 and flatten every
# image into a 3072-element row vector.
X_train = (X_train.astype(dtype) / 255.0).reshape(-1, 3072)
X_test = (X_test.astype(dtype) / 255.0).reshape(-1, 3072)

# Labels are cast to the same dtype as the features.
y_train, y_test = y_train.astype(dtype), y_test.astype(dtype)

# Train and test stacked together, used later to build class-specific subsets.
X = np.concatenate([X_train, X_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

In [9]:
from sklearn.preprocessing import StandardScaler

# Standardization statistics come from the training split only; the same
# fitted scaler is then applied to every view of the data.
scaler = StandardScaler().fit(X_train)

X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)
X_norm = scaler.transform(X)

In [10]:
from sklearn.model_selection import train_test_split

# Row indices of the samples labelled 1 (automobiles) and 5 (dogs).
automobile_indices = np.where(y == 1)[0]
dog_indices = np.where(y == 5)[0]

# Features and labels of each of the two classes.
X_norm_automobiles, y_automobiles = X_norm[automobile_indices], y[automobile_indices]
X_norm_dogs, y_dogs = X_norm[dog_indices], y[dog_indices]

# Two-class dataset: automobiles first, dogs second.
X_norm_automobiles_dogs = np.concatenate([X_norm_automobiles, X_norm_dogs], axis=0)
y_automobiles_dogs = np.concatenate([y_automobiles, y_dogs], axis=0)

# Reproducible 70/30 train/test split of the two-class dataset.
(
    X_norm_automobiles_dogs_train,
    X_norm_automobiles_dogs_test,
    y_automobiles_dogs_train,
    y_automobiles_dogs_test,
) = train_test_split(
    X_norm_automobiles_dogs,
    y_automobiles_dogs,
    test_size=0.3,
    random_state=42,
)

In [11]:
class SSRegularizer(tf.keras.regularizers.Regularizer):
    """Sparsity-inducing regularizer applied per output neuron (column).

    Args:
        regularization_penalty: Scalar strength of the penalty. It is read on
            every call, so changing the attribute later takes effect
            immediately.
        regularization_method: ``'weighted_l1'`` or ``'group_sparsity'``.
            Any other value makes ``__call__`` raise ``NotImplementedError``.
    """

    def __init__(self, regularization_penalty, regularization_method):
        self.regularization_penalty = regularization_penalty
        self.regularization_method = regularization_method

    def __call__(self, x):
        """Return the scalar penalty for parameter matrix ``x``.

        Raises:
            NotImplementedError: If ``regularization_method`` is not one of
                the supported names.
        """
        if self.regularization_method == 'weighted_l1':
            return self.weighted_l1(x)
        elif self.regularization_method == 'group_sparsity':
            return self.group_sparsity(x)
        else:
            raise NotImplementedError(f"Unknown regularization method {self.regularization_method}")

    def weighted_l1(self, x):
        """L1 penalty whose weight grows linearly with the output-neuron index."""
        # I.e. for a parameter matrix of 4 input and 10 output neurons:
        #
        # [[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        #  [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        #  [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        #  [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]
        #
        # and a penalty of 1., the scaling vector (cumulative sum of the
        # penalty over the output axis) is
        # [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.], and the weighted values are
        #
        # [[1., 2., 3., 4., 5., 6., 7., 8., 9., 10.],
        #  [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.],
        #  [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.],
        #  [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]]
        #
        # Therefore every additional output neuron is regularized more.

        # Build the scaling vector in the dtype of `x` itself so the method
        # does not depend on a notebook-level global.
        scaling_vector = tf.cumsum(tf.constant(self.regularization_penalty, shape=(x.shape[-1],), dtype=x.dtype), axis=0)
        weighted_values = scaling_vector * tf.abs(x)
        return tf.reduce_sum(weighted_values)

    def group_sparsity(self, x):
        """Group-lasso penalty: penalty times the sum of column-wise L2 norms."""
        # I.e. for a parameter matrix of 2 input and 3 output neurons:
        #
        # [[3., 0., 1.],
        #  [4., 0., 0.]]
        #
        # The resulting vector of group norms is [5., 0., 1.], therefore for
        # every output neuron, its incoming connections form a group.

        group_norms = tf.norm(x, ord=2, axis=0)
        # assert group_norms.shape[0] == x.shape[1]
        return self.regularization_penalty * tf.reduce_sum(group_norms)

    def get_config(self):
        """Return every constructor argument so the regularizer can be rebuilt.

        Both arguments of ``__init__`` are required, so both must be present
        for ``cls(**config)`` to succeed.
        """
        return {
            'regularization_penalty': float(self.regularization_penalty),
            'regularization_method': self.regularization_method,
        }


class SSLayer(tf.keras.Model):
    """Dense layer with directly owned ``W``/``b`` variables and an optional sparsity penalty.

    The kernel and bias are plain ``tf.Variable`` attributes so that callers
    can replace them with differently shaped variables later on.

    Args:
        input_units: Number of input features (rows of ``W``) at construction.
        units: Number of output neurons (columns of ``W``) at construction.
        activation: Activation identifier resolved via ``tf.keras.activations.get``.
        regularization_penalty: Strength handed to ``SSRegularizer``.
        regularization_method: Method name handed to ``SSRegularizer``;
            ``None`` disables the penalty.
        kernel_initializer: Initializer identifier for ``W``.
        bias_initializer: Initializer identifier for ``b``.
        regularize: When ``False`` no regularization loss is registered,
            regardless of ``regularization_method``.
    """

    def __init__(self, input_units, units, activation, regularization_penalty, regularization_method, kernel_initializer, bias_initializer, regularize=True):
        super().__init__()

        # Constructor arguments are kept so the layer can be re-created later.
        self.input_units = input_units
        self.units = units
        self.activation = activation
        self.regularization_penalty = regularization_penalty
        self.regularization_method = regularization_method
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer

        self.A = tf.keras.activations.get(activation)
        self.W_init = tf.keras.initializers.get(kernel_initializer)
        self.b_init = tf.keras.initializers.get(bias_initializer)
        self.regularizer = SSRegularizer(self.regularization_penalty, self.regularization_method)

        self.W = tf.Variable(
            name='W',
            initial_value=self.W_init(shape=(input_units, units), dtype=dtype),
            trainable=True)

        self.b = tf.Variable(
            name='b',
            initial_value=self.b_init(shape=(units,), dtype=dtype),
            trainable=True)

        if regularize and self.regularization_method is not None:
            # The loss is a callable that looks up self.W / self.b on every
            # evaluation, so it keeps working after those attributes are
            # replaced. The bias is appended as an extra row so that each
            # column holds all parameters of one output neuron.
            self.add_loss(lambda: self.regularizer(tf.concat([self.W, tf.reshape(self.b, (1, -1))], axis=0)))

    def call(self, inputs):
        """Apply the affine map followed by the activation."""
        return self.A(tf.matmul(inputs, self.W) + self.b)

    def copy_without_regularization(self):
        """Return a layer sharing this layer's ``W`` and ``b`` but adding no regularization loss."""
        # Size the copy from the current kernel rather than the
        # construction-time sizes, which are stale once W has been replaced.
        copy = SSLayer(
            int(self.W.shape[0]),
            int(self.W.shape[1]),
            self.activation,
            regularization_penalty=self.regularization_penalty,
            regularization_method=None,
            kernel_initializer=self.kernel_initializer,
            bias_initializer=self.bias_initializer,
            regularize=False
        )
        # Share (not clone) the trained variables with the copy.
        copy.W = self.W
        copy.b = self.b
        return copy


class SSModel(tf.keras.Model):
    """Fully connected softmax classifier whose hidden layers are resized during training.

    During the "self-scaling" epochs of ``fit`` every hidden layer is first
    grown by adding small-weight neurons, then trained with the sparsity
    regularizer, and finally pruned of neurons whose parameters all stay
    below a threshold.

    Args:
        layer_sizes: Sizes of all layers, input first and output last.
        activation: Activation of the hidden layers; the last layer always
            uses softmax.
        regularization_penalty: Penalty of the hidden layers; the last layer
            is created with a penalty of 0.
        regularization_method: ``'weighted_l1'`` or ``'group_sparsity'``.
        kernel_initializer: Initializer identifier for the kernels.
        bias_initializer: Initializer identifier for the biases.
    """

    def __init__(self, layer_sizes, activation=None, regularization_penalty=0.01, regularization_method='weighted_l1', kernel_initializer='glorot_uniform', bias_initializer='zeros'):
        super().__init__()

        self.sslayers = list()
        for l in range(len(layer_sizes) - 1):
            input_units = layer_sizes[l]
            units = layer_sizes[l + 1]
            if l < len(layer_sizes) - 2:
                layer = SSLayer(input_units, units, activation, regularization_penalty, regularization_method, kernel_initializer, bias_initializer)
            else:  # Last layer
                layer = SSLayer(input_units, units, 'softmax', 0., regularization_method, kernel_initializer, bias_initializer)
            self.sslayers.append(layer)

    def call(self, inputs):
        """Feed ``inputs`` through all layers in order."""
        x = inputs
        for layer in self.sslayers:
            x = layer(x)
        return x

    def get_layer_sizes(self):
        """Return the current sizes of all layers, read from the kernel shapes."""
        layer_sizes = list()
        last_index = len(self.sslayers) - 1
        for l, layer in enumerate(self.sslayers):
            layer_sizes.append(layer.W.shape[0])
            if l == last_index:  # Last layer also contributes the output size
                layer_sizes.append(layer.W.shape[1])
        return layer_sizes

    def get_hidden_layer_sizes(self):
        """Return the current sizes of the hidden layers only."""
        return self.get_layer_sizes()[1:-1]

    def remove_regularization(self):
        """Replace every layer by a copy that shares its variables but adds no regularization loss."""
        for l in range(len(self.sslayers)):
            self.sslayers[l] = self.sslayers[l].copy_without_regularization()

    def get_regularization_penalty(self):
        """Return the penalty of the first layer's regularizer."""
        return self.sslayers[0].regularizer.regularization_penalty

    def set_regularization_penalty(self, regularization_penalty):
        """Set the penalty of every layer except the last one."""
        for l in range(0, len(self.sslayers) - 1):  # Every layer except of the last is regularized
            self.sslayers[l].regularizer.regularization_penalty = regularization_penalty

    def prune(self, threshold=0.001):
        """Remove hidden neurons whose incoming weights and bias are all below ``threshold`` in magnitude."""
        for l in range(len(self.sslayers) - 1):
            layer1 = self.sslayers[l]
            layer2 = self.sslayers[l + 1]

            W1 = layer1.W.value()
            b1 = layer1.b.value()
            W2 = layer2.W.value()

            # A neuron survives if its largest incoming parameter (bias included) reaches the threshold.
            weights_with_biases = tf.concat([W1, tf.reshape(b1, (1, -1))], axis=0)
            neurons_are_active = tf.math.reduce_max(tf.abs(weights_with_biases), axis=0) >= threshold
            active_neurons_indices = tf.reshape(tf.where(neurons_are_active), (-1,))

            # Keep the surviving columns of this layer and the matching rows of the next one.
            new_W1 = tf.gather(W1, active_neurons_indices, axis=1)
            new_b1 = tf.gather(b1, active_neurons_indices, axis=0)
            new_W2 = tf.gather(W2, active_neurons_indices, axis=0)

            layer1.W = tf.Variable(name='W', initial_value=new_W1, trainable=True)
            layer1.b = tf.Variable(name='b', initial_value=new_b1, trainable=True)
            layer2.W = tf.Variable(name='W', initial_value=new_W2, trainable=True)

    def grow(self, percentage, min_new_neurons=5, scaling_factor=0.001):
        """Append new neurons to every hidden layer.

        Each hidden layer receives ``max(min_new_neurons, int(size * percentage))``
        neurons. Their incoming and outgoing weights are drawn from the
        layers' kernel initializers and multiplied by ``scaling_factor``;
        their biases come from the bias initializer unscaled.
        """
        for l in range(len(self.sslayers) - 1):
            layer1 = self.sslayers[l]
            layer2 = self.sslayers[l + 1]

            W1 = layer1.W.value()
            b1 = layer1.b.value()
            W2 = layer2.W.value()

            n_new_neurons = max(min_new_neurons, int(W1.shape[1] * percentage))

            # The initializers are asked for the shape of the grown matrix and only the slice of new entries is kept.
            W1_growth = layer1.W_init(shape=(W1.shape[0], W1.shape[1] + n_new_neurons), dtype=dtype)[:, -n_new_neurons:] * scaling_factor
            b1_growth = layer1.b_init(shape=(n_new_neurons,), dtype=dtype)
            W2_growth = layer2.W_init(shape=(W2.shape[0] + n_new_neurons, W2.shape[1]), dtype=dtype)[-n_new_neurons:, :] * scaling_factor  # TODO is it better to be multiplying here by scaling_factor? It does help with not increasing the max weights of existing neurons when new neurons are added.

            new_W1 = tf.concat([W1, W1_growth], axis=1)
            new_b1 = tf.concat([b1, b1_growth], axis=0)
            new_W2 = tf.concat([W2, W2_growth], axis=0)

            layer1.W = tf.Variable(name='W1', initial_value=new_W1, trainable=True)
            layer1.b = tf.Variable(name='b1', initial_value=new_b1, trainable=True)
            layer2.W = tf.Variable(name='W2', initial_value=new_W2, trainable=True)

    @staticmethod
    def get_param_string(weights, bias):
        """Summarize a layer as one digit per neuron.

        The digit is the negated decimal order of magnitude of the neuron's
        largest incoming parameter (bias included): ``0`` for a maximum of at
        least 1, ``3`` for a maximum in [0.001, 0.01), and so on. The digit
        is capped at ``9`` so that every neuron takes exactly one character;
        a neuron whose parameters are all exactly zero is also shown as ``9``.
        """
        weights_with_bias = tf.concat([weights, tf.reshape(bias, (1, -1))], axis=0)
        max_parameters = tf.math.reduce_max(tf.abs(weights_with_bias), axis=0).numpy()
        # log10(0) is -inf; silence the warning here and map it to the cap below.
        with np.errstate(divide='ignore'):
            magnitudes = np.floor(np.log10(max_parameters))
        digits = []
        for m in magnitudes:
            if m == -np.inf:
                digit = 9
            else:
                # Positive magnitudes are shown as 0; very small ones are capped at 9.
                digit = min(int(-min(m, 0)), 9)
            digits.append(str(digit))
        return "".join(digits)

    def print_neurons(self):
        """Print the parameter-magnitude string of every hidden layer."""
        for layer in self.sslayers[:-1]:
            print(self.get_param_string(layer.W, layer.b))

    def evaluate(self, x, y, validation_data):
        """Compute loss and accuracy on ``(x, y)`` and on ``validation_data``.

        Each dataset is passed through the model in a single forward pass.

        Returns:
            Tuple ``(loss, accuracy, val_loss, val_accuracy)`` of Python floats.
        """
        x_val = validation_data[0]
        y_val = validation_data[1]

        y_pred = self(x)
        loss = float(tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(y, y_pred)))
        accuracy = float(tf.reduce_mean(tf.keras.metrics.sparse_categorical_accuracy(y, y_pred)))

        y_val_pred = self(x_val)
        val_loss = float(tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(y_val, y_val_pred)))
        val_accuracy = float(tf.reduce_mean(tf.keras.metrics.sparse_categorical_accuracy(y_val, y_val_pred)))

        return loss, accuracy, val_loss, val_accuracy

    def print_epoch_statistics(self, x, y, validation_data, print_neurons):
        """Print metrics, current penalty and hidden-layer sizes of this model."""
        loss, accuracy, val_loss, val_accuracy = self.evaluate(x, y, validation_data)
        # Report this instance's penalty, not that of whatever object a global named `model` refers to.
        print(f"loss: {loss} - accuracy: {accuracy} - val_loss: {val_loss} - val_accuracy: {val_accuracy} - penalty: {self.get_regularization_penalty()}")
        hidden_layer_sizes = self.get_hidden_layer_sizes()
        print(f"hidden layer sizes: {hidden_layer_sizes}, total neurons: {sum(hidden_layer_sizes)}")
        if print_neurons:
            self.print_neurons()

    def update_history(self, x, y, validation_data, history):
        """Append the current metrics to the lists stored in ``history``."""
        loss, accuracy, val_loss, val_accuracy = self.evaluate(x, y, validation_data)
        history['loss'].append(loss)
        history['accuracy'].append(accuracy)
        history['val_loss'].append(val_loss)
        history['val_accuracy'].append(val_accuracy)

    def fit(self, x, y, optimizer, epochs, self_scaling_epochs, batch_size, min_new_neurons, validation_data, pruning_threshold=0.001, 
            regularization_penalty_multiplier=1., stall_coefficient=1, growth_percentage=0.2, mini_epochs_per_epoch=1, verbose=True, print_neurons=False):
        """Train the model with a custom loop that grows and prunes the hidden layers.

        For each of the first ``self_scaling_epochs`` epochs the layers are
        grown before the gradient steps and pruned afterwards. When
        ``epoch == self_scaling_epochs`` the regularization is removed and
        the remaining epochs train the fixed architecture.

        Args:
            x, y: Training features and sparse integer-valued labels.
            optimizer: Optimizer exposing ``apply_gradients``.
            epochs: Total number of epochs.
            self_scaling_epochs: Number of leading epochs with grow/prune.
            batch_size: Mini-batch size.
            min_new_neurons: Minimum neurons added per hidden layer per growth.
            validation_data: Tuple ``(x_val, y_val)``.
            pruning_threshold: Magnitude threshold for pruning; also used as
                the scaling factor of newly grown weights.
            regularization_penalty_multiplier: Factor applied to the penalty
                once each time the validation loss starts to stall.
            stall_coefficient: Training counts as stalled when the validation
                loss is at least ``best_val_loss * stall_coefficient``.
            growth_percentage: Relative growth of each hidden layer.
            mini_epochs_per_epoch: Passes over the data per epoch.
            verbose: Whether to print statistics.
            print_neurons: Whether the statistics include per-neuron strings.

        Returns:
            Dict with per-epoch ``loss``, ``accuracy``, ``val_loss`` and
            ``val_accuracy`` lists.
        """
        # NOTE(review): grow/prune replace the tf.Variable objects, so any
        # per-variable optimizer state presumably starts afresh for them —
        # confirm against the optimizer in use.
        train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
        train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

        history = {
            'loss': list(),
            'accuracy': list(),
            'val_loss': list(),
            'val_accuracy': list(),
        }

        best_val_loss = np.inf
        training_stalled = False
        for epoch in range(epochs):
            if verbose:
                print("##########################################################")
                print(f"Epoch {epoch + 1}/{epochs}")

            if epoch < self_scaling_epochs:
                if verbose:
                    print("Before growing:")
                    self.print_epoch_statistics(x, y, validation_data, print_neurons)

                # Raise the penalty once per stall, i.e. only on the first epoch without improvement.
                _, _, val_loss, _ = self.evaluate(x, y, validation_data)
                if val_loss >= best_val_loss * stall_coefficient:
                    if not training_stalled:
                        penalty = self.get_regularization_penalty() * regularization_penalty_multiplier
                        self.set_regularization_penalty(penalty)
                        training_stalled = True
                else:
                    best_val_loss = val_loss
                    training_stalled = False

                self.grow(percentage=growth_percentage, min_new_neurons=min_new_neurons, scaling_factor=pruning_threshold)
                if verbose:
                    print("After growing:")
                    self.print_epoch_statistics(x, y, validation_data, print_neurons)

            if epoch == self_scaling_epochs:
                self.remove_regularization()

            for _ in range(mini_epochs_per_epoch):
                for x_batch, y_batch in train_dataset:
                    with tf.GradientTape() as tape:
                        y_pred = self(x_batch, training=True)
                        loss_value = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(y_batch, y_pred))
                        # Add the regularization losses registered by the layers.
                        loss_value += sum(self.losses)

                    grads = tape.gradient(loss_value, self.trainable_variables)
                    optimizer.apply_gradients(zip(grads, self.trainable_variables))

            if epoch < self_scaling_epochs:
                if verbose:
                    print("Before pruning:")
                    self.print_epoch_statistics(x, y, validation_data, print_neurons)
                self.prune(threshold=pruning_threshold)
                if verbose:
                    print("After pruning:")
                    self.print_epoch_statistics(x, y, validation_data, print_neurons)
            else:
                if verbose:
                    self.print_epoch_statistics(x, y, validation_data, print_neurons)

            self.update_history(x, y, validation_data, history)

        return history

# Concept drift

## Dynamic auto-sizing

In [23]:
# Training settings passed to SSModel.fit by the cells below.
epochs = 40  # total number of epochs
self_scaling_epochs = 40  # leading epochs with grow/prune (here: all of them)
batch_size = 32  # mini-batch size
min_new_neurons = 100  # minimum neurons added to each hidden layer per growth step

In [24]:
%%time

model = SSModel(layer_sizes=[3072, 300, 300, 300, 300, 10], activation='selu', regularization_penalty=0.000001, 
                regularization_method='weighted_l1', kernel_initializer='lecun_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002)

model.fit(X_norm_automobiles_dogs_train, y_automobiles_dogs_train, optimizer, epochs, self_scaling_epochs, batch_size, 
          min_new_neurons, validation_data=(X_norm_automobiles_dogs_test, y_automobiles_dogs_test))

##########################################################
Epoch 1/40
Before growing:
loss: 2.7479329109191895 - accuracy: 0.0773809552192688 - val_loss: 2.7470860481262207 - val_accuracy: 0.07833333313465118 - penalty: 1e-06
hidden layer sizes: [300, 300, 300, 300], total neurons: 1200
After growing:
loss: 2.7479329109191895 - accuracy: 0.0773809552192688 - val_loss: 2.747086524963379 - val_accuracy: 0.07833333313465118 - penalty: 1e-06
hidden layer sizes: [400, 400, 400, 400], total neurons: 1600
Before pruning:
loss: 0.23003938794136047 - accuracy: 0.9123809337615967 - val_loss: 0.3350624144077301 - val_accuracy: 0.8761110901832581 - penalty: 1e-06
hidden layer sizes: [400, 400, 400, 400], total neurons: 1600
After pruning:
loss: 0.23006406426429749 - accuracy: 0.9123809337615967 - val_loss: 0.3350726366043091 - val_accuracy: 0.8761110901832581 - penalty: 1e-06
hidden layer sizes: [318, 302, 307, 379], total neurons: 1306
##########################################################
Ep

In [27]:
# Mean of five hand-copied neuron totals; presumably the "total neurons"
# values of the last epochs of the preceding run — verify against its output.
sum((603, 573, 586, 471, 487)) / 5

544.0

In [25]:
# Continue training the existing `model` with the existing `optimizer`, now
# on the full ten-class training set, for a few self-scaling epochs.
epochs = self_scaling_epochs = 5

model.fit(
    x=X_train_norm,
    y=y_train,
    optimizer=optimizer,
    epochs=epochs,
    self_scaling_epochs=self_scaling_epochs,
    batch_size=batch_size,
    min_new_neurons=min_new_neurons,
    validation_data=(X_test_norm, y_test),
)

##########################################################
Epoch 1/5
Before growing:
loss: 18.775209426879883 - accuracy: 0.19422000646591187 - val_loss: 18.74847412109375 - val_accuracy: 0.19419999420642853 - penalty: 1e-06
hidden layer sizes: [232, 231, 66, 74], total neurons: 603
After growing:
loss: 18.775209426879883 - accuracy: 0.19422000646591187 - val_loss: 18.748472213745117 - val_accuracy: 0.19419999420642853 - penalty: 1e-06
hidden layer sizes: [332, 331, 166, 174], total neurons: 1003
Before pruning:
loss: 1.5507994890213013 - accuracy: 0.4280799925327301 - val_loss: 1.5574026107788086 - val_accuracy: 0.42340001463890076 - penalty: 1e-06
hidden layer sizes: [332, 331, 166, 174], total neurons: 1003
After pruning:
loss: 1.5505298376083374 - accuracy: 0.42805999517440796 - val_loss: 1.5571227073669434 - val_accuracy: 0.42399999499320984 - penalty: 1e-06
hidden layer sizes: [102, 317, 166, 174], total neurons: 759
##########################################################
Epoc

{'accuracy': [0.42805999517440796,
  0.4823800027370453,
  0.503279983997345,
  0.520039975643158,
  0.5401600003242493],
 'loss': [1.5505298376083374,
  1.428890347480774,
  1.3712053298950195,
  1.321232795715332,
  1.2820333242416382],
 'val_accuracy': [0.42399999499320984,
  0.47369998693466187,
  0.48339998722076416,
  0.4968000054359436,
  0.5048999786376953],
 'val_loss': [1.5571227073669434,
  1.4634933471679688,
  1.4311031103134155,
  1.4023185968399048,
  1.3824816942214966]}

In [26]:
# Another round on the full training set, reusing the notebook's current
# `epochs` / `self_scaling_epochs` values.
model.fit(
    x=X_train_norm,
    y=y_train,
    optimizer=optimizer,
    epochs=epochs,
    self_scaling_epochs=self_scaling_epochs,
    batch_size=batch_size,
    min_new_neurons=min_new_neurons,
    validation_data=(X_test_norm, y_test),
)

##########################################################
Epoch 1/5
Before growing:
loss: 1.2820333242416382 - accuracy: 0.5401600003242493 - val_loss: 1.3824816942214966 - val_accuracy: 0.5048999786376953 - penalty: 1e-06
hidden layer sizes: [180, 206, 171, 150], total neurons: 707
After growing:
loss: 1.2820332050323486 - accuracy: 0.5401600003242493 - val_loss: 1.3824814558029175 - val_accuracy: 0.5048999786376953 - penalty: 1e-06
hidden layer sizes: [280, 306, 271, 250], total neurons: 1107
Before pruning:
loss: 1.2507189512252808 - accuracy: 0.5523399710655212 - val_loss: 1.3719888925552368 - val_accuracy: 0.5103999972343445 - penalty: 1e-06
hidden layer sizes: [280, 306, 271, 250], total neurons: 1107
After pruning:
loss: 1.2510151863098145 - accuracy: 0.5519999861717224 - val_loss: 1.372218370437622 - val_accuracy: 0.5094000101089478 - penalty: 1e-06
hidden layer sizes: [142, 204, 164, 152], total neurons: 662
##########################################################
Epoch 2/5

{'accuracy': [0.5519999861717224,
  0.5580000281333923,
  0.5748999714851379,
  0.5788400173187256,
  0.5884000062942505],
 'loss': [1.2510151863098145,
  1.2328946590423584,
  1.19416344165802,
  1.1805680990219116,
  1.1596497297286987],
 'val_accuracy': [0.5094000101089478,
  0.5092999935150146,
  0.5189999938011169,
  0.5149999856948853,
  0.5223000049591064],
 'val_loss': [1.372218370437622,
  1.36928129196167,
  1.3489078283309937,
  1.3518704175949097,
  1.349127173423767]}

In [28]:
# Mean of five hand-copied neuron totals; presumably the per-epoch "total
# neurons" values after pruning in the preceding run — verify against its output.
sum((662, 699, 697, 698, 731)) / 5

697.4

In [42]:
# Training settings passed to SSModel.fit by the cells below.
epochs = 40  # total number of epochs
self_scaling_epochs = 40  # leading epochs with grow/prune (here: all of them)
batch_size = 32  # mini-batch size
min_new_neurons = 100  # minimum neurons added to each hidden layer per growth step

In [43]:
%%time

model = SSModel(layer_sizes=[3072, 300, 300, 300, 300, 10], activation='selu', regularization_penalty=0.00001, 
                regularization_method='weighted_l1', kernel_initializer='lecun_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002)

model.fit(X_norm_automobiles_dogs_train, y_automobiles_dogs_train, optimizer, epochs, self_scaling_epochs, batch_size, 
          min_new_neurons, validation_data=(X_norm_automobiles_dogs_test, y_automobiles_dogs_test))

##########################################################
Epoch 1/40
Before growing:
loss: 2.361034631729126 - accuracy: 0.19809523224830627 - val_loss: 2.411349058151245 - val_accuracy: 0.2013888955116272 - penalty: 1e-05
hidden layer sizes: [300, 300, 300, 300], total neurons: 1200
After growing:
loss: 2.361035108566284 - accuracy: 0.19809523224830627 - val_loss: 2.411349296569824 - val_accuracy: 0.2013888955116272 - penalty: 1e-05
hidden layer sizes: [400, 400, 400, 400], total neurons: 1600
Before pruning:
loss: 0.33096709847450256 - accuracy: 0.8698809742927551 - val_loss: 0.4045203626155853 - val_accuracy: 0.8422222137451172 - penalty: 1e-05
hidden layer sizes: [400, 400, 400, 400], total neurons: 1600
After pruning:
loss: 0.33106324076652527 - accuracy: 0.8698809742927551 - val_loss: 0.4046081602573395 - val_accuracy: 0.8424999713897705 - penalty: 1e-05
hidden layer sizes: [300, 300, 300, 300], total neurons: 1200
##########################################################
Epoch

In [46]:
# Mean of five hand-copied neuron totals; presumably the "total neurons"
# values of the last epochs of the preceding run — verify against its output.
sum((112, 114, 91, 133, 118)) / 5

113.6

In [44]:
# Continue training the existing `model` with the existing `optimizer`, now
# on the full ten-class training set, for a few self-scaling epochs.
epochs = self_scaling_epochs = 5

model.fit(
    x=X_train_norm,
    y=y_train,
    optimizer=optimizer,
    epochs=epochs,
    self_scaling_epochs=self_scaling_epochs,
    batch_size=batch_size,
    min_new_neurons=min_new_neurons,
    validation_data=(X_test_norm, y_test),
)

##########################################################
Epoch 1/5
Before growing:
loss: 12.513557434082031 - accuracy: 0.19273999333381653 - val_loss: 12.51563549041748 - val_accuracy: 0.1932000070810318 - penalty: 1e-05
hidden layer sizes: [30, 33, 13, 42], total neurons: 118
After growing:
loss: 12.51356029510498 - accuracy: 0.19273999333381653 - val_loss: 12.515637397766113 - val_accuracy: 0.1932000070810318 - penalty: 1e-05
hidden layer sizes: [130, 133, 113, 142], total neurons: 518
Before pruning:
loss: 1.7758828401565552 - accuracy: 0.32624000310897827 - val_loss: 1.7807749509811401 - val_accuracy: 0.3278000056743622 - penalty: 1e-05
hidden layer sizes: [130, 133, 113, 142], total neurons: 518
After pruning:
loss: 1.776384949684143 - accuracy: 0.32627999782562256 - val_loss: 1.7815121412277222 - val_accuracy: 0.32679998874664307 - penalty: 1e-05
hidden layer sizes: [34, 132, 97, 131], total neurons: 394
##########################################################
Epoch 2/5
Befo

{'accuracy': [0.32627999782562256,
  0.40143999457359314,
  0.4325000047683716,
  0.44642001390457153,
  0.458979994058609],
 'loss': [1.776384949684143,
  1.6299527883529663,
  1.555314064025879,
  1.517514705657959,
  1.4876476526260376],
 'val_accuracy': [0.32679998874664307,
  0.39739999175071716,
  0.42809998989105225,
  0.4390999972820282,
  0.44589999318122864],
 'val_loss': [1.7815121412277222,
  1.6344599723815918,
  1.5709097385406494,
  1.5453683137893677,
  1.5232874155044556]}

In [45]:
# Another five self-scaling epochs on the full training set with the same
# `model` and `optimizer`.
epochs = self_scaling_epochs = 5

model.fit(
    x=X_train_norm,
    y=y_train,
    optimizer=optimizer,
    epochs=epochs,
    self_scaling_epochs=self_scaling_epochs,
    batch_size=batch_size,
    min_new_neurons=min_new_neurons,
    validation_data=(X_test_norm, y_test),
)

##########################################################
Epoch 1/5
Before growing:
loss: 1.4876476526260376 - accuracy: 0.458979994058609 - val_loss: 1.5232874155044556 - val_accuracy: 0.44589999318122864 - penalty: 1e-05
hidden layer sizes: [24, 72, 37, 96], total neurons: 229
After growing:
loss: 1.4876477718353271 - accuracy: 0.458979994058609 - val_loss: 1.523287296295166 - val_accuracy: 0.44589999318122864 - penalty: 1e-05
hidden layer sizes: [124, 172, 137, 196], total neurons: 629
Before pruning:
loss: 1.4724597930908203 - accuracy: 0.4686799943447113 - val_loss: 1.5190184116363525 - val_accuracy: 0.4507000148296356 - penalty: 1e-05
hidden layer sizes: [124, 172, 137, 196], total neurons: 629
After pruning:
loss: 1.4726018905639648 - accuracy: 0.4686200022697449 - val_loss: 1.5191874504089355 - val_accuracy: 0.4503999948501587 - penalty: 1e-05
hidden layer sizes: [31, 68, 35, 93], total neurons: 227
##########################################################
Epoch 2/5
Before gr

{'accuracy': [0.4686200022697449,
  0.4791800081729889,
  0.4816800057888031,
  0.48774001002311707,
  0.4894599914550781],
 'loss': [1.4726018905639648,
  1.446529746055603,
  1.4349040985107422,
  1.4203665256500244,
  1.4123131036758423],
 'val_accuracy': [0.4503999948501587,
  0.45179998874664307,
  0.4577000141143799,
  0.454800009727478,
  0.45829999446868896],
 'val_loss': [1.5191874504089355,
  1.502846598625183,
  1.4980697631835938,
  1.491937279701233,
  1.487076997756958]}

In [62]:
# Training settings passed to SSModel.fit by the cells below.
epochs = 20  # total number of epochs
self_scaling_epochs = 20  # leading epochs with grow/prune (here: all of them)
batch_size = 32  # mini-batch size
min_new_neurons = 100  # minimum neurons added to each hidden layer per growth step

In [63]:
%%time

model = SSModel(layer_sizes=[3072, 4000, 4000, 4000, 4000, 10], activation='selu', regularization_penalty=0.0000001, 
                regularization_method='weighted_l1', kernel_initializer='lecun_normal')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002)

model.fit(X_norm_automobiles_dogs_train, y_automobiles_dogs_train, optimizer, epochs, self_scaling_epochs, batch_size, 
          min_new_neurons, validation_data=(X_norm_automobiles_dogs_test, y_automobiles_dogs_test))

##########################################################
Epoch 1/20
Before growing:
loss: 2.711106538772583 - accuracy: 0.09107142686843872 - val_loss: 2.7317590713500977 - val_accuracy: 0.09361111372709274 - penalty: 1e-07
hidden layer sizes: [4000, 4000, 4000, 4000], total neurons: 16000
After growing:
loss: 2.711106777191162 - accuracy: 0.09107142686843872 - val_loss: 2.7317588329315186 - val_accuracy: 0.09361111372709274 - penalty: 1e-07
hidden layer sizes: [4800, 4800, 4800, 4800], total neurons: 19200
Before pruning:
loss: 0.34226396679878235 - accuracy: 0.8897619247436523 - val_loss: 0.44591420888900757 - val_accuracy: 0.8700000047683716 - penalty: 1e-07
hidden layer sizes: [4800, 4800, 4800, 4800], total neurons: 19200
After pruning:
loss: 0.34194567799568176 - accuracy: 0.8899999856948853 - val_loss: 0.44576242566108704 - val_accuracy: 0.8711110949516296 - penalty: 1e-07
hidden layer sizes: [4000, 4000, 4015, 4008], total neurons: 16023
######################################

In [64]:
# Continue training the existing `model` with the existing `optimizer`, now
# on the full ten-class training set, for a few self-scaling epochs.
epochs = self_scaling_epochs = 5

model.fit(
    x=X_train_norm,
    y=y_train,
    optimizer=optimizer,
    epochs=epochs,
    self_scaling_epochs=self_scaling_epochs,
    batch_size=batch_size,
    min_new_neurons=min_new_neurons,
    validation_data=(X_test_norm, y_test),
)

##########################################################
Epoch 1/5
Before growing:
loss: 20.738924026489258 - accuracy: 0.19359999895095825 - val_loss: 20.696252822875977 - val_accuracy: 0.19349999725818634 - penalty: 1e-07
hidden layer sizes: [303, 581, 302, 470], total neurons: 1656
After growing:
loss: 20.73892593383789 - accuracy: 0.19359999895095825 - val_loss: 20.69625473022461 - val_accuracy: 0.19349999725818634 - penalty: 1e-07
hidden layer sizes: [403, 697, 402, 570], total neurons: 2072
Before pruning:
loss: 1.4864871501922607 - accuracy: 0.45423999428749084 - val_loss: 1.5066421031951904 - val_accuracy: 0.4424000084400177 - penalty: 1e-07
hidden layer sizes: [403, 697, 402, 570], total neurons: 2072
After pruning:
loss: 1.486430287361145 - accuracy: 0.454259991645813 - val_loss: 1.5065820217132568 - val_accuracy: 0.4426000118255615 - penalty: 1e-07
hidden layer sizes: [335, 697, 402, 536], total neurons: 1970
##########################################################
Epoch

{'accuracy': [0.454259991645813,
  0.5199599862098694,
  0.5288800001144409,
  0.5464199781417847,
  0.5976600050926208],
 'loss': [1.486430287361145,
  1.3302218914031982,
  1.2860933542251587,
  1.245113492012024,
  1.129961609840393],
 'val_accuracy': [0.4426000118255615,
  0.5031999945640564,
  0.4982999861240387,
  0.5023000240325928,
  0.5358999967575073],
 'val_loss': [1.5065820217132568,
  1.3873450756072998,
  1.381866693496704,
  1.382185697555542,
  1.2924774885177612]}

In [65]:
# Another five self-scaling epochs on the full training set with the same
# `model` and `optimizer`.
epochs = self_scaling_epochs = 5

model.fit(
    x=X_train_norm,
    y=y_train,
    optimizer=optimizer,
    epochs=epochs,
    self_scaling_epochs=self_scaling_epochs,
    batch_size=batch_size,
    min_new_neurons=min_new_neurons,
    validation_data=(X_test_norm, y_test),
)

##########################################################
Epoch 1/5
Before growing:
loss: 1.129961609840393 - accuracy: 0.5976600050926208 - val_loss: 1.2924774885177612 - val_accuracy: 0.5358999967575073 - penalty: 1e-07
hidden layer sizes: [667, 861, 547, 466], total neurons: 2541
After growing:
loss: 1.129961609840393 - accuracy: 0.5976600050926208 - val_loss: 1.2924774885177612 - val_accuracy: 0.5358999967575073 - penalty: 1e-07
hidden layer sizes: [800, 1033, 656, 566], total neurons: 3055
Before pruning:
loss: 1.0870468616485596 - accuracy: 0.6112599968910217 - val_loss: 1.283087134361267 - val_accuracy: 0.5410000085830688 - penalty: 1e-07
hidden layer sizes: [800, 1033, 656, 566], total neurons: 3055
After pruning:
loss: 1.0870997905731201 - accuracy: 0.611020028591156 - val_loss: 1.2830854654312134 - val_accuracy: 0.5408999919891357 - penalty: 1e-07
hidden layer sizes: [728, 952, 583, 503], total neurons: 2766
##########################################################
Epoch 2/

{'accuracy': [0.611020028591156,
  0.627560019493103,
  0.6269199848175049,
  0.6485000252723694,
  0.6655799746513367],
 'loss': [1.0870997905731201,
  1.0354007482528687,
  1.0331312417984009,
  0.9818136096000671,
  0.9419556260108948],
 'val_accuracy': [0.5408999919891357,
  0.541700005531311,
  0.534600019454956,
  0.5472999811172485,
  0.5486999750137329],
 'val_loss': [1.2830854654312134,
  1.2823516130447388,
  1.3145761489868164,
  1.2891998291015625,
  1.3002042770385742]}

### Group sparsity regularization

In [52]:
# Training settings passed to SSModel.fit by the cells below.
epochs = 20  # total number of epochs
self_scaling_epochs = 20  # leading epochs with grow/prune (here: all of them)
batch_size = 32  # mini-batch size
min_new_neurons = 100  # minimum neurons added to each hidden layer per growth step

In [53]:
%%time

# Self-scaling network: 3072 inputs, four hidden layers that start at 300 units
# each (the training log reports them as "hidden layer sizes"), 10 outputs.
# Group-sparsity regularisation with penalty 0.01.
model = SSModel(
    layer_sizes=[3072, 300, 300, 300, 300, 10],
    activation='selu',
    kernel_initializer='lecun_normal',
    regularization_method='group_sparsity',
    regularization_penalty=0.01,
)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002)

# SSModel.fit takes the optimizer and schedule positionally; the keyword names
# are not visible from this cell, so the positional order is kept as-is.
model.fit(
    X_norm_automobiles_dogs_train,
    y_automobiles_dogs_train,
    optimizer,
    epochs,
    self_scaling_epochs,
    batch_size,
    min_new_neurons,
    validation_data=(X_norm_automobiles_dogs_test, y_automobiles_dogs_test),
)

##########################################################
Epoch 1/20
Before growing:
loss: 2.7622008323669434 - accuracy: 0.14464285969734192 - val_loss: 2.7266921997070312 - val_accuracy: 0.14527778327465057 - penalty: 0.01
hidden layer sizes: [300, 300, 300, 300], total neurons: 1200
After growing:
loss: 2.7622008323669434 - accuracy: 0.14464285969734192 - val_loss: 2.7266921997070312 - val_accuracy: 0.14527778327465057 - penalty: 0.01
hidden layer sizes: [400, 400, 400, 400], total neurons: 1600
Before pruning:
loss: 0.22202880680561066 - accuracy: 0.9163095355033875 - val_loss: 0.3070373237133026 - val_accuracy: 0.8811110854148865 - penalty: 0.01
hidden layer sizes: [400, 400, 400, 400], total neurons: 1600
After pruning:
loss: 0.22240205109119415 - accuracy: 0.9153571724891663 - val_loss: 0.3074040710926056 - val_accuracy: 0.8816666603088379 - penalty: 0.01
hidden layer sizes: [300, 300, 301, 300], total neurons: 1201
##########################################################
Epo

In [55]:
# Switch to a 5-epoch schedule and keep training the existing `model` on
# X_train_norm / y_train. `model`, `optimizer`, `batch_size` and
# `min_new_neurons` are reused from kernel state set by earlier cells.
epochs = self_scaling_epochs = 5

model.fit(
    X_train_norm,
    y_train,
    optimizer,
    epochs,
    self_scaling_epochs,
    batch_size,
    min_new_neurons,
    validation_data=(X_test_norm, y_test),
)

##########################################################
Epoch 1/5
Before growing:
loss: 7.067015647888184 - accuracy: 0.18945999443531036 - val_loss: 7.076200008392334 - val_accuracy: 0.18940000236034393 - penalty: 0.01
hidden layer sizes: [37, 8, 3, 39], total neurons: 87
After growing:
loss: 7.067009449005127 - accuracy: 0.18945999443531036 - val_loss: 7.076193809509277 - val_accuracy: 0.18940000236034393 - penalty: 0.01
hidden layer sizes: [137, 108, 103, 139], total neurons: 487
Before pruning:
loss: 1.8373908996582031 - accuracy: 0.2900800108909607 - val_loss: 1.8446769714355469 - val_accuracy: 0.2863999903202057 - penalty: 0.01
hidden layer sizes: [137, 108, 103, 139], total neurons: 487
After pruning:
loss: 1.8373355865478516 - accuracy: 0.2902800142765045 - val_loss: 1.8447535037994385 - val_accuracy: 0.287200003862381 - penalty: 0.01
hidden layer sizes: [29, 45, 29, 99], total neurons: 202
##########################################################
Epoch 2/5
Before growing:


{'accuracy': [0.2902800142765045,
  0.3094800114631653,
  0.33263999223709106,
  0.3366200029850006,
  0.3519800007343292],
 'loss': [1.8373355865478516,
  1.793487787246704,
  1.7749556303024292,
  1.7585344314575195,
  1.7463375329971313],
 'val_accuracy': [0.287200003862381,
  0.30649998784065247,
  0.33070001006126404,
  0.33480000495910645,
  0.34790000319480896],
 'val_loss': [1.8447535037994385,
  1.8058598041534424,
  1.7882664203643799,
  1.7735812664031982,
  1.7631163597106934]}

In [56]:
# Another 5-epoch round with the same arguments as the previous cell; the
# model, optimizer and remaining hyperparameters come from kernel state.
epochs = self_scaling_epochs = 5

model.fit(
    X_train_norm,
    y_train,
    optimizer,
    epochs,
    self_scaling_epochs,
    batch_size,
    min_new_neurons,
    validation_data=(X_test_norm, y_test),
)

##########################################################
Epoch 1/5
Before growing:
loss: 1.7463375329971313 - accuracy: 0.3519800007343292 - val_loss: 1.7631163597106934 - val_accuracy: 0.34790000319480896 - penalty: 0.01
hidden layer sizes: [57, 19, 12, 88], total neurons: 176
After growing:
loss: 1.7463372945785522 - accuracy: 0.3519800007343292 - val_loss: 1.7631162405014038 - val_accuracy: 0.34790000319480896 - penalty: 0.01
hidden layer sizes: [157, 119, 112, 188], total neurons: 576
Before pruning:
loss: 1.7129186391830444 - accuracy: 0.370959997177124 - val_loss: 1.7313566207885742 - val_accuracy: 0.3686999976634979 - penalty: 0.01
hidden layer sizes: [157, 119, 112, 188], total neurons: 576
After pruning:
loss: 1.7132741212844849 - accuracy: 0.37031999230384827 - val_loss: 1.7316983938217163 - val_accuracy: 0.3682999908924103 - penalty: 0.01
hidden layer sizes: [21, 17, 10, 88], total neurons: 136
##########################################################
Epoch 2/5
Before gro

{'accuracy': [0.37031999230384827,
  0.3739599883556366,
  0.38067999482154846,
  0.3883399963378906,
  0.38618001341819763],
 'loss': [1.7132741212844849,
  1.690038800239563,
  1.6849206686019897,
  1.6698124408721924,
  1.671156883239746],
 'val_accuracy': [0.3682999908924103,
  0.37389999628067017,
  0.3776000142097473,
  0.38260000944137573,
  0.37940001487731934],
 'val_loss': [1.7316983938217163,
  1.7125375270843506,
  1.7064882516860962,
  1.6978199481964111,
  1.6991358995437622]}

## Corresponding static models

In [37]:
# Static (fixed-width) baseline with hidden layers of 232, 231, 66 and 74 units.
# NOTE(review): these widths are presumably the sizes a self-scaling run ended
# with — confirm against the corresponding SSModel log.
#
# 3072 is the width of the flattened input (the data is reshaped to (-1, 3072)),
# not a hidden layer. It is declared with `tf.keras.Input` so that the network
# has exactly four hidden layers, like SSModel(layer_sizes=[3072, ..., 10]).
# A `Dense(3072)` first layer would add an extra 3072x3072 hidden layer and make
# the comparison with the self-scaling model unfair.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(3072,)),
    tf.keras.layers.Dense(232, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(231, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(66, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(74, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='lecun_normal'),
])

In [38]:
# Integer class labels + softmax output -> sparse categorical cross-entropy.
# Adam uses the same learning rate (2e-4) as the self-scaling runs.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [39]:
%%time

# 40 epochs on the `*_automobiles_dogs_train` arrays, validated on the matching
# `*_test` arrays (both defined earlier in the notebook).
model.fit(
    x=X_norm_automobiles_dogs_train,
    y=y_automobiles_dogs_train,
    epochs=40,
    validation_data=(X_norm_automobiles_dogs_test, y_automobiles_dogs_test),
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
CPU times: user 43 s, sys: 3.9 s, total: 46.9 s
Wall time: 41.4 s


<keras.callbacks.History at 0x7fdf517c1f50>

In [40]:
%%time

# Keep training the same `model` (fit resumes from its current weights) for
# 5 epochs on X_train_norm / y_train, validating on X_test_norm / y_test.
model.fit(
    x=X_train_norm,
    y=y_train,
    epochs=5,
    validation_data=(X_test_norm, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 28.2 s, sys: 2.24 s, total: 30.4 s
Wall time: 25.7 s


<keras.callbacks.History at 0x7fdf515d4710>

In [41]:
%%time

# A further 5 epochs on X_train_norm / y_train with identical settings;
# fit resumes from the model's current weights.
model.fit(
    x=X_train_norm,
    y=y_train,
    epochs=5,
    validation_data=(X_test_norm, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 28.3 s, sys: 2.31 s, total: 30.6 s
Wall time: 41.3 s


<keras.callbacks.History at 0x7fdf515a86d0>

In [47]:
# Static (fixed-width) baseline with hidden layers of 30, 33, 13 and 42 units.
# NOTE(review): these widths are presumably the sizes a self-scaling run ended
# with — confirm against the corresponding SSModel log.
#
# 3072 is the width of the flattened input (the data is reshaped to (-1, 3072)),
# not a hidden layer. It is declared with `tf.keras.Input` so that the network
# has exactly four hidden layers, like SSModel(layer_sizes=[3072, ..., 10]).
# A `Dense(3072)` first layer would add an extra 3072x3072 hidden layer that
# dwarfs this deliberately small architecture.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(3072,)),
    tf.keras.layers.Dense(30, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(33, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(13, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(42, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='lecun_normal'),
])

In [48]:
# Integer class labels + softmax output -> sparse categorical cross-entropy.
# Adam uses the same learning rate (2e-4) as the self-scaling runs.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [49]:
%%time

# 40 epochs on the `*_automobiles_dogs_train` arrays, validated on the matching
# `*_test` arrays (both defined earlier in the notebook).
model.fit(
    x=X_norm_automobiles_dogs_train,
    y=y_automobiles_dogs_train,
    epochs=40,
    validation_data=(X_norm_automobiles_dogs_test, y_automobiles_dogs_test),
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
CPU times: user 42.3 s, sys: 4.28 s, total: 46.6 s
Wall time: 40.9 s


<keras.callbacks.History at 0x7fdf6809f990>

In [50]:
%%time

# Keep training the same `model` (fit resumes from its current weights) for
# 5 epochs on X_train_norm / y_train, validating on X_test_norm / y_test.
model.fit(
    x=X_train_norm,
    y=y_train,
    epochs=5,
    validation_data=(X_test_norm, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 27.7 s, sys: 2.29 s, total: 30 s
Wall time: 25.2 s


<keras.callbacks.History at 0x7fdf513e5690>

In [51]:
%%time

# A further 5 epochs on X_train_norm / y_train with identical settings;
# fit resumes from the model's current weights.
model.fit(
    x=X_train_norm,
    y=y_train,
    epochs=5,
    validation_data=(X_test_norm, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 27.8 s, sys: 2.53 s, total: 30.3 s
Wall time: 41.3 s


<keras.callbacks.History at 0x7fdf513b8dd0>

In [66]:
# Static (fixed-width) baseline with hidden layers of 303, 581, 302 and 470 units.
# NOTE(review): these widths are presumably the sizes a self-scaling run ended
# with — confirm against the corresponding SSModel log.
#
# 3072 is the width of the flattened input (the data is reshaped to (-1, 3072)),
# not a hidden layer. It is declared with `tf.keras.Input` so that the network
# has exactly four hidden layers, like SSModel(layer_sizes=[3072, ..., 10]).
# A `Dense(3072)` first layer would add an extra 3072x3072 hidden layer and make
# the comparison with the self-scaling model unfair.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(3072,)),
    tf.keras.layers.Dense(303, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(581, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(302, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(470, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='lecun_normal'),
])

In [67]:
# Integer class labels + softmax output -> sparse categorical cross-entropy.
# Adam uses the same learning rate (2e-4) as the self-scaling runs.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [68]:
%%time

# 20 epochs on the `*_automobiles_dogs_train` arrays, validated on the matching
# `*_test` arrays (both defined earlier in the notebook).
model.fit(
    x=X_norm_automobiles_dogs_train,
    y=y_automobiles_dogs_train,
    epochs=20,
    validation_data=(X_norm_automobiles_dogs_test, y_automobiles_dogs_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
CPU times: user 22.3 s, sys: 1.86 s, total: 24.2 s
Wall time: 41.4 s


<keras.callbacks.History at 0x7fdfa012e510>

In [69]:
%%time

# Keep training the same `model` (fit resumes from its current weights) for
# 5 epochs on X_train_norm / y_train, validating on X_test_norm / y_test.
model.fit(
    x=X_train_norm,
    y=y_train,
    epochs=5,
    validation_data=(X_test_norm, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 28.8 s, sys: 2.25 s, total: 31 s
Wall time: 26.5 s


<keras.callbacks.History at 0x7fdf510540d0>

In [70]:
%%time

# A further 5 epochs on X_train_norm / y_train with identical settings;
# fit resumes from the model's current weights.
model.fit(
    x=X_train_norm,
    y=y_train,
    epochs=5,
    validation_data=(X_test_norm, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 28.8 s, sys: 2.55 s, total: 31.3 s
Wall time: 27 s


<keras.callbacks.History at 0x7fdf51025c10>

In [57]:
# Static (fixed-width) baseline with hidden layers of 117, 109, 104 and 139 units.
# NOTE(review): these widths are presumably the sizes a self-scaling run ended
# with — confirm against the corresponding SSModel log.
#
# 3072 is the width of the flattened input (the data is reshaped to (-1, 3072)),
# not a hidden layer. It is declared with `tf.keras.Input` so that the network
# has exactly four hidden layers, like SSModel(layer_sizes=[3072, ..., 10]).
# A `Dense(3072)` first layer would add an extra 3072x3072 hidden layer and make
# the comparison with the self-scaling model unfair.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(3072,)),
    tf.keras.layers.Dense(117, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(109, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(104, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(139, activation='selu', kernel_initializer='lecun_normal'),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='lecun_normal'),
])

In [58]:
# Integer class labels + softmax output -> sparse categorical cross-entropy.
# Adam uses the same learning rate (2e-4) as the self-scaling runs.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [59]:
%%time

# 20 epochs on the `*_automobiles_dogs_train` arrays, validated on the matching
# `*_test` arrays (both defined earlier in the notebook).
model.fit(
    x=X_norm_automobiles_dogs_train,
    y=y_automobiles_dogs_train,
    epochs=20,
    validation_data=(X_norm_automobiles_dogs_test, y_automobiles_dogs_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
CPU times: user 21.8 s, sys: 1.79 s, total: 23.6 s
Wall time: 41.4 s


<keras.callbacks.History at 0x7fdf5146af10>

In [60]:
%%time

# Keep training the same `model` (fit resumes from its current weights) for
# 5 epochs on X_train_norm / y_train, validating on X_test_norm / y_test.
model.fit(
    x=X_train_norm,
    y=y_train,
    epochs=5,
    validation_data=(X_test_norm, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 28.1 s, sys: 2.29 s, total: 30.4 s
Wall time: 41.3 s


<keras.callbacks.History at 0x7fdf511fc110>

In [61]:
%%time

# A further 5 epochs on X_train_norm / y_train with identical settings;
# fit resumes from the model's current weights.
model.fit(
    x=X_train_norm,
    y=y_train,
    epochs=5,
    validation_data=(X_test_norm, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 27.9 s, sys: 2.48 s, total: 30.4 s
Wall time: 25.5 s


<keras.callbacks.History at 0x7fdf511cbc90>