# Neural Networks

In this tutorial we learn how the most basic neural networks work.

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd

# Training a neural network

## Generate data
Create a toy dataset, define a class for weight initialization, etc.

In [None]:
# Ground-truth parameters of a 2-3-1 network; they are used for data generation later.
w_1 = np.array([[2, 1], [1, 0], [1, -1]]).T  # hidden kernel: one column per hidden unit, shape (2, 3)
b_1 = np.array([[1, 0, 2]])                  # hidden bias kept as a row vector, shape (1, 3)
w_2 = np.array([[1], [1], [-2]])             # output kernel, shape (3, 1)
b_2 = 1                                      # output bias (scalar)

# A bare expression in the middle of a cell is not displayed, so print the shapes explicitly.
print("Shapes: {} {} {} \n".format(w_1.shape, b_1.shape, w_2.shape))

# Print the output nicely
print("WEIGHTS OF THE MODEL \n")
print("Hidden layer weights: \n{} \n".format(w_1))
print("Hidden layer bias: \n{} \n".format(b_1))
print("Output layer weights: \n{} \n".format(w_2))
# b_2 is the bias of the output layer, so it is labelled as such.
print("Output layer bias: \n{} \n".format(b_2))

In [None]:
# define class for initialization of weights - fix seed to use the same numbers always
class MyInit(tf.keras.initializers.Initializer):
    """Deterministic initializer producing whole-number values from a normal draw.

    Every call resets TensorFlow's global seed to 11, samples from a normal
    distribution with the configured mean and standard deviation, and casts
    the sample to int32 and back to float32, so the returned tensor holds
    integer-valued floats.
    """

    def __init__(self, mean, std):
        """Store the mean and standard deviation of the sampling distribution."""
        self.mean, self.std = mean, std

    def __call__(self, shape, dtype=None, **kwargs):
        """Return a float32 tensor of the given shape filled with integer-valued samples."""
        # Re-seeding on every call makes each initialization repeat the same numbers.
        tf.random.set_seed(11)
        samples = tf.random.normal(
            shape, mean=self.mean, stddev=self.std, dtype=dtype)
        # The int32 round trip drops the fractional part of every sample.
        whole_numbers = tf.cast(samples, tf.int32)
        return tf.cast(whole_numbers, tf.float32)

    def get_config(self):  # To support serialization
        """Return the constructor arguments as a dict."""
        return dict(mean=self.mean, std=self.std)

In [None]:
# define forward propagation
def forward_propagation(x, w_1, b_1, w_2, b_2):
    """Run a forward pass of a one-hidden-layer ReLU network with NumPy.

    Args:
        x: input samples; matrix-multiplied with ``w_1``.
        w_1: hidden-layer weights.
        b_1: hidden-layer bias, added to ``x @ w_1``.
        w_2: output-layer weights.
        b_2: output-layer bias.

    Returns:
        dict with the pre-activation hidden values (``'hidden'``), the hidden
        values after ReLU (``'hidden_relu'``) and the network output
        (``'prediction'``).
    """
    pre_activation = np.matmul(x, w_1) + b_1
    # ReLU: every negative pre-activation is replaced by zero.
    activated = np.where(pre_activation < 0, 0, pre_activation)
    output = np.matmul(activated, w_2) + b_2

    return dict(hidden=pre_activation, hidden_relu=activated, prediction=output)

In [None]:
# Build the toy training set from the true weights; the NumPy seed is fixed for reproducibility.
np.random.seed(seed=73)
# Each feature starts with one hand-picked value, followed by 100 uniform draws
# from [-2, 4) that int() cuts down to whole numbers.
x_1 = [-4, *map(int, np.random.uniform(-2, 4, 100))]
x_2 = [1, *map(int, np.random.uniform(-2, 4, 100))]
x = np.array([x_1, x_2]).T  # one row per sample, columns (x_1, x_2)

# Targets are the noiseless outputs of the reference network.
reference_pass = forward_propagation(x, w_1, b_1, w_2, b_2)
y = reference_pass['prediction']

In [None]:
# Wrap (x, y) in a tf.data.Dataset and group the samples into batches of `batch_size`.
batch_size = 1
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.batch(batch_size)

## Initialize model

In [None]:
# Build a 2-3-1 network whose weights come from the deterministic MyInit initializer.
inputs = tf.keras.Input(name="input_values", shape=(2,))

# Hidden layer: 3 ReLU units.
x1 = tf.keras.layers.Dense(
    units=3,
    name="hidden",
    activation="relu",
    kernel_initializer=MyInit(1, 1),
    bias_initializer=MyInit(2, 1),
)(inputs)

# Output layer: a single linear unit.
outputs = tf.keras.layers.Dense(
    units=1,
    name="predictions",
    kernel_initializer=MyInit(1, 3),
    bias_initializer=MyInit(2, 0),
)(x1)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Plain SGD on the mean-squared-error loss function.
loss_fn = tf.keras.losses.MSE
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

In [None]:
# Look at the initialized weights.
# model.layers[1] is the "hidden" Dense layer and model.layers[2] the "predictions"
# Dense layer; each exposes its weights as [kernel, bias].
w1 = model.layers[1].weights[0].numpy()
b1 = model.layers[1].weights[1].numpy()

w2 = model.layers[2].weights[0].numpy()
b2 = model.layers[2].weights[1].numpy()

# Print the output nicely
print("WEIGHTS OF THE MODEL \n")
print("Hidden layer weights: \n{} \n".format(w1))
print("Hidden layer bias: \n{} \n".format(b1))
print("Output layer weights: \n{} \n".format(w2))
# b2 is read from the output layer, so it is labelled accordingly.
print("Output layer bias: \n{} \n".format(b2))

In [None]:
# Push the first sample through the NumPy forward pass using the model's current weights.
fp = forward_propagation(x[0:1], w1, b1, w2, b2)

# Print the output nicely: one (template, value) pair per reported quantity.
print("PROPAGATED VALUES \n")
for template, value in (
    ("Input vector \n{} \n", x[0]),
    ("Hidden layer before activation \n{} \n", fp["hidden"]),
    ("Hidden layer after relu \n{} \n", fp["hidden_relu"]),
    ("Prediction of output layer \n{} \n", fp["prediction"]),
):
    print(template.format(value))

In [None]:
# Take the first batch directly from the dataset iterator
# (replaces a for-loop that broke out after its first iteration).
x_batch_train, y_batch_train = next(iter(train_dataset))

x_batch_train.numpy(), y_batch_train.numpy()

In [None]:
# Record the forward pass on the batch so the loss can be differentiated.
with tf.GradientTape() as tape:
    prediction = model(x_batch_train, training=True)
    loss_value = loss_fn(y_batch_train, prediction)

# Gradients of the loss with respect to every trainable weight, in the order of
# model.trainable_weights.
grads = tape.gradient(loss_value, model.trainable_weights)

# Print the output nicely
print("Prediction:{}, Target:{}, Loss:{}".format(prediction, y[0], loss_value))
print("\n>>> GRADIENTS:")
gradient_templates = (
    "Hidden layer weights \n{} \n",
    "Hidden layer bias \n{} \n",
    "Output layer weights \n{} \n",
    "Output layer bias \n{} \n",
)
for position, template in enumerate(gradient_templates):
    print(template.format(grads[position].numpy()))

In [None]:
# One SGD step: pair every gradient with the weight it belongs to and apply the update.
gradient_weight_pairs = zip(grads, model.trainable_weights)
optimizer.apply_gradients(gradient_weight_pairs)

In [None]:
# Check the weights after the update.
# model.layers[1] is the hidden Dense layer, model.layers[2] the output Dense layer.
w1 = model.layers[1].weights[0].numpy()
b1 = model.layers[1].weights[1].numpy()

w2 = model.layers[2].weights[0].numpy()
b2 = model.layers[2].weights[1].numpy()

# Print the output nicely
print("UPDATED WEIGHTS OF THE MODEL \n")
print("Hidden layer weights: \n{} \n".format(w1))
print("Hidden layer bias: \n{} \n".format(b1))
print("Output layer weights: \n{} \n".format(w2))
# b2 is read from the output layer, so it is labelled accordingly.
print("Output layer bias: \n{} \n".format(b2))


In [None]:
# Re-evaluate the same batch with the current weights to see how the loss changed.
prediction = model(x_batch_train, training=True)
loss_value = loss_fn(y_batch_train, prediction)

report = "Prediction:{}, Target:{}, Loss:{}".format(prediction, y[0], loss_value)
print(report)

### Run the training loop

In [None]:
# Custom training loop: one gradient step per batch, full-dataset MSE reported per epoch.
epochs = 16
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            pred = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, pred)

        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Only the value after the last step of the epoch is printed, so the
    # full-dataset MSE is evaluated once per epoch instead of after every step.
    mse = np.mean((model(x).numpy() - y)**2)
    print('>>> Loss {:.3f}'.format(mse))

In [None]:
# Weights after training; compare them with the true weights used to generate the data.
# model.layers[1] is the hidden Dense layer, model.layers[2] the output Dense layer.
w1 = model.layers[1].weights[0].numpy()
b1 = model.layers[1].weights[1].numpy()

w2 = model.layers[2].weights[0].numpy()
b2 = model.layers[2].weights[1].numpy()

print("Hidden layer weights: \n{} \n".format(w1))
print("Hidden layer bias: \n{} \n".format(b1))
print("Output layer weights: \n{} \n".format(w2))
# b2 is read from the output layer, so it is labelled accordingly.
print("Output layer bias: \n{} \n".format(b2))

In [None]:
# Mean squared error of the trained model over the whole training set.
residuals = model(x).numpy() - y
np.mean(residuals ** 2)

### Playing with optimizer and learning rates

http://2.bp.blogspot.com/-q6l20Vs4P_w/VPmIC7sEhnI/AAAAAAAACC4/g3UOUX2r_yA/s400/

In [None]:
# Rebuild the dataset so that it yields batches of 10 samples.
batch_size = 10
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.batch(batch_size)

In [None]:
# Re-create the 2-3-1 network so the experiment starts from the same MyInit weights.
inputs = tf.keras.Input(name="input_values", shape=(2,))

# Hidden layer: 3 ReLU units.
x1 = tf.keras.layers.Dense(
    units=3,
    name="hidden",
    activation="relu",
    kernel_initializer=MyInit(1, 1),
    bias_initializer=MyInit(2, 1),
)(inputs)

# Output layer: a single linear unit.
outputs = tf.keras.layers.Dense(
    units=1,
    name="predictions",
    kernel_initializer=MyInit(1, 3),
    bias_initializer=MyInit(2, 0),
)(x1)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Default setup: plain SGD on the mean-squared-error loss function.
loss_fn = tf.keras.losses.MSE
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

In [None]:
# Pick ONE optimizer. Only the last assignment ever took effect, so the
# alternatives are kept as comments; uncomment one (and re-create the model)
# to compare convergence.
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)

epochs = 128
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            pred = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, pred)

        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Only the value after the last step of the epoch is printed, so the
    # full-dataset MSE is evaluated once per epoch instead of after every step.
    mse = np.mean((model(x).numpy() - y)**2)
    print('>>> Loss {:.4f}'.format(mse))

In [None]:
# Weights after training with the selected optimizer.
# model.layers[1] is the hidden Dense layer, model.layers[2] the output Dense layer.
w1 = model.layers[1].weights[0].numpy()
b1 = model.layers[1].weights[1].numpy()

w2 = model.layers[2].weights[0].numpy()
b2 = model.layers[2].weights[1].numpy()

print("Hidden layer weights: \n{} \n".format(w1))
print("Hidden layer bias: \n{} \n".format(b1))
print("Output layer weights: \n{} \n".format(w2))
# b2 is read from the output layer, so it is labelled accordingly.
print("Output layer bias: \n{} \n".format(b2))

<span style="color:red">**TO DO:** Try different optimizers with different learning rates. How does the convergence look? Does it converge? How fast? Which one is the best?</span>

## Vanishing gradient

In [None]:
# Create a deep tanh network (9 hidden layers of 3 units) with a pre-defined initialization:
init = tf.keras.initializers.RandomUniform(minval=0, maxval=1)

# `shape` is given as a tuple, consistent with every other model in this notebook.
inputs = tf.keras.Input(shape=(2,), name="input_values")

# The first hidden layer keeps Keras' default kernel initializer; the remaining
# eight hidden layers share the uniform [0, 1) initializer defined above.
h = tf.keras.layers.Dense(3, activation="tanh")(inputs)
for _ in range(8):
    h = tf.keras.layers.Dense(3, activation="tanh", kernel_initializer=init)(h)
outputs = tf.keras.layers.Dense(1)(h)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# set the loss and the optimizer
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MSE

model.summary()

In [None]:
# Take the first batch directly from the dataset iterator
# (replaces a for-loop that broke out after its first iteration).
x_batch_train, y_batch_train = next(iter(train_dataset))

x_batch_train.numpy(), y_batch_train.numpy()

In [None]:
# Differentiate the batch loss with respect to every trainable weight of the deep network.
with tf.GradientTape() as tape:
    prediction = model(x_batch_train, training=True)
    loss_value = loss_fn(y_batch_train, prediction)

trainable = model.trainable_weights
grads = tape.gradient(loss_value, trainable)

In [None]:
# Print the output nicely.
# The target shown is the whole batch that produced `prediction` and `loss_value`
# (the batch holds more than one sample here, so y[0] alone would not match).
print("Prediction:{}, Target:{}, Loss:{}".format(prediction, y_batch_train.numpy(), loss_value))
print("\n>>> GRADIENTS:")

# grads is expected to hold a (kernel, bias) pair per Dense layer; walk it in
# steps of two so that every layer, including the output layer, is reported.
for i in range(0, len(grads), 2):
    l = i // 2 + 1
    print('-' * 100)
    print("# {} layer weights \n{} \n".format(l, grads[i].numpy()))
    print("# {} layer bias \n{} \n".format(l, grads[i+1].numpy()))

In [None]:
# Train the deep tanh network: one gradient step per batch, full-dataset MSE per epoch.
epochs = 100
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            pred = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, pred)

        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Only the value after the last step of the epoch is printed, so the
    # full-dataset MSE is evaluated once per epoch instead of after every step.
    mse = np.mean((model(x).numpy() - y)**2)
    print('>>> Loss {:.3f}'.format(mse))

In [None]:
# Check the gradients after some time of training.
# `pred`, `y_batch_train`, `loss_value` and `grads` all come from the last step of
# the training loop, so they describe the same batch. (`prediction` was computed
# before training and would be stale next to the current loss.)
print("Prediction:{}, Target:{}, Loss:{}".format(pred, y_batch_train.numpy(), loss_value))
print("\n>>> GRADIENTS:")

# grads is expected to hold a (kernel, bias) pair per Dense layer; walk it in
# steps of two so that every layer, including the output layer, is reported.
for i in range(0, len(grads), 2):
    l = i // 2 + 1
    print('-' * 100)
    print("# {} layer weights \n{} \n".format(l, grads[i].numpy()))
    print("# {} layer bias \n{} \n".format(l, grads[i+1].numpy()))

## Two circles example
see https://machinelearningmastery.com/how-to-fix-vanishing-gradients-using-the-rectified-linear-activation-function/

In [None]:
from tensorflow import keras
from sklearn.datasets import make_circles
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.initializers import RandomUniform
from matplotlib import pyplot

In [None]:
# Two noisy concentric circles with binary labels, rescaled feature-wise to [-1, 1].
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
X = scaler.fit_transform(X)

# The first n_train samples form the training set, the remainder the test set.
n_train = 500
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

In [None]:
# Scatter plot of the whole dataset, one colour per class label.
for label in (0, 1):
    in_class = y == label
    pyplot.scatter(X[in_class, 0], X[in_class, 1], label=str(label))
pyplot.legend()
pyplot.show()

In [None]:
# Define model run for various model specifications
def run(model, iterations):
    """Train a binary classifier in chunks of 10 epochs and visualise its progress.

    Uses the notebook-level ``trainX``/``trainy``/``testX``/``testy`` arrays.
    After every chunk the training points are plotted, coloured by the class the
    model currently predicts for them. At the end the train/test accuracy curves
    are plotted and the model is evaluated on the test set.

    Args:
        model: Keras model; it is (re)compiled here with SGD (lr 0.01, momentum 0.9),
            binary cross-entropy and the accuracy metric.
        iterations: number of 10-epoch training chunks to run.
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    acc_train = []
    acc_test = []
    for r in range(iterations):
        # Each fit() call below runs 10 epochs, hence the running total.
        print("After {} epochs:".format((r + 1) * 10))
        history = model.fit(trainX, trainy, validation_data=(testX, testy),
                            epochs=10, batch_size=100, verbose=0)
        acc_train.extend(history.history['accuracy'])
        acc_test.extend(history.history['val_accuracy'])

        # Predict once, threshold at 0.5 and flatten to one label per training sample.
        prediction = np.where(model.predict(trainX) > 0.5, 1, 0).ravel()

        # Index the array that was predicted on (trainX) with a boolean mask.
        for i in range(2):
            in_class = prediction == i
            pyplot.scatter(trainX[in_class, 0], trainX[in_class, 1], label=str(i))
        pyplot.legend()
        pyplot.show()

    pyplot.plot(acc_train, label='train')
    pyplot.plot(acc_test, label='test')
    pyplot.legend()
    pyplot.show()

    print('Evaluation')
    model.evaluate(testX, testy)

### Models with ReLU

In [None]:
# Shallow ReLU network: one hidden layer with 2 units, sigmoid output.
tf.random.set_seed(42)
init = tf.keras.initializers.GlorotNormal()

inputs = tf.keras.Input(name="input_values", shape=(2,))
h = tf.keras.layers.Dense(
    units=2, activation="relu", kernel_initializer=init
)(inputs)
# A single linear output unit squashed to (0, 1) by the sigmoid.
outputs = tf.keras.activations.sigmoid(tf.keras.layers.Dense(units=1)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model, 20)

In [None]:
# NOTE(review): this cell repeats the setup performed inside run(); it looks like a
# leftover. It re-compiles the current `model` and rebinds the notebook-level
# `optimizer` and `loss_fn`. The later "Check the gradients" cell calls `loss_fn`,
# so this binding is presumably what that cell relies on — confirm before removing.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
loss_fn = tf.keras.losses.BinaryCrossentropy()

model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# These lists are not used by any code visible in this notebook.
acc_train = []
acc_test = []

In [None]:
# Wider shallow ReLU network: one hidden layer with 10 units, sigmoid output.
tf.random.set_seed(42)
init = tf.keras.initializers.GlorotNormal()

inputs = tf.keras.Input(name="input_values", shape=(2,))
h = tf.keras.layers.Dense(
    units=10, activation="relu", kernel_initializer=init
)(inputs)
# A single linear output unit squashed to (0, 1) by the sigmoid.
outputs = tf.keras.activations.sigmoid(tf.keras.layers.Dense(units=1)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model, 20)

In [None]:
# Three ReLU hidden layers of 5 units, kernels drawn uniformly from [0, 1).
tf.random.set_seed(42)
init = tf.keras.initializers.RandomUniform(minval=0, maxval=1)

inputs = tf.keras.Input(name="input_values", shape=(2,))
# Stack the identical hidden layers one after another.
h = inputs
for _ in range(3):
    h = tf.keras.layers.Dense(5, activation="relu", kernel_initializer=init)(h)
outputs = tf.keras.activations.sigmoid(tf.keras.layers.Dense(units=1)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model, 20)

In [None]:
# Deep ReLU network: seven hidden layers of 5 units with Glorot-normal kernels.
tf.random.set_seed(42)
init = tf.keras.initializers.GlorotNormal()

inputs = tf.keras.Input(name="input_values", shape=(2,))
# Stack the identical hidden layers one after another.
h = inputs
for _ in range(7):
    h = tf.keras.layers.Dense(5, activation="relu", kernel_initializer=init)(h)
outputs = tf.keras.activations.sigmoid(tf.keras.layers.Dense(units=1)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
# This deeper model is trained for 70 chunks instead of 20.
run(model, 70)

### Models with tanh

In [None]:
# Shallow tanh network: one hidden layer with 2 units, sigmoid output.
tf.random.set_seed(42)
init = tf.keras.initializers.GlorotNormal()

inputs = tf.keras.Input(name="input_values", shape=(2,))
h = tf.keras.layers.Dense(
    units=2, activation="tanh", kernel_initializer=init
)(inputs)
# A single linear output unit squashed to (0, 1) by the sigmoid.
outputs = tf.keras.activations.sigmoid(tf.keras.layers.Dense(units=1)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model, 20)

In [None]:
# Wider shallow tanh network: one hidden layer with 10 units, sigmoid output.
tf.random.set_seed(42)
init = tf.keras.initializers.GlorotNormal()

inputs = tf.keras.Input(name="input_values", shape=(2,))
h = tf.keras.layers.Dense(
    units=10, activation="tanh", kernel_initializer=init
)(inputs)
# A single linear output unit squashed to (0, 1) by the sigmoid.
outputs = tf.keras.activations.sigmoid(tf.keras.layers.Dense(units=1)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model, 20)

In [None]:
# Three tanh hidden layers of 5 units with Glorot-normal kernels.
tf.random.set_seed(42)
init = tf.keras.initializers.GlorotNormal()

inputs = tf.keras.Input(name="input_values", shape=(2,))
# Stack the identical hidden layers one after another.
h = inputs
for _ in range(3):
    h = tf.keras.layers.Dense(5, activation="tanh", kernel_initializer=init)(h)
outputs = tf.keras.activations.sigmoid(tf.keras.layers.Dense(units=1)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model, 20)

In [None]:
# Deep tanh network: six hidden layers of 5 units, kernels drawn uniformly from [0, 1).
tf.random.set_seed(42)
init = tf.keras.initializers.RandomUniform(minval=0, maxval=1)

inputs = tf.keras.Input(name="input_values", shape=(2,))
# Stack the identical hidden layers one after another.
h = inputs
for _ in range(6):
    h = tf.keras.layers.Dense(5, activation="tanh", kernel_initializer=init)(h)
outputs = tf.keras.activations.sigmoid(tf.keras.layers.Dense(units=1)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model, 20)

In [None]:
# Display the layer objects of the most recently built model (notebook cell output).
model.layers

In [None]:
# Check the gradients of the current model on the training set.
# The loss is defined here so the cell does not depend on an earlier cell's binding.
loss_fn = tf.keras.losses.BinaryCrossentropy()

with tf.GradientTape() as tape:
    prediction = model(trainX, training=True)
    # The predictions are for trainX, so they are compared with the TRAIN labels.
    # The labels are reshaped to a column to line up with the single-unit output
    # of the model (assumed shape (n, 1) — the models above end in Dense(1)).
    loss_value = loss_fn(trainy.reshape(-1, 1), prediction)

grads = tape.gradient(loss_value, model.trainable_weights)

print("\n>>> GRADIENTS:")

# grads is expected to hold a (kernel, bias) pair per Dense layer; walk it in
# steps of two so that weights and biases are labelled correctly and the loop
# is bounded by the number of gradients rather than the number of layers.
for i in range(0, len(grads), 2):
    l = i // 2 + 1
    print('-' * 100)
    print("# {} layer weights \n{} \n".format(l, grads[i].numpy()))
    print("# {} layer bias \n{} \n".format(l, grads[i+1].numpy()))

### Models without activations

In [None]:
# Fixed seed, consistent with the other model cells in this section.
tf.random.set_seed(42)

# Only the last assignment ever took effect; the alternative is kept as a comment
# so it can be switched in deliberately.
# init = tf.keras.initializers.RandomUniform(minval=0, maxval=1)
init = tf.keras.initializers.GlorotNormal()

inputs = tf.keras.Input(shape=(2,), name="input_values")
# Five hidden layers of 5 units WITHOUT any activation function.
h = inputs
for _ in range(5):
    h = tf.keras.layers.Dense(5, activation=None, kernel_initializer=init)(h)
outputs = tf.keras.layers.Dense(1)(h)
outputs = tf.keras.activations.sigmoid(outputs)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

run(model, 20)

<span style="color:red">**TO DO:** Similar to the accuracy, store and plot also the train and test loss. Inspect the loss evolution.</span>

### Multiclass classification

In [None]:
from sklearn.datasets import make_classification

In [None]:
# Four-class 2-D dataset (one cluster per class), rescaled feature-wise to [-1, 1].
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=4,
    n_clusters_per_class=1,
    class_sep=2,
    random_state=1,
)
scaler = MinMaxScaler(feature_range=(-1, 1))
X = scaler.fit_transform(X)

# The first n_train samples form the training set, the remainder the test set.
n_train = 500
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# Scatter plot of the whole dataset, one colour per class label.
for label in range(4):
    in_class = y == label
    pyplot.scatter(X[in_class, 0], X[in_class, 1], label=str(label))
pyplot.legend()
pyplot.show()

#### ReLU models

In [None]:
# Define model run for various model specifications
def run(model):
    """Train a 4-class classifier in 20 chunks of 10 epochs and visualise its progress.

    Uses the notebook-level ``trainX``/``trainy``/``testX``/``testy`` arrays.
    After every chunk the training points are plotted, coloured by the class the
    model currently predicts for them. At the end the train/test accuracy curves
    are plotted and the model is evaluated on the test set.

    Args:
        model: Keras model; it is (re)compiled here with SGD (lr 0.01, momentum 0.9),
            sparse categorical cross-entropy and sparse categorical accuracy.
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

    model.compile(loss=loss_fn, optimizer=optimizer,
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    acc_train = []
    acc_test = []
    for r in range(20):
        # Each fit() call below runs 10 epochs, hence the running total.
        print("After {} epochs:".format((r + 1) * 10))
        history = model.fit(trainX, trainy, validation_data=(testX, testy), epochs=10, verbose=0)
        acc_train.extend(history.history['sparse_categorical_accuracy'])
        acc_test.extend(history.history['val_sparse_categorical_accuracy'])

        # Predict once and take the most probable class for every training sample.
        prediction = np.argmax(model.predict(trainX), axis=1)

        # Index the array that was predicted on (trainX) with a boolean mask.
        for i in range(4):
            in_class = prediction == i
            pyplot.scatter(trainX[in_class, 0], trainX[in_class, 1], label=str(i))
        pyplot.legend()
        pyplot.show()

    pyplot.plot(acc_train, label='train')
    pyplot.plot(acc_test, label='test')
    pyplot.legend()
    pyplot.show()

    print('Evaluation')
    model.evaluate(testX, testy)

In [None]:
# One ReLU hidden layer of 5 units (uniform [0, 1) kernels) and a 4-way softmax output.
tf.random.set_seed(42)
init = tf.keras.initializers.RandomUniform(minval=0, maxval=1)

inputs = tf.keras.Input(name="input_values", shape=(2,))
h = tf.keras.layers.Dense(
    units=5, activation="relu", kernel_initializer=init
)(inputs)
# Four linear output units turned into class probabilities by the softmax.
outputs = tf.keras.activations.softmax(tf.keras.layers.Dense(units=4)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model)

In [None]:
# Three ReLU hidden layers of 10 units (Glorot-normal kernels) and a 4-way softmax output.
tf.random.set_seed(42)
init = tf.keras.initializers.GlorotNormal()

inputs = tf.keras.Input(name="input_values", shape=(2,))
# Stack the identical hidden layers one after another.
h = inputs
for _ in range(3):
    h = tf.keras.layers.Dense(10, activation="relu", kernel_initializer=init)(h)
outputs = tf.keras.activations.softmax(tf.keras.layers.Dense(units=4)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model)

In [None]:
# Three ReLU hidden layers of 5 units (uniform [0, 1) kernels) and a 4-way softmax output.
tf.random.set_seed(42)
init = tf.keras.initializers.RandomUniform(minval=0, maxval=1)

inputs = tf.keras.Input(name="input_values", shape=(2,))
# Stack the identical hidden layers one after another.
h = inputs
for _ in range(3):
    h = tf.keras.layers.Dense(5, activation="relu", kernel_initializer=init)(h)
outputs = tf.keras.activations.softmax(tf.keras.layers.Dense(units=4)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model)

### Complex multiclass

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

In [None]:
# Define model run for various model specifications
def run(model):
    """Train a 4-class classifier in 20 chunks of 10 epochs and visualise its progress.

    Uses the notebook-level ``trainX``/``trainy``/``testX``/``testy`` arrays.
    After every chunk the training points are plotted, coloured by the class the
    model currently predicts for them. At the end the train/test accuracy curves
    are plotted and the model is evaluated on the test set.

    Args:
        model: Keras model; it is (re)compiled here with SGD (lr 0.01, momentum 0.9),
            sparse categorical cross-entropy and sparse categorical accuracy.
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

    model.compile(loss=loss_fn, optimizer=optimizer,
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    acc_train = []
    acc_test = []
    for r in range(20):
        # Each fit() call below runs 10 epochs, hence the running total.
        print("After {} epochs:".format((r + 1) * 10))
        history = model.fit(trainX, trainy, validation_data=(testX, testy), epochs=10, verbose=0)
        acc_train.extend(history.history['sparse_categorical_accuracy'])
        acc_test.extend(history.history['val_sparse_categorical_accuracy'])

        # Predict once and take the most probable class for every training sample.
        prediction = np.argmax(model.predict(trainX), axis=1)

        for i in range(4):
            in_class = prediction == i
            pyplot.scatter(trainX[in_class, 0], trainX[in_class, 1], label=str(i))
        pyplot.legend()
        pyplot.show()

    pyplot.plot(acc_train, label='train')
    pyplot.plot(acc_test, label='test')
    pyplot.legend()
    pyplot.show()

    print('Evaluation')
    model.evaluate(testX, testy)

In [None]:
# Generate a 4-class 2-D dataset from two copies of the "two moons" data.
X_1, y_1 = make_moons(n_samples=1000, random_state=42, noise=0.2)
scaler = MinMaxScaler(feature_range=(-1, 1))
X_1 = scaler.fit_transform(X_1)

X_2, y_2 = make_moons(n_samples=1000, random_state=42, noise=0.2)
scaler = MinMaxScaler(feature_range=(-1, 1))
# Scale the second sample itself rather than re-scaling the already scaled X_1.
X_2 = scaler.fit_transform(X_2)

# The second copy is stretched and shifted, and its labels are offset by 2,
# which yields classes 0-3 in total.
X = np.append(X_1, X_2 * np.array([1.2, 0.8]) + np.array([-1, -1]), axis=0)
y = np.append(y_1, y_2 + 2, axis=0)

trainX, testX, trainy, testy = train_test_split(X, y, random_state=3)

# Scatter plot of the whole dataset, one colour per class label.
for i in range(4):
    samples_ix = np.where(y == i)
    pyplot.scatter(X[samples_ix, 0], X[samples_ix, 1], label=str(i))
pyplot.legend()
pyplot.show()

#### Model with ReLU

In [None]:
# One ReLU hidden layer of 5 units (uniform [0, 1) kernels) and a 4-way softmax output.
tf.random.set_seed(42)
init = tf.keras.initializers.RandomUniform(minval=0, maxval=1)

inputs = tf.keras.Input(name="input_values", shape=(2,))
h = tf.keras.layers.Dense(
    units=5, activation="relu", kernel_initializer=init
)(inputs)

# Four linear output units turned into class probabilities by the softmax.
outputs = tf.keras.activations.softmax(tf.keras.layers.Dense(units=4)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model)

In [None]:
# One wide ReLU hidden layer of 100 units (uniform [0, 1) kernels) and a 4-way softmax output.
tf.random.set_seed(42)
init = tf.keras.initializers.RandomUniform(minval=0, maxval=1)

inputs = tf.keras.Input(name="input_values", shape=(2,))
h = tf.keras.layers.Dense(
    units=100, activation="relu", kernel_initializer=init
)(inputs)

# Four linear output units turned into class probabilities by the softmax.
outputs = tf.keras.activations.softmax(tf.keras.layers.Dense(units=4)(h))

model = tf.keras.Model(inputs=inputs, outputs=outputs)
run(model)

In [None]:
# Three ReLU hidden layers of 50 units (uniform [0, 1) kernels) and a 4-way softmax output.
tf.random.set_seed(42)

init = tf.keras.initializers.RandomUniform(minval=0, maxval=1)

inputs = tf.keras.Input(shape=(2,), name="input_values")
# Each hidden layer must consume the previous layer's output. Feeding `inputs`
# into all three left only the last layer connected to the model output.
h = tf.keras.layers.Dense(50, activation="relu", kernel_initializer=init)(inputs)
h = tf.keras.layers.Dense(50, activation="relu", kernel_initializer=init)(h)
h = tf.keras.layers.Dense(50, activation="relu", kernel_initializer=init)(h)

outputs = tf.keras.layers.Dense(4)(h)
outputs = tf.keras.activations.softmax(outputs)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

run(model)

In [None]:
# def run(model, iterations):
#     optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
#     loss_fn = tf.keras.losses.BinaryCrossentropy()

#     model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

#     acc_train = []
#     acc_test = []
#     loss_train = []
#     loss_test = []

#     for r in range(iterations):
#         print("After {} episodes:".format((r + 1) * 10))
#         history = model.fit(trainX, trainy, validation_data=(testX, testy), epochs=10, batch_size=100, verbose=0)
#         acc_train.extend(history.history['accuracy'])
#         acc_test.extend(history.history['val_accuracy'])
#         loss_train.extend(history.history['loss'])
#         loss_test.extend(history.history['val_loss'])

#         prediction = model.predict(trainX)
#         prediction = np.where(model.predict(trainX) > 0.5, 1, 0)

#         for i in range(2):
#             samples_ix = np.where(prediction == i)
#             pyplot.scatter(X[samples_ix, 0], X[samples_ix, 1], label=str(i))
#         pyplot.legend()
#         pyplot.show()

#     pyplot.plot(acc_train, label='train')
#     pyplot.plot(acc_test, label='test')
#     pyplot.legend()
#     pyplot.show()

#     pyplot.plot(loss_train, label='train')
#     pyplot.plot(loss_test, label='test')
#     pyplot.legend()
#     pyplot.show()

#     print('Evaluation')
#     model.evaluate(testX, testy)