## Pre

In [69]:
import matplotlib.pyplot as plt
from scipy.ndimage import label

from scripts.generate_regularization_layer_test_outputs import test_data
%matplotlib inline
plt.style.use('ggplot')

def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    
    # As well as monitoring the error over training also monitor classification
    # accuracy i.e. proportion of most-probable predicted classes being equal to targets
    data_monitors={'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

    # Use the created objects to initialise a new Optimiser instance.
    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)

    # Run the optimiser for num_epochs epochs (full passes through the training set)
    # printing statistics every epoch.
    stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)

    # Plot the change in the validation and training set error over training.
    fig_1 = plt.figure(figsize=(8, 4))
    ax_1 = fig_1.add_subplot(111)
    for k in ['error(train)', 'error(valid)']:
        ax_1.plot(np.arange(1, stats.shape[0]) * stats_interval, 
                  stats[1:, keys[k]], label=k)
    ax_1.legend(loc=0)
    ax_1.set_xlabel('Epoch number')
    ax_1.set_ylabel('Error')

    # Plot the change in the validation and training set accuracy over training.
    fig_2 = plt.figure(figsize=(8, 4))
    ax_2 = fig_2.add_subplot(111)
    for k in ['acc(train)', 'acc(valid)']:
        ax_2.plot(np.arange(1, stats.shape[0]) * stats_interval, 
                  stats[1:, keys[k]], label=k)
    ax_2.legend(loc=0)
    ax_2.set_xlabel('Epoch number')
    ax_2.set_xlabel('Accuracy')
    
    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [70]:
# The below code will set up the data providers, random number
# generator and logger objects needed for training runs. As
# loading the data from file take a little while you generally
# will probably not want to reload the data providers on
# every training run. If you wish to reset their state you
# should instead use the .reset() method of the data providers.
import numpy as np
import logging
import sys
# sys.path.append('/path/to/mlpractical')
from mlp.data_providers import MNISTDataProvider, EMNISTDataProvider

# Seed a random number generator
seed = 11102019 
rng = np.random.RandomState(seed)
batch_size = 100
# Set up a logger object to print info about the training run to stdout
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [logging.StreamHandler()]

# Create data provider objects for the MNIST data set
train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng)
valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)

KeysView(NpzFile '/Users/ycy/PycharmProjects/mlpractical/data/emnist-train.npz' with keys: inputs, targets)
KeysView(NpzFile '/Users/ycy/PycharmProjects/mlpractical/data/emnist-valid.npz' with keys: inputs, targets)


## Baseline

In [None]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

%pip install tqdm

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, DropoutLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule
from mlp.optimisers import Optimiser

train_data.reset()
valid_data.reset()

# Setup hyperparameters
learning_rate = 1e-4
num_epochs = 100
stats_interval = 1  # Every 1 epoch record the error and acc data
input_dim, output_dim, hidden_dim = 784, 47, 128

final_errors_train = []
final_errors_valid = []
final_accs_train = []
final_accs_valid = []

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Create model with 3 hidden layer
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
])

error = CrossEntropySoftmaxError()
# Use a Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

# Remember to use notebook=False when you write a script to be run in a terminal
stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

print('    final error(train) = {0:.2e}'.format(stats[-1, keys['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(stats[-1, keys['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(stats[-1, keys['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(stats[-1, keys['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(stats[-1, keys['error(train)']])
final_errors_valid.append(stats[-1, keys['error(valid)']])
final_accs_train.append(stats[-1, keys['acc(train)']])
final_accs_valid.append(stats[-1, keys['acc(valid)']])

## Dropout

In [6]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

%pip install tqdm

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, DropoutLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule
from mlp.optimisers import Optimiser

train_data.reset()
valid_data.reset()

# Setup hyperparameters
learning_rate = 1e-4
num_epochs = 100
stats_interval = 1  # Every 1 epoch record the error and acc data
input_dim, output_dim, hidden_dim = 784, 47, 128
dropout_rate = 0.7

final_errors_train = []
final_errors_valid = []
final_accs_train = []
final_accs_valid = []
stats_list = []
keys_list = []

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Create model with 3 hidden layer
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    DropoutLayer(rng=rng, incl_prob=dropout_rate),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    DropoutLayer(rng=rng, incl_prob=dropout_rate),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    DropoutLayer(rng=rng, incl_prob=dropout_rate),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
])

error = CrossEntropySoftmaxError()
# Use a Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

# Remember to use notebook=False when you write a script to be run in a terminal
stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

print('    final error(train) = {0:.2e}'.format(stats[-1, keys['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(stats[-1, keys['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(stats[-1, keys['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(stats[-1, keys['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(stats[-1, keys['error(train)']])
final_errors_valid.append(stats[-1, keys['error(valid)']])
final_accs_train.append(stats[-1, keys['acc(train)']])
final_accs_valid.append(stats[-1, keys['acc(valid)']])

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Note: you may need to restart the kernel to use updated packages.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch 1: 8.3s to complete
    error(train)=1.32e+00, acc(train)=6.29e-01, error(valid)=1.33e+00, acc(valid)=6.24e-01


  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch 2: 5.9s to complete
    error(train)=1.07e+00, acc(train)=6.91e-01, error(valid)=1.08e+00, acc(valid)=6.87e-01


  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch 3: 7.8s to complete
    error(train)=9.28e-01, acc(train)=7.26e-01, error(valid)=9.46e-01, acc(valid)=7.18e-01


    final error(train) = 9.28e-01
    final error(valid) = 9.46e-01
    final acc(train)   = 7.26e-01
    final acc(valid)   = 7.18e-01
    run time per epoch = 10.01


In [None]:
stats_dropout = stats
keys_dropout = keys

## L1

In [None]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

%pip install tqdm

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, DropoutLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule
from mlp.optimisers import Optimiser

train_data.reset()
valid_data.reset()

# Setup hyperparameters
learning_rate = 1e-4
num_epochs = 100
stats_interval = 1  # Every 1 epoch record the error and acc data
input_dim, output_dim, hidden_dim = 784, 47, 128
dropout_rate = 0.7

final_errors_train = []
final_errors_valid = []
final_accs_train = []
final_accs_valid = []
stats_list = []
keys_list = []

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Create model with 3 hidden layer
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
])

error = CrossEntropySoftmaxError()
# Use a Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

# Remember to use notebook=False when you write a script to be run in a terminal
stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

print('    final error(train) = {0:.2e}'.format(stats[-1, keys['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(stats[-1, keys['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(stats[-1, keys['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(stats[-1, keys['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(stats[-1, keys['error(train)']])
final_errors_valid.append(stats[-1, keys['error(valid)']])
final_accs_train.append(stats[-1, keys['acc(train)']])
final_accs_valid.append(stats[-1, keys['acc(valid)']])

## L2

In [78]:
from mlp.penalties import L2Penalty

train_data.reset()
valid_data.reset()

# Setup hyperparameters
learning_rate = 1e-4
num_epochs = 100
stats_interval = 1  # Every 1 epoch record the error and acc data
input_dim, output_dim, hidden_dim = 784, 47, 128
l2_coefficient = 1e-3

final_errors_train = []
final_errors_valid = []
final_accs_train = []
final_accs_valid = []
stats_list = []
keys_list = []

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

l2_penalty = L2Penalty(coefficient=l2_coefficient)

# Create model with 3 hidden layer
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init, weights_penalty=l2_penalty, biases_penalty=l2_penalty),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init, weights_penalty=l2_penalty, biases_penalty=l2_penalty),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init, weights_penalty=l2_penalty, biases_penalty=l2_penalty),
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init, weights_penalty=l2_penalty, biases_penalty=l2_penalty)
])

# 原始的交叉熵损失函数
error = CrossEntropySoftmaxError()

# Use a Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

# Remember to use notebook=False when you write a script to be run in a terminal
stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

print('    final error(train) = {0:.2e}'.format(stats[-1, keys['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(stats[-1, keys['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(stats[-1, keys['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(stats[-1, keys['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(stats[-1, keys['error(train)']])
final_errors_valid.append(stats[-1, keys['error(valid)']])
final_accs_train.append(stats[-1, keys['acc(train)']])
final_accs_valid.append(stats[-1, keys['acc(valid)']])
stats_L2 = stats
keys_L2 = keys

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch 1: 7.0s to complete
    error(train)=3.85e+00, acc(train)=2.12e-02, error(valid)=3.85e+00, acc(valid)=2.15e-02


  0%|          | 0/1000 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [68]:
# wrong  but in Coursework is right Please compare and learn from it.
import matplotlib.pyplot as plt
import numpy as np

error_train_dropout = stats_dropout[1:, keys_dropout['error(train)']]
error_valid_dropout = stats_dropout[1:, keys_dropout['error(valid)']]
generalization_gap_dropout = error_valid_dropout - error_train_dropout
acc_valid_dropout = stats_dropout[1:, keys_dropout['acc(valid)']]

error_train_l1 = stats_l1[1:, keys_l1['error(train)']]
error_valid_l1 = stats_l1[1:, keys_l1['error(valid)']]
generalization_gap_l1 = error_valid_l1 - error_train_l1
acc_valid_l1 = stats_l1[1:, keys_l1['acc(valid)']]

error_train_l2 = stats_l2[1:, keys_l2['error(train)']]
error_valid_l2 = stats_l2[1:, keys_l2['error(valid)']]
generalization_gap_l2 = error_valid_l2 - error_train_l2
acc_valid_l2 = stats_l2[1:, keys_l2['acc(valid)']]

def plot_training_stats_regularization():
    fig_1 = plt.figure(figsize=(8, 4))
    
    ax_1 = fig_1.add_subplot(121)
    ax_1.set_xlabel('Dropout value')
    ax_1.set_ylabel('Accuracy')
    ax_2 = ax_1.twinx()
    ax_2.set_ylabel('Generalization gap', rotation=90)
    
    ax_1.plot(np.arange(1, len(acc_valid_dropout) + 1), acc_valid_dropout, label='Val. Acc', color='r')
    ax_2.plot(np.arange(1, len(generalization_gap_dropout) + 1), generalization_gap_dropout, label='Gap', color='b')

    ax_1.legend(loc='upper left')
    ax_2.legend(loc='upper right')
    

    ax_3 = fig_1.add_subplot(122)
    ax_3.set_xlabel('Weight dacay value')
    ax_3.set_ylabel('Accuracy', rotation=90)
    ax_4 = ax_3.twinx()
    ax_4.set_ylabel('Generalization gap', rotation=90)
    
    ax_3.plot(np.arange(1, len(acc_valid_l1) + 1), acc_valid_l1, label='L1 Val. Acc', color='r')
    ax_3.plot(np.arange(1, len(generalization_gap_dropout) + 1), generalization_gap_dropout, label='L1 Gap', color='b')
    ax_4.plot(np.arange(1, len(acc_valid_l2) + 1), acc_valid_l2, label='L1 Val. Acc', color='r')
    ax_4.plot(np.arange(1, len(generalization_gap_dropout) + 1), generalization_gap_dropout, label='L1 Gap', color='b')
    
    plt.show()
    fig_1.savefig('../outputs/task2-regularization.pdf')

plot_training_stats_regularization()


NameError: name 'stats_dropout' is not defined

## label smoothing

In [None]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

%pip install tqdm

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, DropoutLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule
from mlp.optimisers import Optimiser

train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng, smooth_labels=True)
valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)

train_data.reset()
valid_data.reset()

# Setup hyperparameters
learning_rate = 1e-4
num_epochs = 100
stats_interval = 1  # Every 1 epoch record the error and acc data
input_dim, output_dim, hidden_dim = 784, 47, 128

final_errors_train = []
final_errors_valid = []
final_accs_train = []
final_accs_valid = []


weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Create model with 3 hidden layer
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
])

error = CrossEntropySoftmaxError()
# Use a Adam learning rule
learning_rule = AdamLearningRule(learning_rate=learning_rate)

# Remember to use notebook=False when you write a script to be run in a terminal
stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

print('    final error(train) = {0:.2e}'.format(stats[-1, keys['error(train)']]))
print('    final error(valid) = {0:.2e}'.format(stats[-1, keys['error(valid)']]))
print('    final acc(train)   = {0:.2e}'.format(stats[-1, keys['acc(train)']]))
print('    final acc(valid)   = {0:.2e}'.format(stats[-1, keys['acc(valid)']]))
print('    run time per epoch = {0:.2f}'.format(run_time * 1. / num_epochs))

final_errors_train.append(stats[-1, keys['error(train)']])
final_errors_valid.append(stats[-1, keys['error(valid)']])
final_accs_train.append(stats[-1, keys['acc(train)']])
final_accs_valid.append(stats[-1, keys['acc(valid)']])