In [1]:
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Reshape
from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD, Adam
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras.utils import to_categorical

Using TensorFlow backend.


### Task 1

In [2]:
#Download the data
from keras.datasets import fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# reshape dataset to have a single channel
x_train = x_train.reshape((x_train.shape[0], 28, 28, 1))
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1))

#reshape the image dataset into 1 dimension used for NN with no convolutional layers
x_train_1d = x_train.reshape((((x_train.shape[0], 28*28))))
x_test_1d = x_test.reshape((((x_test.shape[0], 28*28))))

In [3]:
#reshape y_train and y_test so everything is in 4 dimensions
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [180]:
#test if one-hot encoding worked
y_train[3]

array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [160]:
#investigate dataset
print('Loaded fashion_mnist database with {} training and {} testing samples'.format(len(y_train), len(y_test)))

Loaded fashion_mnist database with 60000 training and 10000 testing samples


In [161]:
# investigate input data size
x_train.shape
x_train_1d.shape

(60000, 784)

In [162]:
# investigate test data size
x_test.shape

(10000, 28, 28, 1)

In [163]:
#check the dimesions of y
y_train.shape

(60000, 10)

In [4]:
# normalize inputs from 0-255 to 0.0-1.0
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = x_train / 255.0
x_test = x_test / 255.0

x_train_1d = x_train_1d.astype('float32')
x_test_1d = x_test_1d.astype('float32')
x_train_1d = x_train_1d / 255.0
x_test_1d = x_test_1d / 255.0

# Encode the outputs with one hot coding
num_classes = y_test.shape[1]

In [22]:
# Build the model
model = Sequential()
model.add(Dense(num_classes, activation = 'softmax', input_dim=28*28))
#model.add(Dense(num_classes, activation='softmax', input_shape=(28, 28, 1)))

# Define optimizer
lrate = 0.002
epochs = 5
decay = lrate/epochs
sgd = SGD(lr=lrate, momentum=0.7, decay=decay, nesterov=False) #Stochastic gradient descent optimizer

# Compile model
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.summary()

tf.set_random_seed(1)
np.random.seed(1)

epochs = 5
# Fit the model
model.fit(x_train_1d, y_train, validation_data=(x_test_1d, y_test), epochs=epochs, batch_size=60, verbose=1)

# Final evaluation of the model
scores = model.evaluate(x_test_1d, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 10)                7850      
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 78.74%


Add another dense layer

In [25]:
# Build the model
model = Sequential()
model.add(Dense(20, activation = 'relu', input_dim=28*28))
model.add(Dense(num_classes, activation='softmax'))

# Define optimizer
lrate = 0.002
epochs = 5
decay = lrate/epochs
sgd = SGD(lr=lrate, momentum=0.7, decay=decay, nesterov=False) #Stochastic gradient descent optimizer

# Compile model
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.summary()

tf.set_random_seed(1)
np.random.seed(1)

epochs = 5
# Fit the model
model.fit(x_train_1d, y_train, validation_data=(x_test_1d, y_test), epochs=epochs, batch_size=60, verbose=1)

# Final evaluation of the model
scores = model.evaluate(x_test_1d, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 20)                15700     
_________________________________________________________________
dense_12 (Dense)             (None, 10)                210       
Total params: 15,910
Trainable params: 15,910
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 79.75%


### Task 2

In [206]:
# Build the model
model = Sequential()
model.add(Conv2D(28, (3, 3), input_shape=(28, 28, 1), padding='same', activation='relu', kernel_constraint=maxnorm(3)))
#model.add(Conv2D(28, (3, 3), activation='relu', padding='same', kernel_constraint=maxnorm(3)))
#model.add(MaxPooling2D())
model.add(Flatten())
#model.add(Dense(512, activation='relu', kernel_constraint=maxnorm(3)))
#model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

# Define optimizer
lrate = 0.002
epochs = 5
decay = lrate/epochs
sgd = SGD(lr=lrate, momentum=0.7, decay=decay, nesterov=False) #Stochastic gradient descent optimizer

# Compile model
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.summary()

tf.set_random_seed(1)
np.random.seed(1)

epochs = 5
# Fit the model
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=epochs, batch_size=60, verbose=1)

# Final evaluation of the model
scores = model.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Model: "sequential_38"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 28, 28, 28)        280       
_________________________________________________________________
flatten_8 (Flatten)          (None, 21952)             0         
_________________________________________________________________
dense_40 (Dense)             (None, 10)                219530    
Total params: 219,810
Trainable params: 219,810
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 82.85%


Add a 50% dropout, another convolution layer, maxpooling and a dense layer

In [201]:
# Build the model
model = Sequential()
model.add(Conv2D(28, (3, 3), input_shape=(28, 28, 1), padding='same', activation='relu', kernel_constraint=maxnorm(3)))
model.add(Conv2D(28, (3, 3), activation='relu', padding='same', kernel_constraint=maxnorm(3)))
model.add(MaxPooling2D())
model.add(Flatten())
model.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Define optimizer
lrate = 0.002
epochs = 5
decay = lrate/epochs
sgd = SGD(lr=lrate, momentum=0.7, decay=decay, nesterov=False) #Stochastic gradient descent optimizer

# Compile model
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.summary()

tf.set_random_seed(1)
np.random.seed(1)

epochs = 5
# Fit the model
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=epochs, batch_size=60, verbose=1)

# Final evaluation of the model
scores = model.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Model: "sequential_33"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 28, 28, 28)        280       
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 28, 28, 28)        7084      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 14, 14, 28)        0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 5488)              0         
_________________________________________________________________
dense_31 (Dense)             (None, 256)               1405184   
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_32 (Dense)             (None, 10)              

### Task 3

Change the learning rate from 0.002 to 0.5

In [205]:
# Build the model
model = Sequential()
model.add(Conv2D(28, (3, 3), input_shape=(28, 28, 1), padding='same', activation='relu', kernel_constraint=maxnorm(3)))
model.add(Conv2D(28, (3, 3), activation='relu', padding='same', kernel_constraint=maxnorm(3)))
model.add(MaxPooling2D())
model.add(Flatten())
model.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Define optimizer
lrate = 0.5
epochs = 5
decay = lrate/epochs
sgd = SGD(lr=lrate, momentum=0.7, decay=decay, nesterov=False) #Stochastic gradient descent optimizer

# Compile model
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.summary()

tf.set_random_seed(1)
np.random.seed(1)

epochs = 5
# Fit the model
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=epochs, batch_size=60, verbose=1)

# Final evaluation of the model
scores = model.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Model: "sequential_37"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_10 (Conv2D)           (None, 28, 28, 28)        280       
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 28, 28, 28)        7084      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 14, 14, 28)        0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 5488)              0         
_________________________________________________________________
dense_38 (Dense)             (None, 256)               1405184   
_________________________________________________________________
dropout_5 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_39 (Dense)             (None, 10)              

Change optimizer from SGD to Adam. However, learning rate of 0.5 did not run well so reduced learning rate to 0.0005

In [210]:
# Build the model
model = Sequential()
model.add(Conv2D(28, (3, 3), input_shape=(28, 28, 1), padding='same', activation='relu', kernel_constraint=maxnorm(3)))
model.add(Conv2D(28, (3, 3), activation='relu', padding='same', kernel_constraint=maxnorm(3)))
model.add(MaxPooling2D())
model.add(Flatten())
model.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Define optimizer
lrate = 0.0005
adam = Adam(learning_rate=lrate) #Adam optimizer

# Compile model
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

model.summary()

tf.set_random_seed(1)
np.random.seed(1)

epochs = 5
# Fit the model
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=epochs, batch_size=60, verbose=1)

# Final evaluation of the model
scores = model.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Model: "sequential_41"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_17 (Conv2D)           (None, 28, 28, 28)        280       
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 28, 28, 28)        7084      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 14, 14, 28)        0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 5488)              0         
_________________________________________________________________
dense_45 (Dense)             (None, 256)               1405184   
_________________________________________________________________
dropout_8 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_46 (Dense)             (None, 10)              