In [23]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.datasets import cifar10
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
K.set_image_dim_ordering('th')
import tensorflow
tensorflow.set_random_seed(7)
from sklearn.model_selection import train_test_split
import numpy

## Restricting GPU memory usage

The code here should be added to any work you do on Volta.  If you don't, then your code will monopolize all available memory on each of the 4 GPUs on the machine, preventing others from working on it.  If you do **that**, you will be frowned upon.

The code in the next cell has the effect that:
1. Memory use will start off with some small fraction of the memory on each GPU.
1. It will grow if necessary (since `allow_growth` is set to `True`).
1. It will max out at 5% of overall memory.  Given the GPUs we have, this gives you (4 x 808 MB), which should be sufficient here.

In [4]:
########################
# Limit TensorFlow GPU use.
config = tensorflow.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.05
K.tensorflow_backend.set_session(tensorflow.Session(config=config))
########################

## Load data

In [5]:
# load data
(X_train, y_train), (X_test, y_test) = cifar10.load_data()


## Normalize inputs from 0-255 to 0.0-1.0

In [19]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

#z-score
# mean = numpy.mean(X_train,axis=(0,1,2,3))
# std = numpy.std(X_train,axis=(0,1,2,3))
# X_train = (X_train-mean)/(std+1e-7)
# X_test = (X_test-mean)/(std+1e-7)

X_train = X_train / 255.0
X_test = X_test / 255.0

In [20]:
# one hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
# print(y_test)

## First version of the CNN
This CNN contains 7 hidden layers, input layer and outpur layer. The structure is:  
1. input layer( 3 , 32, 32) 
1. first convolutional layer(32, each one has 3*3) 
1. dropout 20%
1. second convulutional layer
1. pooing layer( 2*2 )
1. Flatten layer
1. Fully connected layer(called Dense, size = 512)
1. dropout 50%
1. output layer( size = 10)  
  o dom
Accuracy for the testing data: 69.79%   
   
### Explanation:  
Sequential model: a linear stack of layers, which means the CNN is sequencial, all layers are from top to down  
input size is 3 * 32 * 32, which means 32*32 pixel RGB.  
dropout(0.2) means there are 20%(random) neurons were ignored when training the CNN, which is to fix the overfitting.  
Conv2D: 2D convolution layer. Because the inputs are 2d imgines, we chose the Conv2D as the convolutional layer in this CNN.  
Dense: all the neurons are conneceted to last layer's neurons.
relu/softmax: both are activition functions.

In [31]:
# Create the model
# model = Sequential()
# model.add(Conv2D(32, (3, 3), input_shape=(3, 32, 32), padding='same', activation='relu', kernel_constraint=maxnorm(3)))
# model.add(Dropout(0.2))
# model.add(Conv2D(32, (3, 3), activation='relu', padding='same', kernel_constraint=maxnorm(3)))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Flatten())
# model.add(Dense(512, activation='relu', kernel_constraint=maxnorm(3)))
# model.add(Dropout(0.5))
# model.add(Dense(num_classes, activation='softmax'))


## Second version of the CNN
more complex and deeper than before.
1. input layer
1. Conv layer
1. dropout 20%
1. Conv layer
1. pooling layer(2*2)
1. Conv layer
1. dropout 20%
1. Conv layer
1. pooling layer
1. Conv layer
1. dropout 20%
1. Conv layer
1. pooling layer
1. Flatten layer
1. dropout 20%
1. all connected layer (Dense layer)
1. dropout 20&
1. all connected layer (Dense layer)
1. dropout 20%
1. output layer (size = 10)  
   
  
Accuracy for the testing data: 79.94%.
  
## Improve Model Performance  
1. increase the Epoch:  
change the Epoch parameters to 40. After 40 minutes runing, the accuracy is 80.02% （No significant improvement）.  

1. Image Data Augmentation  
I add an ImgdataGenerator from keras package to shift the img range. The acc is : 
1. Deeper Network Topolog
From the guidance, I try to add more layers but the it still can improve the accuracy a lot when I add four extra layers.

In [28]:
# Create the model
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(3, 32, 32), activation='relu', padding='same'))
model.add(Dropout(0.2))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(Dropout(0.2))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(1024, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

#data augmentation
# datagen = ImageDataGenerator(
#     featurewise_center=False,
#     samplewise_center=False,
#     featurewise_std_normalization=False,
#     samplewise_std_normalization=False,
#     zca_whitening=False,
#     rotation_range=15,
#     width_shift_range=0.1,
#     height_shift_range=0.1,
#     horizontal_flip=True,
#     vertical_flip=False
#     )
# datagen.fit(X_train)

## Compile model  
1. epochs: how many rounds we train the CNN.
1. lrate: learning rate.
1. sgd: Stochastic gradient descent optimizer.
1. using categorical_crossentropy as loss function because the output vector is not a one-hot vector.(binary crossentropy can not work.)

In [32]:
# Compile model
epochs = 25
# epochs = 40
lrate = 0.01
decay = lrate/epochs
sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=False)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_37 (Conv2D)           (None, 32, 32, 32)        896       
_________________________________________________________________
dropout_37 (Dropout)         (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 32, 32, 32)        9248      
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 32, 16, 16)        0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_19 (Dense)             (None, 512)               4194816   
_________________________________________________________________
dropout_38 (Dropout)         (None, 512)               0         
__________

## Evaluation of final model on testing data

In [33]:
# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, batch_size=32)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

ValueError: Error when checking target: expected dense_20 to have 2 dimensions, but got array with shape (50000, 10, 2, 2, 2)