<a href="https://colab.research.google.com/github/yuanwang1988/cs231n_project/blob/master/cs231n_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [0]:
#@title Imports
%matplotlib inline
import numpy as np                   # advanced math library
import matplotlib.pyplot as plt      # MATLAB like plotting routines
import os
import random                        # for generating random numbers
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model  # Model type to be used
from tensorflow.keras.layers import Dense, Dropout, Activation # Types of layers to be used in our model
from tensorflow.keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, Flatten, BatchNormalization

In [0]:
!pip install pyyaml h5py  # Required to save models in HDF5 format



In [0]:
print(tf.version.VERSION)

2.2.0


In [0]:
from google.colab import drive
drive.mount('/content/gdrive')
output_dir = F"/content/gdrive/My Drive/cs231n_project/" 

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Data Processing Functions

In [0]:
def print_class_distribution(labels):
  (unique, counts) = np.unique(labels, return_counts=True)
  for v, c in zip(unique, counts):
    print('{}: {}'.format(v,c))

In [0]:
def create_transfer_learning_data(
    X_train, y_train, X_test, y_test,
    source_task_classes, target_task_classes):
  
  X_train_src = X_train[np.isin(y_train, source_task_classes)]
  y_train_src = y_train[np.isin(y_train, source_task_classes)]
  
  X_test_src = X_test[np.isin(y_test, source_task_classes)]
  y_test_src = y_test[np.isin(y_test, source_task_classes)]

  X_train_tgt = X_train[np.isin(y_train, target_task_classes)]
  y_train_tgt = y_train[np.isin(y_train, target_task_classes)]
  
  X_test_tgt = X_test[np.isin(y_test, target_task_classes)]
  y_test_tgt = y_test[np.isin(y_test, target_task_classes)]

  return (X_train_src, y_train_src, X_test_src, y_test_src, 
          X_train_tgt, y_train_tgt, X_test_tgt, y_test_tgt)

In [0]:
def shuffle_dataset(X, y):
  assert len(X) == len(y)
  shuffle_order = np.random.permutation(len(X))
  return X[shuffle_order], y[shuffle_order]

In [0]:
def subsample_train_data(X_train, y_train, n_samples_per_class):
  X_train_subsamples = []
  y_train_subsamples = []
  for label in np.unique(y_train):
    mask = (y_train==label)
    n_examples = len(y_train[mask])
    X_train_subsamples.append(X_train[mask][:min(n_examples, n_samples_per_class)])
    y_train_subsamples.append(y_train[mask][:min(n_examples, n_samples_per_class)])
    
  X_train_sample = np.concatenate(X_train_subsamples)
  y_train_sample = np.concatenate(y_train_subsamples)

  return X_train_sample, y_train_sample

# Helper Functions

In [0]:
def print_accuracy_results(accuracy_results):
  print('num examples per class: accuracy')
  for n, acc in accuracy_results.items():
    print('{}: {:.4f}'.format(n, acc))

# MNIST

## Load Data

In [0]:
from keras.datasets import mnist     # MNIST dataset is included in Keras

In [0]:
# The MNIST data is split between 60,000 28 x 28 pixel training images and 10,000 28 x 28 pixel images
(X_train, y_train), (X_test, y_test) = mnist.load_data()

print("X_train shape", X_train.shape)
print("y_train shape", y_train.shape)
print("X_test shape", X_test.shape)
print("y_test shape", y_test.shape)

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
X_train shape (60000, 28, 28)
y_train shape (60000,)
X_test shape (10000, 28, 28)
y_test shape (10000,)


## Create Transfer Learning Dataset

In [0]:
source_task_classes = [0,1,2,3,4]
target_task_classes = [5,6,7,8,9]
n_examples_per_class_list = [1, 5, 10]

In [0]:
dataset = create_transfer_learning_data(X_train, y_train, X_test, y_test, source_task_classes, target_task_classes)
X_train_src, y_train_src, X_test_src, y_test_src, X_train_tgt, y_train_tgt, X_test_tgt, y_test_tgt = dataset

In [0]:
X_train_tgt, y_train_tgt = shuffle_dataset(X_train_tgt, y_train_tgt)

In [0]:
print_class_distribution(y_train_src)

0: 5923
1: 6742
2: 5958
3: 6131
4: 5842


In [0]:
print_class_distribution(y_test_src)

0: 980
1: 1135
2: 1032
3: 1010
4: 982


In [0]:
print_class_distribution(y_train_tgt)

5: 5421
6: 5918
7: 6265
8: 5851
9: 5949


In [0]:
print_class_distribution(y_test_tgt)

5: 892
6: 958
7: 1028
8: 974
9: 1009


## Fully Connected DNN

In [0]:
def preprocess_data_fc_model(X, y, label_map=None):
  X = X.reshape(X.shape[0], -1) # reshape 60,000 28 x 28 matrices into 60,000 784-length vectors.
  X = X.astype('float32')
  X /= 255

  if label_map is None:
    label_map = {}
    class_labels = np.unique(y)
    for i in range(len(class_labels)):
      label_map[class_labels[i]] = i

  def map_label(label):
    return label_map[label]

  nb_classes = len(np.unique(y))
  y = np.vectorize(map_label)(y)
  y = np_utils.to_categorical(y, nb_classes)

  return X, y, label_map

In [0]:
def create_simple_fc_model(
    input_shape,
    num_classes,
    hidden_layer_dims,
    drop_out_prob):
  model = Sequential(
      layers=[Dense(hidden_layer_dims[0], input_shape=input_shape, activation='relu', name='fc_1'),
              Dropout(drop_out_prob),
              Dense(hidden_layer_dims[1], activation='relu', name='fc_2'),
              Dropout(drop_out_prob),
              Dense(num_classes, name = 'fc_3'),
              Activation('softmax')
      ],
      name = 'FC1')
  
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  
  return model

### Sanity Check (Train and Eval on the Entire Target Training Set)

This checks whether the model architecture is good given large amount of training data.

In [0]:
model = create_simple_fc_model(
    input_shape=(784,),
    num_classes=len(target_task_classes),
    hidden_layer_dims=[512,512],
    drop_out_prob=0.2
)

In [0]:
model.summary()

Model: "FC1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
fc_1 (Dense)                 (None, 512)               401920    
_________________________________________________________________
dropout_7 (Dropout)          (None, 512)               0         
_________________________________________________________________
fc_2 (Dense)                 (None, 512)               262656    
_________________________________________________________________
dropout_8 (Dropout)          (None, 512)               0         
_________________________________________________________________
fc_3 (Dense)                 (None, 5)                 2565      
_________________________________________________________________
activation_30 (Activation)   (None, 5)                 0         
Total params: 667,141
Trainable params: 667,141
Non-trainable params: 0
_________________________________________________________

In [0]:
X_train_tgt_sample, y_train_tgt_sample = subsample_train_data(X_train_tgt[:-1000], y_train_tgt[:-1000], 10000)

In [0]:
X_train_tgt_fc_sample, y_train_tgt_fc_sample, label_map = preprocess_data_fc_model(X_train_tgt_sample, y_train_tgt_sample)
X_valid_tgt_fc, y_valid_tgt_fc, label_map = preprocess_data_fc_model(X_train_tgt[1000:], y_train_tgt[1000:], label_map)
X_test_tgt_fc, y_test_tgt_fc, label_map = preprocess_data_fc_model(X_test_tgt, y_test_tgt, label_map)

In [0]:
print_class_distribution(y_train_tgt_sample)

5: 5255
6: 5734
7: 6040
8: 5646
9: 5729


In [0]:
checkpoint_path = output_dir+"mnist_fc_sanity_check/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              verbose=1)

In [0]:
model.fit(X_train_tgt_fc_sample, y_train_tgt_fc_sample,
          batch_size=128, 
          epochs=10,
          verbose=1,
          validation_data=(X_valid_tgt_fc, y_valid_tgt_fc),
          callbacks=[cp_callback])

Epoch 1/10
Epoch 00001: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_sanity_check/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_sanity_check/cp.ckpt/assets
Epoch 2/10
Epoch 00002: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_sanity_check/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_sanity_check/cp.ckpt/assets
Epoch 3/10
Epoch 00003: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_sanity_check/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_sanity_check/cp.ckpt/assets
Epoch 4/10
Epoch 00004: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_sanity_check/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_sanity_check/cp.ckpt/assets
Epoch 5/10
Epoch 00005: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_sanity_check/cp.ckpt
IN

<tensorflow.python.keras.callbacks.History at 0x7f59c747dcf8>

In [0]:
score = model.evaluate(X_test_tgt_fc, y_test_tgt_fc)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 0.05829891562461853
Test accuracy: 0.9862168431282043


### Baseline No Transfer Learning

In [0]:
test_accuracy_results = {}

for n_examples_per_class in n_examples_per_class_list:
  X_train_tgt_sample, y_train_tgt_sample = subsample_train_data(X_train_tgt[:-1000], y_train_tgt[:-1000], n_examples_per_class)
  assert len(y_train_tgt_sample) <= n_examples_per_class * len(np.unique(y_train_tgt))
  
  X_train_tgt_fc_sample, y_train_tgt_fc_sample, label_map = preprocess_data_fc_model(X_train_tgt_sample, y_train_tgt_sample)
  X_valid_tgt_fc, y_valid_tgt_fc, label_map = preprocess_data_fc_model(X_train_tgt[1000:], y_train_tgt[1000:], label_map)
  X_test_tgt_fc, y_test_tgt_fc, label_map = preprocess_data_fc_model(X_test_tgt, y_test_tgt, label_map)

  model = create_simple_fc_model(
    input_shape=(784,),
    num_classes=len(target_task_classes),
    hidden_layer_dims=[512,512],
    drop_out_prob=0.2
  )
  # checkpoint_path = "cs231n/project/model_checkpoints/minist_fc_test/cp.ckpt"
  checkpoint_path = output_dir+"mnist_fc_no_tl_{}_samples_per_class/cp.ckpt".format(n_examples_per_class)
  checkpoint_dir = os.path.dirname(checkpoint_path)

  # Create a callback that saves the model's weights
  cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                                verbose=1)
  model.fit(X_train_tgt_fc_sample, y_train_tgt_fc_sample,
          batch_size=128, 
          epochs=10,
          verbose=1,
          validation_data=(X_valid_tgt_fc, y_valid_tgt_fc),
          callbacks=[cp_callback])
  
  score = model.evaluate(X_test_tgt_fc, y_test_tgt_fc)
  print('+++++++++++++++++++++++++++++++++++')
  print('num examples per class', n_examples_per_class)
  print('Test score:', score[0])
  print('Test accuracy:', score[1])
  print('+++++++++++++++++++++++++++++++++++')
  test_accuracy_results[n_examples_per_class] = score[1]

Epoch 1/10
Epoch 00001: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_no_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_no_tl_1_samples_per_class/cp.ckpt/assets
Epoch 2/10
Epoch 00002: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_no_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_no_tl_1_samples_per_class/cp.ckpt/assets
Epoch 3/10
Epoch 00003: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_no_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_no_tl_1_samples_per_class/cp.ckpt/assets
Epoch 4/10
Epoch 00004: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_no_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_no_tl_1_samples_per_class/cp.ckpt/assets
Epoch 5/

In [0]:
print_accuracy_results(test_accuracy_results)

num examples per class: accuracy
1: 0.5565
5: 0.7484
10: 0.8177


### With Transfer Learning

In [0]:
X_train_src_fc, y_train_src_fc, label_map = preprocess_data_fc_model(X_train_src, y_train_src)
X_test_src_fc, y_test_src_fc, label_map = preprocess_data_fc_model(X_test_src, y_test_src, label_map)

In [0]:
label_map

{0: 0, 1: 1, 2: 2, 3: 3, 4: 4}

In [0]:
pre_trained_model = create_simple_fc_model(
    input_shape=(784,),
    num_classes=len(target_task_classes),
    hidden_layer_dims=[512,512],
    drop_out_prob=0.2
)

In [0]:
checkpoint_path = output_dir+"mnist_fc_tl_pretrained_model/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              verbose=1)

In [0]:
pre_trained_model.fit(X_train_src_fc[:-1000], y_train_src_fc[:-1000],
          batch_size=128, 
          epochs=10,
          verbose=1,
          validation_data=(X_train_src_fc[1000:], y_train_src_fc[1000:]),
          callbacks=[cp_callback])

Epoch 1/10
Epoch 00001: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_pretrained_model/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_pretrained_model/cp.ckpt/assets
Epoch 2/10
Epoch 00002: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_pretrained_model/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_pretrained_model/cp.ckpt/assets
Epoch 3/10
Epoch 00003: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_pretrained_model/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_pretrained_model/cp.ckpt/assets
Epoch 4/10
Epoch 00004: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_pretrained_model/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_pretrained_model/cp.ckpt/assets
Epoch 5/10
Epoch 00005: saving model to /content/gdrive/

<tensorflow.python.keras.callbacks.History at 0x7f59c3de1e48>

In [0]:
pre_trained_model = tf.keras.models.load_model(output_dir+"mnist_fc_tl_pretrained_model/cp.ckpt")

In [0]:
score = pre_trained_model.evaluate(X_test_src_fc, y_test_src_fc)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 0.021421929821372032
Test accuracy: 0.9945514798164368


In [0]:
score = pre_trained_model.evaluate(X_test_tgt_fc, y_test_tgt_fc)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 6.277988910675049
Test accuracy: 0.3750257194042206


In [0]:
def create_fine_tune_model(
    pre_trained_model,
    num_classes):
  pre_trained_model.layers[0].trainable=False
  output = Dense(num_classes)(pre_trained_model.layers[-3].output)
  output = Activation('softmax')(output)

  fine_tune_model = Model(inputs=pre_trained_model.inputs, outputs=output)
  fine_tune_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  return fine_tune_model

In [0]:
fine_tune_model = create_fine_tune_model(
    pre_trained_model,
    num_classes=5
)

In [0]:
fine_tune_model.summary()

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
fc_1 (Dense)                 (None, 512)               401920    
_________________________________________________________________
dropout_15 (Dropout)         (None, 512)               0         
_________________________________________________________________
fc_2 (Dense)                 (None, 512)               262656    
_________________________________________________________________
dropout_16 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 5)                 2565      
_________________________________________________________________
activation_36 (Activation)   (None, 5)                 0   

In [0]:
test_accuracy_results = {}

for n_examples_per_class in n_examples_per_class_list:
  X_train_tgt_sample, y_train_tgt_sample = subsample_train_data(X_train_tgt[:-1000], y_train_tgt[:-1000], n_examples_per_class)
  assert len(y_train_tgt_sample) <= n_examples_per_class * len(np.unique(y_train_tgt))
  
  X_train_tgt_fc_sample, y_train_tgt_fc_sample, label_map = preprocess_data_fc_model(X_train_tgt_sample, y_train_tgt_sample)
  X_valid_tgt_fc, y_valid_tgt_fc, label_map = preprocess_data_fc_model(X_train_tgt[1000:], y_train_tgt[1000:], label_map)
  X_test_tgt_fc, y_test_tgt_fc, label_map = preprocess_data_fc_model(X_test_tgt, y_test_tgt, label_map)

  model = create_fine_tune_model(
    pre_trained_model,
    num_classes=5)
  
  # checkpoint_path = "cs231n/project/model_checkpoints/minist_fc_test/cp.ckpt"
  checkpoint_path = output_dir+"mnist_fc_tl_{}_samples_per_class/cp.ckpt".format(n_examples_per_class)
  checkpoint_dir = os.path.dirname(checkpoint_path)

  # Create a callback that saves the model's weights
  cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                                verbose=1)
  model.fit(X_train_tgt_fc_sample, y_train_tgt_fc_sample,
          batch_size=128, 
          epochs=10,
          verbose=1,
          validation_data=(X_valid_tgt_fc, y_valid_tgt_fc),
          callbacks=[cp_callback])
  
  score = model.evaluate(X_test_tgt_fc, y_test_tgt_fc)
  print('+++++++++++++++++++++++++++++++++++')
  print('num examples per class', n_examples_per_class)
  print('Test score:', score[0])
  print('Test accuracy:', score[1])
  print('+++++++++++++++++++++++++++++++++++')
  test_accuracy_results[n_examples_per_class] = score[1]

Epoch 1/10
Epoch 00001: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_1_samples_per_class/cp.ckpt/assets
Epoch 2/10
Epoch 00002: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_1_samples_per_class/cp.ckpt/assets
Epoch 3/10
Epoch 00003: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_1_samples_per_class/cp.ckpt/assets
Epoch 4/10
Epoch 00004: saving model to /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_fc_tl_1_samples_per_class/cp.ckpt/assets
Epoch 5/10
Epoch 00005: saving m

In [0]:
print_accuracy_results(test_accuracy_results)

num examples per class: accuracy
1: 0.5069
5: 0.7227
10: 0.7776


## CNN

In [0]:
def preprocess_data_cnn_model(X, y, label_map=None):
  X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1) # reshape 60,000 28 x 28 matrices into 60,000 784-length vectors.
  X = X.astype('float32')
  X /= 255

  if label_map is None:
    label_map = {}
    class_labels = np.unique(y)
    for i in range(len(class_labels)):
      label_map[class_labels[i]] = i

  def map_label(label):
    return label_map[label]

  nb_classes = len(np.unique(y))
  y = np.vectorize(map_label)(y)
  y = np_utils.to_categorical(y, nb_classes)

  return X, y, label_map

In [0]:
def create_simple_cnn(
    input_shape,
    num_classes,
    filters,
    fc_sizes,
    dropout_prob):
  model = Sequential()                                 # Linear stacking of layers

  # Convolution Layer 1
  filter0 = filters[0]
  model.add(Conv2D(filter0[0], (filter0[1], filter0[2]), input_shape=input_shape)) # 32 different 3x3 kernels -- so 32 feature maps
  model.add(BatchNormalization(axis=-1))               # normalize each feature map before activation
  convLayer01 = Activation('relu')                     # activation
  model.add(convLayer01)

  # Convolution Layer 2
  filter1 = filters[1]
  model.add(Conv2D(filter1[0], (filter1[1], filter1[2])))                        # 32 different 3x3 kernels -- so 32 feature maps
  model.add(BatchNormalization(axis=-1))               # normalize each feature map before activation
  model.add(Activation('relu'))                        # activation
  convLayer02 = MaxPooling2D(pool_size=(2,2))          # Pool the max values over a 2x2 kernel
  model.add(convLayer02)

  # Convolution Layer 3
  filter2 = filters[2]
  model.add(Conv2D(filter2[0],(filter2[1], filter2[2])))                         # 64 different 3x3 kernels -- so 64 feature maps
  model.add(BatchNormalization(axis=-1))               # normalize each feature map before activation
  convLayer03 = Activation('relu')                     # activation
  model.add(convLayer03)

  # Convolution Layer 4
  filter3 = filters[3]
  model.add(Conv2D(filter3[0], (filter3[1], filter3[2])))                        # 64 different 3x3 kernels -- so 64 feature maps
  model.add(BatchNormalization(axis=-1))               # normalize each feature map before activation
  model.add(Activation('relu'))                        # activation
  convLayer04 = MaxPooling2D(pool_size=(2,2))          # Pool the max values over a 2x2 kernel
  model.add(convLayer04)
  model.add(Flatten())                                 # Flatten final 4x4x64 output matrix into a 1024-length vector

  # Fully Connected Layer 5
  model.add(Dense(fc_sizes[0]))                                # 512 FCN nodes
  model.add(BatchNormalization())                      # normalization
  model.add(Activation('relu'))                        # activation
                       
  model.add(Dropout(dropout_prob))                              # 20% dropout of randomly selected nodes
  
  # Fully Connected Layer 6
  model.add(Dense(num_classes))                                 # final 10 FCN nodes
  model.add(Activation('softmax'))                     # softmax activation

  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  return model

### Sanity Check (Train and Eval on the Entire Target Training Set)

In [0]:
model = create_simple_cnn(
    input_shape=(28,28,1),
    num_classes=5,
    filters=[(32,3,3),(32,3,3),(64,3,3),(64,3,3)],
    fc_sizes=[512],
    dropout_prob=0.2)

In [0]:
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_31 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_39 (Batc (None, 26, 26, 32)        128       
_________________________________________________________________
activation_56 (Activation)   (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_40 (Batc (None, 24, 24, 32)        128       
_________________________________________________________________
activation_57 (Activation)   (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 12, 12, 32)       

In [0]:
X_train_tgt_sample, y_train_tgt_sample = subsample_train_data(X_train_tgt[:-1000], y_train_tgt[:-1000], 10000)

In [0]:
X_train_tgt_cnn_sample, y_train_tgt_cnn_sample, label_map = preprocess_data_cnn_model(X_train_tgt_sample, y_train_tgt_sample)
X_valid_tgt_cnn, y_valid_tgt_cnn, label_map = preprocess_data_cnn_model(X_train_tgt[1000:], y_train_tgt[1000:], label_map)
X_test_tgt_cnn, y_test_tgt_cnn, label_map = preprocess_data_cnn_model(X_test_tgt, y_test_tgt, label_map)

In [0]:
gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                         height_shift_range=0.08, zoom_range=0.08)

test_gen = ImageDataGenerator()

In [0]:
train_generator = gen.flow(X_train_tgt_cnn_sample, y_train_tgt_cnn_sample, batch_size=128)
test_generator = test_gen.flow(X_valid_tgt_cnn, y_valid_tgt_cnn, batch_size=128)

In [0]:
checkpoint_path = output_dir+"mnist_cnn_sanity_check/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              verbose=1)

In [0]:
model.fit(train_generator, 
          steps_per_epoch=60000//128, 
          epochs=5, 
          verbose=1, 
          validation_data=test_generator,
          validation_steps=10000//128,
          callbacks=[cp_callback])

Epoch 1/5
Epoch 00001: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_sanity_check/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_sanity_check/cp.ckpt/assets
Epoch 2/5
Epoch 00002: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_sanity_check/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_sanity_check/cp.ckpt/assets
Epoch 3/5
Epoch 00003: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_sanity_check/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_sanity_check/cp.ckpt/assets
Epoch 4/5
Epoch 00004: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_sanity_check/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_sanity_check/cp.ckpt/assets
Epoch 5/5
Epoch 00005: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_sanity_check/cp.ckp

<tensorflow.python.keras.callbacks.History at 0x7f59c721e4e0>

In [0]:
score = model.evaluate(X_test_tgt_cnn, y_test_tgt_cnn)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 0.009285828098654747
Test accuracy: 0.9973256587982178


### Baseline No Transfer Learning

In [0]:
test_accuracy_results = {}

for n_examples_per_class in n_examples_per_class_list:
  X_train_tgt_sample, y_train_tgt_sample = subsample_train_data(X_train_tgt[:-1000], y_train_tgt[:-1000], n_examples_per_class)
  assert len(y_train_tgt_sample) <= n_examples_per_class * len(np.unique(y_train_tgt))
  
  X_train_tgt_cnn_sample, y_train_tgt_cnn_sample, label_map = preprocess_data_cnn_model(X_train_tgt_sample, y_train_tgt_sample)
  X_valid_tgt_cnn, y_valid_tgt_cnn, label_map = preprocess_data_cnn_model(X_train_tgt[1000:], y_train_tgt[1000:], label_map)
  X_test_tgt_cnn, y_test_tgt_cnn, label_map = preprocess_data_cnn_model(X_test_tgt, y_test_tgt, label_map)

  model = create_simple_cnn(
      input_shape=(28,28,1),
      num_classes=5,
      filters=[(32,3,3),(32,3,3),(64,3,3),(64,3,3)],
      fc_sizes=[512],
      dropout_prob=0.2)
  
  gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                         height_shift_range=0.08, zoom_range=0.08)
  test_gen = ImageDataGenerator()

  train_generator = gen.flow(X_train_tgt_cnn_sample, y_train_tgt_cnn_sample, batch_size=128)
  test_generator = test_gen.flow(X_valid_tgt_cnn, y_valid_tgt_cnn, batch_size=128)

  # checkpoint_path = "cs231n/project/model_checkpoints/minist_fc_test/cp.ckpt"
  checkpoint_path = output_dir+"mnist_cnn_no_tl_{}_samples_per_class/cp.ckpt".format(n_examples_per_class)
  checkpoint_dir = os.path.dirname(checkpoint_path)

  # Create a callback that saves the model's weights
  cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                                verbose=1)
  model.fit(train_generator, 
            steps_per_epoch=60000//128, 
            epochs=5, 
            verbose=1, 
            validation_data=test_generator,
            validation_steps=10000//128,
            callbacks=[cp_callback])
  
  score = model.evaluate(X_test_tgt_cnn, y_test_tgt_cnn)
  print('+++++++++++++++++++++++++++++++++++')
  print('num examples per class', n_examples_per_class)
  print('Test score:', score[0])
  print('Test accuracy:', score[1])
  print('+++++++++++++++++++++++++++++++++++')
  test_accuracy_results[n_examples_per_class] = score[1]

Epoch 1/5
Epoch 00001: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_no_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_no_tl_1_samples_per_class/cp.ckpt/assets
Epoch 2/5
Epoch 00002: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_no_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_no_tl_1_samples_per_class/cp.ckpt/assets
Epoch 3/5
Epoch 00003: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_no_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_no_tl_1_samples_per_class/cp.ckpt/assets
Epoch 4/5
Epoch 00004: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_no_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_no_tl_1_samples_per_class/cp.ckpt/assets
Epoc

In [0]:
print_accuracy_results(test_accuracy_results)

num examples per class: accuracy
1: 0.7046
5: 0.8216
10: 0.9313


### With Tranfer Learning

In [0]:
X_train_src_cnn, y_train_src_cnn, label_map = preprocess_data_cnn_model(X_train_src, y_train_src)
X_test_src_cnn, y_test_src_cnn, label_map = preprocess_data_cnn_model(X_test_src, y_test_src, label_map)

In [0]:
pre_trained_model = create_simple_cnn(
    input_shape=(28,28,1),
    num_classes=5,
    filters=[(32,3,3),(32,3,3),(64,3,3),(64,3,3)],
    fc_sizes=[512],
    dropout_prob=0.2)

In [0]:
pre_trained_model.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_67 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_88 (Batc (None, 26, 26, 32)        128       
_________________________________________________________________
activation_118 (Activation)  (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_68 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_89 (Batc (None, 24, 24, 32)        128       
_________________________________________________________________
activation_119 (Activation)  (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_33 (MaxPooling (None, 12, 12, 32)      

In [0]:
print_class_distribution(y_test_src)

0: 980
1: 1135
2: 1032
3: 1010
4: 982


In [0]:
checkpoint_path = output_dir+"mnist_cnn_tl_pretrained_model/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              verbose=1)

In [0]:
pre_trained_model.fit(X_train_src_cnn[:-1000], y_train_src_cnn[:-1000],
          batch_size=128, 
          epochs=10,
          verbose=1,
          validation_data=(X_train_src_cnn[1000:], y_train_src_cnn[1000:]),
          callbacks=[cp_callback])

Epoch 1/10
Epoch 00001: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_pretrained_model/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_pretrained_model/cp.ckpt/assets
Epoch 2/10
Epoch 00002: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_pretrained_model/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_pretrained_model/cp.ckpt/assets
Epoch 3/10
Epoch 00003: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_pretrained_model/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_pretrained_model/cp.ckpt/assets
Epoch 4/10
Epoch 00004: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_pretrained_model/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_pretrained_model/cp.ckpt/assets
Epoch 5/10
Epoch 00005: saving model to /content

<tensorflow.python.keras.callbacks.History at 0x7f59a4d40e48>

In [0]:
pre_trained_model = tf.keras.models.load_model(output_dir+"mnist_cnn_tl_pretrained_model/cp.ckpt")

In [0]:
score = pre_trained_model.evaluate(X_test_src_cnn, y_test_src_cnn)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 0.014575857669115067
Test accuracy: 0.996497392654419


In [0]:
score = pre_trained_model.evaluate(X_test_tgt_cnn, y_test_tgt_cnn)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 6.508804798126221
Test accuracy: 0.3205101788043976


In [0]:
for layer in pre_trained_model.layers:
  layer.trainable=False

In [0]:
pre_trained_model.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_67 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_88 (Batc (None, 26, 26, 32)        128       
_________________________________________________________________
activation_118 (Activation)  (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_68 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_89 (Batc (None, 24, 24, 32)        128       
_________________________________________________________________
activation_119 (Activation)  (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_33 (MaxPooling (None, 12, 12, 32)      

In [0]:
def create_fine_tune_cnn_model(
    pre_trained_model,
    num_classes,
    fc_sizes,
    dropout_prob):
  output = pre_trained_model.layers[-7].output
  output = Dense(fc_sizes[0])(output)
  output = BatchNormalization()(output)
  output = Activation('relu')(output)
  output = Dropout(dropout_prob)(output)
  output = Dense(num_classes)(output)
  output = Activation('softmax')(output)
  fine_tune_model = Model(inputs=pre_trained_model.inputs, outputs=output)
  fine_tune_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  return fine_tune_model

In [0]:
fine_tune_model = create_fine_tune_cnn_model(
    pre_trained_model,
    num_classes=5,
    fc_sizes=[512],
    dropout_prob=0.2)

In [0]:
fine_tune_model.summary()

Model: "model_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_67 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_88 (Batc (None, 26, 26, 32)        128       
_________________________________________________________________
activation_118 (Activation)  (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_68 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_89 (Batc (None, 24, 24, 32)        128       
_________________________________________________________________
activation_119 (Activation)  (None, 24, 24, 32)        0  

In [0]:
test_accuracy_results = {}

for n_examples_per_class in n_examples_per_class_list:
  X_train_tgt_sample, y_train_tgt_sample = subsample_train_data(X_train_tgt[:-1000], y_train_tgt[:-1000], n_examples_per_class)
  assert len(y_train_tgt_sample) <= n_examples_per_class * len(np.unique(y_train_tgt))
  
  X_train_tgt_cnn_sample, y_train_tgt_cnn_sample, label_map = preprocess_data_cnn_model(X_train_tgt_sample, y_train_tgt_sample)
  X_valid_tgt_cnn, y_valid_tgt_cnn, label_map = preprocess_data_cnn_model(X_train_tgt[1000:], y_train_tgt[1000:], label_map)
  X_test_tgt_cnn, y_test_tgt_cnn, label_map = preprocess_data_cnn_model(X_test_tgt, y_test_tgt, label_map)

  model = create_fine_tune_cnn_model(
    pre_trained_model,
    num_classes=5,
    fc_sizes=[512],
    dropout_prob=0.2)
  
  gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                         height_shift_range=0.08, zoom_range=0.08)
  test_gen = ImageDataGenerator()

  train_generator = gen.flow(X_train_tgt_cnn_sample, y_train_tgt_cnn_sample, batch_size=128)
  test_generator = test_gen.flow(X_valid_tgt_cnn, y_valid_tgt_cnn, batch_size=128)

  # checkpoint_path = "cs231n/project/model_checkpoints/minist_fc_test/cp.ckpt"
  checkpoint_path = output_dir+"mnist_cnn_tl_{}_samples_per_class/cp.ckpt".format(n_examples_per_class)
  checkpoint_dir = os.path.dirname(checkpoint_path)

  # Create a callback that saves the model's weights
  cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                                verbose=1)
  model.fit(train_generator, 
            steps_per_epoch=60000//128, 
            epochs=5, 
            verbose=1, 
            validation_data=test_generator,
            validation_steps=10000//128,
            callbacks=[cp_callback])
  
  score = model.evaluate(X_test_tgt_cnn, y_test_tgt_cnn)
  print('+++++++++++++++++++++++++++++++++++')
  print('num examples per class', n_examples_per_class)
  print('Test score:', score[0])
  print('Test accuracy:', score[1])
  print('+++++++++++++++++++++++++++++++++++')
  test_accuracy_results[n_examples_per_class] = score[1]

Epoch 1/5
Epoch 00001: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_1_samples_per_class/cp.ckpt/assets
Epoch 2/5
Epoch 00002: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_1_samples_per_class/cp.ckpt/assets
Epoch 3/5
Epoch 00003: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_1_samples_per_class/cp.ckpt/assets
Epoch 4/5
Epoch 00004: saving model to /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_1_samples_per_class/cp.ckpt
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/cs231n_project/mnist_cnn_tl_1_samples_per_class/cp.ckpt/assets
Epoch 5/5
Epoch 00005: savin

In [0]:
print_accuracy_results(test_accuracy_results)

num examples per class: accuracy
1: 0.7840
5: 0.8807
10: 0.9321
