### Packages required

In [63]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import glob
import shutil
import tensorflow as tf
import tensorflow
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Dropout, MaxPool2D, BatchNormalization,GlobalAveragePooling2D,Input, Conv2DTranspose, Concatenate, Dense
from tensorflow.keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import random
import h5py
from IPython.display import display
from PIL import Image as im
import datetime
import random
from tensorflow.keras import layers
from tensorflow import keras
from aquvitae import dist, ST
from sklearn.model_selection import train_test_split
from tensorflow.keras import optimizers
from tensorflow.keras import backend as K

### Loading the train and validation data

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
train_data = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
test_data = ImageDataGenerator(rescale=1./255)
train_set = train_data.flow_from_directory('/home/dell/Downloads/archive/imagenet-mini/train',
                                         target_size=(224,224), batch_size=32, class_mode='categorical')
test_set = test_data.flow_from_directory('/home/dell/Downloads/archive/imagenet-mini/val',
                                         target_size=(224,224), batch_size=32, class_mode='categorical')

### CIFAR10

In [3]:
cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [20]:
x_train, x_val, y_train, y_val = train_test_split(x_train, 
                                                  y_train, 
                                                  test_size=0.15, 
                                                  stratify=np.array(y_train), 
                                                  random_state=42)

In [38]:
train_datagen = ImageDataGenerator(rescale=1. / 255, horizontal_flip=False)
train_datagen.fit(x_train)
train_generator = train_datagen.flow(x_train, y_train, batch_size=32)

In [40]:
val_datagen = ImageDataGenerator(rescale=1. / 255, horizontal_flip=False)
val_datagen.fit(x_val)
val_generator = val_datagen.flow(x_val, y_val, batch_size=32)

In [54]:
test_datagen = ImageDataGenerator(rescale=1. /255, horizontal_flip=False)
test_datagen.fit(x_test)
test_generator = test_datagen.flow(x_test, y_test, batch_size=32)

In [55]:
train_steps_per_epoch = x_train.shape[0] // 32
val_steps_per_epoch = x_val.shape[0] // 32
test_steps_per_epoch = x_test.shape[0]//32

### Loading the Teacher Model

In [32]:
from tensorflow.keras.applications.vgg16 import VGG16
teacher_model = VGG16(input_shape=(32,32,3), include_top=False, weights='imagenet')

In [33]:
last = teacher_model.get_layer('block3_pool').output

In [34]:
# Add classification layers on top of it
x = GlobalAveragePooling2D()(last)
x= BatchNormalization()(x)
x = Dense(256, activation='relu')(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.6)(x)
pred = Dense(10, activation='softmax')(x)
teacher_model = Model(teacher_model.input, pred)

In [35]:
for layer in teacher_model.layers:
     layer.trainable = False

In [49]:
teacher_model.compile(loss='binary_crossentropy',
              optimizer=optimizers.Adam(lr=1e-4),
              metrics=['accuracy'])

In [37]:
teacher_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)         0   

In [11]:
for layer in tuple(teacher_model.layers):
        layer_type = type(layer).__name__
        if hasattr(layer, 'activation'):
            print(layer_type, layer.activation.__name__)

Conv2D relu
Conv2D relu
Conv2D relu
Conv2D relu
Conv2D relu
Conv2D relu
Conv2D relu
Dense relu
Dense relu
Dense softmax


In [50]:
history = teacher_model.fit_generator(train_generator,
                              steps_per_epoch=train_steps_per_epoch,
                              validation_data=val_generator,
                              validation_steps=val_steps_per_epoch,
                              epochs=5,
                              verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [51]:
teacher_model.save('vgg_teacher.h5')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: saved_model/vgg16_teacher/assets


### Testing the Teacher model

In [56]:
accuracy = teacher_model.evaluate_generator(test_generator, steps=test_steps_per_epoch)
print('The testing accuracy is:',accuracy[1]*100,'%')

Instructions for updating:
Please use Model.evaluate, which supports generators.
The testing accuracy is: 0.0 %


### Defining custom activation Function

In [117]:
from tensorflow.keras import backend as K
from tensorflow.python.keras.layers.core import Activation
from tensorflow.python.keras.utils.generic_utils import get_custom_objects

In [126]:
def custom_relu(x):
    return K.maximum(0.1,x)

### Softmax Taylor

In [98]:
def first_three_terms(x):
    x1 = []
    for i in x:
         x1.append(1 + i + i**2/2)
    return x1

In [160]:
def softmax_taylor(x):
    x = x.numpy()
    x = [1+i+np.power(i,2)/2 for i in x]
    return x/np.sum(x, axis=0)

### Soft Margin Softmax

In [None]:
def soft_margin_softmax(x):

In [None]:
def SM_taylor_softmax(x):

### Defining the Student model

In [161]:
def forming_student_model(model):
    index = None
    for idx, layer in enumerate(model.layers):
        print(idx, layer.name)
        if hasattr(layer, 'activation'):
            print(layer.activation.__name__)
            if layer.activation.__name__ == 'softmax':
                index = idx
                model.layers[index].activation = softmax_taylor
    return model

In [162]:
teacher_model = teacher_model.load_weights('saved_model/vgg16_teacher')
student_model = forming_student_model(teacher_model)

0 input_2
1 block1_conv1
relu
2 block1_conv2
relu
3 block1_pool
4 block2_conv1
relu
5 block2_conv2
relu
6 block2_pool
7 block3_conv1
relu
8 block3_conv2
relu
9 block3_conv3
relu
10 block3_pool
11 global_average_pooling2d_1
12 batch_normalization_1
13 dense_3
relu
14 dense_4
relu
15 dropout_1
16 dense_5
softmax


In [163]:
for layer in tuple(student_model.layers):
        layer_type = type(layer).__name__
        if hasattr(layer, 'activation'):
            print(layer_type, layer.activation.__name__)

Conv2D relu
Conv2D relu
Conv2D relu
Conv2D relu
Conv2D relu
Conv2D relu
Conv2D relu
Dense relu
Dense relu
Dense softmax_taylor


In [166]:
opti = Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.001)
student_model.compile(optimizer = opti, loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'], run_eagerly=True)
student_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)         0   

In [167]:
vgg_student = student_model.fit_generator(train_generator,
                              steps_per_epoch=train_steps_per_epoch,
                              validation_data=val_generator,
                              validation_steps=val_steps_per_epoch,
                              epochs=5,
                              verbose=1)

Epoch 1/5


ValueError: Shapes (32, 1) and (32, 10) are incompatible

In [71]:
student_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)         0   

### Distillation

In [75]:
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(64)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(64)

In [76]:
alpha = dist(
    teacher= teacher_model,
    student= student_model,
    algo=ST(alpha=0.6, T=2.5),
    optimizer=tf.keras.optimizers.Adam(),
    train_ds=train_ds,
    test_ds=test_ds,
    iterations=300
)

Training - 300/300 [██████████████████████████████] ELP: 08:33, accuracy: 0.4149 - val_accuracy: 0.3705 


In [79]:
accuracy = student_model.evaluate_generator(test_generator, steps=test_steps_per_epoch)
print('The testing accuracy is:',accuracy[1]*100,'%')

The testing accuracy is: 0.0 %
