<a href="https://colab.research.google.com/github/zjzsu2000/CMPE297_Sec49AdvanceDL/blob/master/Assignment_3/Assignment_3_d)_Knowledge_distillation_with_kera_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment_3_d)  Knowledge distillation with Keras

**Optional: Knowledge distillation with Keras (knowledge distillation practice - student-teacher training)**

In [1]:
import tensorflow as tf

from tensorflow.keras import models
from tensorflow.keras import layers

# Fix TensorFlow's global random seed so that repeated runs are comparable.
tf.random.set_seed(666)

## Load data

In [2]:
# Fetch the Fashion-MNIST train/test splits and rescale the pixel values by 1/255.
fashion_mnist = tf.keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


((60000, 28, 28), (10000, 28, 28), (60000,), (10000,))

In [3]:
# Sanity-check the array shapes right after loading.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((60000, 28, 28), (10000, 28, 28), (60000,), (10000,))

## reshape data

In [4]:
# Cast both image arrays to float32 and add a trailing channel axis of size 1,
# matching the (28, 28, 1) input shape the models declare.
X_train, X_test = (
    split.astype("float32").reshape(-1, 28, 28, 1) for split in (X_train, X_test)
)

In [5]:
# Confirm the image arrays now carry an explicit channel dimension.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((60000, 28, 28, 1), (10000, 28, 28, 1), (60000,), (10000,))

## Define utility function for building a teacher model

In [9]:
def build_teacher_model():
    """Create and compile the convolutional teacher network.

    Two 5x5 convolution + 2x2 max-pooling stages are followed by dropout, a
    128-unit ReLU dense layer and a 10-unit output layer. The output layer has
    no activation, so the model emits raw logits; the loss is therefore
    configured with ``from_logits=True``.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, sparse
        categorical cross-entropy loss and the accuracy metric.
    """
    teacher = models.Sequential(
        [
            layers.Conv2D(16, (5, 5), activation="relu", input_shape=(28, 28, 1)),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Conv2D(32, (5, 5), activation="relu"),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Dropout(0.2),
            layers.Flatten(),
            layers.Dense(128, activation="relu"),
            layers.Dense(10),
        ]
    )
    teacher.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=["accuracy"],
    )
    return teacher

### Train the teacher model

In [11]:
# Build the input pipelines.
# tf.data shuffles by sampling from a fixed-size buffer, so a buffer far smaller
# than the dataset (previously 100) only perturbs the order locally. Using the
# full training-set size gives a uniform shuffle on every epoch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=len(X_train))
    .batch(64)
)
# The evaluation split is only batched, never shuffled.
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(64)

teacher_model = build_teacher_model()
teacher_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 16)        416       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 8, 8, 32)          12832     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 32)          0         
_________________________________________________________________
dropout (Dropout)            (None, 4, 4, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               6

In [12]:
# Train the teacher for 20 epochs, validating on the test split after each one.
teacher_model.fit(
    train_dataset,
    epochs=20,
    validation_data=test_dataset,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f78300d2048>

### Evaluate the teacher model

In [15]:
# Index 1 of the evaluate() result is kept and converted to a percentage.
teacher_eval = teacher_model.evaluate(test_dataset)
evaluate_1 = teacher_eval[1] * 100



In [16]:
# Report the teacher's test accuracy with two decimals.
accuracy_line = "Test accuracy: {:.2f}".format(evaluate_1)
print(accuracy_line)


Test accuracy: 90.37


### Save the teacher model's weights to an .h5 file

In [18]:
# Only the weights are written here; rebuilding the model later requires
# build_teacher_model() followed by load_weights().
teacher_model.save_weights("teacher_model.h5")

## build the student model

refs: https://github.com/google-research/simclr/blob/master/colabs/distillation_self_training.ipynb


In [22]:
from tensorflow.keras.layers import Input, Dense
from tensorflow import nn

In [31]:
def build_student_model():
    """Create the small fully connected student network.

    The (28, 28, 1) input is flattened and fed through a single 48-unit ReLU
    layer followed by a 10-unit output layer with no activation, so the model
    emits raw logits. The model is returned uncompiled.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    student_layers = [
        Input(shape=(28, 28, 1)),
        layers.Flatten(),
        Dense(48, activation="relu"),
        Dense(10),
    ]
    return models.Sequential(student_layers)

In [29]:
from tensorflow.compat.v1.losses import softmax_cross_entropy
from tensorflow.keras.metrics import *
from tensorflow.keras.optimizers import *

In [26]:
def get_kd_loss(teacher_log, student_log, temp=0.5):
    """Compute the distillation loss between teacher and student logits.

    Both logit tensors are divided by ``temp``. The softmax of the scaled
    teacher logits is used as the target of a softmax cross-entropy against
    the scaled student logits, and the result is weighted by ``temp ** 2``.

    Args:
        teacher_log: Logits produced by the teacher model.
        student_log: Logits produced by the student model.
        temp: Temperature applied to both sets of logits.

    Returns:
        The loss value returned by ``softmax_cross_entropy``.
    """
    soft_targets = nn.softmax(teacher_log / temp)
    softened_student = student_log / temp
    return softmax_cross_entropy(
        onehot_labels=soft_targets,
        logits=softened_student,
        weights=temp ** 2,
    )

In [33]:
# Student network and the optimizer that drives its distillation.
student_model = build_student_model()
optimizer = Adam(learning_rate=0.01)

# Running means of the distillation loss.
# NOTE(review): the validation loss metric is registered under the name
# "test_loss", unlike its "valid_acc" counterpart.
train_loss, valid_loss = Mean(name="train_loss"), Mean(name="test_loss")

# Running accuracies measured against the integer class labels.
train_acc, valid_acc = (
    SparseCategoricalAccuracy(name="train_acc"),
    SparseCategoricalAccuracy(name="valid_acc"),
)

## Train model function

In [34]:
def train_model(images, labels, teacher_model, student_model, optimizer, temp):
    """Run one distillation step on a single batch.

    The teacher's logits are the targets. Only the student's variables are
    differentiated and updated. The module-level ``train_loss`` and
    ``train_acc`` metrics are updated as a side effect.

    Args:
        images: Batch of input images fed to both models.
        labels: Ground-truth labels, used only for the accuracy metric.
        teacher_model: Model that supplies the target logits.
        student_model: Model being trained.
        optimizer: Optimizer applied to the student's trainable variables.
        temp: Temperature forwarded to ``get_kd_loss``.

    Returns:
        None.
    """
    # The teacher only provides targets. Run it outside the tape and pin it to
    # inference mode so that layers such as Dropout stay disabled, instead of
    # relying on the implicit Keras learning phase.
    teacher_log = teacher_model(images, training=False)
    with tf.GradientTape() as tape:
        # The student is the model being optimized, so run it in training mode.
        student_log = student_model(images, training=True)
        loss = get_kd_loss(teacher_log, student_log, temp)

    gradients = tape.gradient(loss, student_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, student_model.trainable_variables))

    train_loss(loss)
    train_acc(labels, nn.softmax(student_log))

## Validation function

In [35]:
def validate_model(images, labels, teacher_model, student_model, temp):
    """Evaluate the distillation loss and student accuracy on a single batch.

    No gradients are computed and no variables are updated. The module-level
    ``valid_loss`` and ``valid_acc`` metrics are updated as a side effect.

    Args:
        images: Batch of input images fed to both models.
        labels: Ground-truth labels, used only for the accuracy metric.
        teacher_model: Model that supplies the target logits.
        student_model: Model being evaluated.
        temp: Temperature forwarded to ``get_kd_loss``.

    Returns:
        None.
    """
    # Run both networks explicitly in inference mode so that train-only layers
    # (e.g. Dropout) are disabled, instead of relying on the implicit Keras
    # learning phase.
    teacher_log = teacher_model(images, training=False)
    student_log = student_model(images, training=False)
    loss = get_kd_loss(teacher_log, student_log, temp)
    valid_loss(loss)
    valid_acc(labels, nn.softmax(student_log))

## Train whole model

In [38]:
def train_model_all(epochs, teacher_model, student_model, optimizer, temp=0.5):
    """Distill the teacher into the student for a number of epochs.

    Each epoch runs ``train_model`` over every batch of the module-level
    ``train_dataset`` and ``validate_model`` over every batch of the
    module-level ``test_dataset``. It then prints the accumulated metrics and
    resets them for the next epoch.

    Args:
        epochs: Number of passes over the training data.
        teacher_model: Model that supplies the target logits.
        student_model: Model being trained.
        optimizer: Optimizer applied to the student's trainable variables.
        temp: Temperature forwarded to the per-batch functions.

    Returns:
        The ``(teacher_model, student_model)`` pair that was passed in.
    """
    report = "Epoch {}, loss: {:.3f}, acc: {:.3f}, val_loss: {:.3f}, val_acc: {:.3f}"

    for epoch_idx in range(epochs):
        for batch_images, batch_labels in train_dataset:
            train_model(
                batch_images, batch_labels, teacher_model, student_model, optimizer, temp
            )

        for batch_images, batch_labels in test_dataset:
            validate_model(batch_images, batch_labels, teacher_model, student_model, temp)

        # Read the epoch aggregates before clearing the metric state.
        epoch_loss = train_loss.result()
        epoch_acc = train_acc.result()
        epoch_val_loss = valid_loss.result()
        epoch_val_acc = valid_acc.result()

        for metric in (train_loss, train_acc, valid_loss, valid_acc):
            metric.reset_states()

        print(
            report.format(
                epoch_idx + 1, epoch_loss, epoch_acc, epoch_val_loss, epoch_val_acc
            )
        )

    return teacher_model, student_model

In [39]:
# Distill for 20 epochs at the default temperature.
# Re-running this cell continues training the same student instance.
_, student_model = train_model_all(
    epochs=20,
    teacher_model=teacher_model,
    student_model=student_model,
    optimizer=optimizer,
)

Epoch 1, loss: 0.108, acc: 0.831, val_loss: 0.095, val_acc: 0.851
Epoch 2, loss: 0.092, acc: 0.852, val_loss: 0.100, val_acc: 0.845
Epoch 3, loss: 0.090, acc: 0.855, val_loss: 0.091, val_acc: 0.848
Epoch 4, loss: 0.086, acc: 0.861, val_loss: 0.097, val_acc: 0.850
Epoch 5, loss: 0.086, acc: 0.861, val_loss: 0.111, val_acc: 0.841
Epoch 6, loss: 0.084, acc: 0.864, val_loss: 0.110, val_acc: 0.834
Epoch 7, loss: 0.083, acc: 0.866, val_loss: 0.112, val_acc: 0.829
Epoch 8, loss: 0.079, acc: 0.869, val_loss: 0.098, val_acc: 0.848
Epoch 9, loss: 0.081, acc: 0.869, val_loss: 0.110, val_acc: 0.838
Epoch 10, loss: 0.079, acc: 0.871, val_loss: 0.099, val_acc: 0.844
Epoch 11, loss: 0.079, acc: 0.873, val_loss: 0.110, val_acc: 0.831
Epoch 12, loss: 0.078, acc: 0.873, val_loss: 0.099, val_acc: 0.846
Epoch 13, loss: 0.078, acc: 0.872, val_loss: 0.108, val_acc: 0.842
Epoch 14, loss: 0.077, acc: 0.874, val_loss: 0.103, val_acc: 0.850
Epoch 15, loss: 0.077, acc: 0.872, val_loss: 0.109, val_acc: 0.835
Epoc

### Save the student model's weights to an .h5 file

In [42]:
# Only the weights are written here; rebuilding the model later requires
# build_student_model() followed by load_weights().
student_model.save_weights("student_model.h5")

In [43]:
!ls -lh *.h5

-rw-r--r-- 1 root root 163K Sep 27 04:43 student_model.h5
-rw-r--r-- 1 root root 335K Sep 27 04:11 teacher_model.h5


Let's check the total number of trainable params.

In [44]:
# Layer-by-layer parameter counts of the teacher.
teacher_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 16)        416       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 8, 8, 32)          12832     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 32)          0         
_________________________________________________________________
dropout (Dropout)            (None, 4, 4, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               6

In [45]:
# Layer-by-layer parameter counts of the student, for comparison with the teacher.
student_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 48)                37680     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                490       
Total params: 38,170
Trainable params: 38,170
Non-trainable params: 0
_________________________________________________________________


## save model to .tflite file

In [46]:
def representative_data_gen():
    """Yield sample inputs for the TFLite converter's representative dataset.

    Reads the module-level ``X_train``, takes its first 100 images one at a
    time (batch size 1) and yields each wrapped in a single-element list.
    """
    calibration_set = tf.data.Dataset.from_tensor_slices(X_train).batch(1).take(100)
    for sample in calibration_set:
        yield [sample]

def convert_to_tflite(model, tflite_file):
    """Convert a Keras model to an int8-quantized TFLite file.

    The converter uses the default optimizations, is restricted to the int8
    built-in op set with int8 input and output types, and is calibrated with
    ``representative_data_gen``.

    Args:
        model: Keras model to convert.
        tflite_file: Path of the ``.tflite`` file to write.

    Returns:
        None. The converted model is written to ``tflite_file``.
    """
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_data_gen
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8
    tflite_quant_model = converter.convert()

    # A context manager flushes and closes the file handle deterministically;
    # a bare open(...).write(...) leaves closing to the garbage collector.
    with open(tflite_file, "wb") as tflite_fh:
        tflite_fh.write(tflite_quant_model)

In [47]:
# Export the teacher first, then the student, as quantized TFLite files.
for keras_model, target_path in (
    (teacher_model, "teacher.tflite"),
    (student_model, "student.tflite"),
):
    convert_to_tflite(keras_model, target_path)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: /tmp/tmp1do_2kgn/assets
INFO:tensorflow:Assets written to: /tmp/tmp_s1a93ec/assets


INFO:tensorflow:Assets written to: /tmp/tmp_s1a93ec/assets


In [48]:
!ls -lh *.tflite

-rw-r--r-- 1 root root 40K Sep 27 04:45 student.tflite
-rw-r--r-- 1 root root 85K Sep 27 04:45 teacher.tflite


In [52]:
# Colab-only step: mount Google Drive under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [55]:
!ls

gdrive	     student_model.h5  teacher_model.h5
sample_data  student.tflite    teacher.tflite


In [58]:
!cp *.* gdrive/My\ Drive/