In [1]:
!pip install einops

Collecting einops
  Downloading https://files.pythonhosted.org/packages/5d/a0/9935e030634bf60ecd572c775f64ace82ceddf2f504a5fd3902438f07090/einops-0.3.0-py2.py3-none-any.whl
Installing collected packages: einops
Successfully installed einops-0.3.0


In [2]:
#구현하는 모델에서 쓰이는 모든 activation함수는 정의하여 드린 GELU 함수를 사용해야함.
#MultiHeadAttention에서 Head로 나눌때, 이미지를 patch로자른후 sequence로 만들때 Rearrange함수를 사용하면 편리함.(사용하지 않으셔도 됩니다)
#CIFAR10에 대한 test accuracy가 60프로 이상인 ViT모델을 만드시오.
import tensorflow as tf
from einops.layers.tensorflow import Rearrange
from tensorflow.keras.activations import gelu
GELU = lambda x : gelu(x)

In [3]:
#논문[1]에서 설명하는 MultiHeadAttention을 만들어라.
class MultiHeadedAttention(tf.keras.Model):
    #dimension - 모델의 dimension(MHA를 거친 후의 dimension)
    def __init__(self, dimension, heads=8):
        super(MultiHeadedAttention, self).__init__()
        ############Write your code Here############
        self.dimension = dimension
        self.heads = heads
        
        assert dimension % heads == 0

        self.depth = dimension // heads
        
        self.W0_layer = tf.keras.layers.Dense(dimension)
        self.query_layer = tf.keras.layers.Dense(dimension)
        self.key_layer = tf.keras.layers.Dense(dimension)
        self.value_layer = tf.keras.layers.Dense(dimension)
        self.combine_layer = tf.keras.layers.Dense(dimension)
        ############################################
    def call(self, inputs):
        output = None
        batch_size = tf.shape(inputs)[0]
        ############Write your code Here############
        query = self.query_layer(inputs)
        key = self.key_layer(inputs)
        value = self.value_layer(inputs)
        
        query = tf.reshape(query,shape = (batch_size,-1,self.heads,self.depth))
        key = tf.reshape(key,shape = (batch_size,-1,self.heads,self.depth))
        value = tf.reshape(value,shape = (batch_size,-1,self.heads,self.depth))
        tf.transpose(query, perm=[0, 2, 1, 3])
        tf.transpose(key, perm=[0, 2, 1, 3])
        tf.transpose(value, perm=[0, 2, 1, 3])

        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = GELU(scaled_score)
        attention = tf.matmul(weights, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])

        concat_attention = tf.reshape(attention, (batch_size, -1, self.dimension))
        output = self.combine_layer(concat_attention)
        ############################################
        return output

#인자로 받은 residual_function을 사용하여 real_function값을 return하여주는 Class를 만들어라.(call함수 참고)
class ResidualBlock(tf.keras.Model):
    def __init__(self, residual_function):
        super(ResidualBlock, self).__init__()
        ############Write your code Here############
        self.residual_function = residual_function
        ############################################

    def call(self, inputs):
        return self.residual_function(inputs) + inputs

#인자로 받은 normfunction에 들어가기전에 LayerNormalization을 해주는 Class를 만들어라.(call함수 참고)
class NormalizationBlock(tf.keras.Model):
    def __init__(self, norm_function, epsilon=1e-5):
        super(NormalizationBlock, self).__init__()
        ############Write your code Here############
        self.norm_function = norm_function
        self.normalize = tf.keras.layers.LayerNormalization(epsilon =  epsilon)
        ############################################

    def call(self, inputs):
        return self.norm_function(self.normalize(inputs))

#논문[1]에서의 MLPBlock을 만들어라.
class MLPBlock(tf.keras.Model):
    #output_dimension - MLPBlock의 output dimension
    #hidden_dimension - MLPBlock의 hidden layer dimension
    def __init__(self, output_dimension, hidden_dimension):
        super(MLPBlock, self).__init__()
        ############Write your code Here############
        self.layer1 = tf.keras.layers.Dense(hidden_dimension)
        self.GELU = GELU
        self.layer2 = tf.keras.layers.Dense(output_dimension)
        ############################################

    def call(self, inputs):
        output = None
        ############Write your code Here############
        output = self.layer1(inputs)
        output = self.GELU(output)
        output = self.layer2(output)
        ############################################
        return output

#논문[1]을 읽고 TransformerEncoder를 위에서 정의한 class들을 사용하여 만들어라.
class TransformerEncoder(tf.keras.Model):
    #dimension - 모델의 dimension(MHA를 거친 후의 dimension), heads - MHA에서 head의 개수
    #depth - encoder layer의 개수, mlp_dimension - MLP block의 hidden layer의 dimension
    def __init__(self, dimension, depth, heads, mlp_dimension): 
        super(TransformerEncoder, self).__init__()
        layers_ = []
        for _ in range(depth):
            ############Write your code Here############
            layers_.append(ResidualBlock(NormalizationBlock(MultiHeadedAttention(dimension, heads))))
            layers_.append(ResidualBlock(NormalizationBlock(MLPBlock(dimension,mlp_dimension))))
            ############################################
        self.layers_ = tf.keras.Sequential(layers_)

    def call(self, inputs):
        return self.layers_(inputs)

#논문[2]를 읽고 ViT모델을 위에서 정의한 class들을 사용하여 만들어라.
class ImageTransformer(tf.keras.Model):
    #image_size - 이미지의 W==H의 크기(int), patch_size - 이미지를 쪼갤 patch의 크기(int)
    #n_classes - 최종 class의 개수, batch_size - 배치사이즈
    #dimension - 모델의 dimension(MHA를 거친 후의 dimension), depth - encoder layer의 개수
    #heads - MHA에서 head의 개수, mlp_dimension - MLP block의 hidden layer의 dimension
    #channel - input image에 대한 channel의 수
    def __init__(
            self, image_size, patch_size, n_classes, batch_size,
            dimension, depth, heads, mlp_dimension, channels=3):
        super(ImageTransformer, self).__init__()
        assert image_size % patch_size == 0, 'invalid patch size for image size'

        num_patches = (image_size // patch_size) ** 2
        self.patch_size = patch_size
        self.dimension = dimension
        self.batch_size = batch_size

        self.positional_embedding = self.add_weight(
            "position_embeddings", shape=[1, num_patches + 1, dimension],
            initializer=tf.keras.initializers.RandomNormal(), dtype=tf.float32
        )
        self.classification_token = self.add_weight(
            "classification_token", shape=[1, 1, dimension],
            initializer=tf.keras.initializers.RandomNormal(), dtype=tf.float32
        )
        ############Write your code Here############
        self.image_size = image_size
        self.channels = channels
        self.patch_dim = channels * patch_size ** 2
        self.patch_projection = tf.keras.layers.Dense(dimension)
        self.encoder = TransformerEncoder(dimension, depth, heads, mlp_dimension)
        self.mlp_head = tf.keras.layers.Dense(n_classes)
        ############################################

    def call(self, inputs):
        output = None
        ############Write your code Here############
        batch_size = tf.shape(inputs)[0]
        inputs = tf.reshape(inputs,[batch_size,self.image_size,self.image_size,self.channels])
        patches = tf.image.extract_patches(
            images=inputs,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patches = tf.reshape(patches, [batch_size, -1, self.patch_dim])
        X = self.patch_projection(patches)
        class_token = tf.broadcast_to(self.classification_token,[batch_size,1,self.dimension])
        X = tf.concat([class_token,X],axis = 1)
        X = X + self.positional_embedding
        X = self.encoder(X)
        output = self.mlp_head(X[:,0])
        ############################################
        return output

In [5]:
from tensorflow.keras import datasets
# Download and prepare the CIFAR10 dataset
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
# Normalize pixel values to be between 0 and 1
############Write your code Here############
train_images = train_images / 255.0
test_images = test_images / 255.0
############################################
# Make image shape (BS, H, W, C) to (BS, C, H, W)
############Write your code Here############
train_images = train_images.reshape(-1,3,32,32)
test_images = test_images.reshape(-1,3,32,32)
############################################

#Initialize your model
#Initialize optimizer and loss and compile it to the model
############Write your code Here############

model = ImageTransformer(image_size=32, patch_size=4, n_classes=10, batch_size=32, dimension=64, depth=6, heads=4, mlp_dimension=128, channels=3)
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              optimizer="adam",
              metrics=["accuracy"])
early_stop = tf.keras.callbacks.EarlyStopping(patience=5)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)

############################################

#Train your model
############Write your code Here############
model.fit(train_images, train_labels, batch_size=32, epochs=100, validation_data = (test_images, test_labels), callbacks=[early_stop,reduce_lr])
############################################
print('==============Training Finished===============')

#Evaluate your test samples
accuracy = 0
############Write your code Here############
_, accuracy = model.evaluate(test_images, test_labels, batch_size=32)
############################################

print('Test Accuracy :', accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Test Accuracy : 0.6291999816894531
