In [1]:
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Dropout, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.regularizers import l2
from mnist_local_generator import mnist_localization_generator
import matplotlib.pyplot as plt 
import numpy as np 
from utils import xywh2xyxy, draw_rectangle
import cv2
import tensorflow.keras.backend as K
import tensorflow as tf 
from tensorflow.keras.losses import CategoricalCrossentropy, MSE

In [2]:
# Build the MNIST localization dataset (train split and test split).
# Each split unpacks to (images, classification targets, box-regression targets).
# NOTE(review): the two (231, 231) arguments are presumably the train/test canvas
# sizes -- confirm against mnist_localization_generator.
train_split, test_split = mnist_localization_generator(
    (231, 231),
    (231, 231),
    background=True,
    image_size_range=(60, 80),
    image_ratio_range=(0.5, 1.5),
    n_sample=1000,
)
train_images, train_cls_true, train_reg_true = train_split
test_images, test_cls_true, test_reg_true = test_split

print(train_images.shape, test_images.shape)

배경 class가 포함되어 class 는 11로 설정 되어 있습니다.


100%|██████████| 1000/1000 [00:00<00:00, 39997.94it/s]
  4%|▍         | 40/1000 [00:00<00:02, 395.93it/s]

train image 을 random 하게 resize 합니다.


100%|██████████| 1000/1000 [00:02<00:00, 459.96it/s]
100%|██████████| 1000/1000 [00:00<00:00, 49992.30it/s]
  5%|▌         | 50/1000 [00:00<00:01, 495.01it/s]

train image 을 random 하게 resize 합니다.


100%|██████████| 1000/1000 [00:02<00:00, 462.31it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

object 가 없는 background 이미지를 추가합니다.(전체 이미지의 10%)
background 의 class 는 11로 설정 되어 있습니다.
background 의 regression 는 (0,0,0,0)로 설정 되어 있습니다.


100%|██████████| 100/100 [00:00<00:00, 416.70it/s]
100%|██████████| 100/100 [00:00<00:00, 460.83it/s]


(1100, 231, 231, 1) (1100, 231, 231, 1)


In [3]:
# Pack both targets into one array along the channel axis so a single model
# output / loss function can consume them: box regression first, classes after.
train_reg_cls = np.concatenate((train_reg_true, train_cls_true), axis=-1)
tuple(arr.shape for arr in (train_cls_true, train_reg_true, train_reg_cls))

((1100, 1, 1, 11), (1100, 1, 1, 4), (1100, 1, 1, 15))

In [4]:
# Reset Keras' global state before (re)building the graph in this cell.
K.clear_session()

# Fully convolutional network: single-channel 231x231 input.
input_ = Input(shape=(231, 231, 1))
print(input_)

# Every stage uses its base width divided by 8. Integer division (`//`) keeps
# `filters` an int, which is what Conv2D documents; plain `/` would pass a float.

# Conv stage 1: 11x11 conv, stride 4, then 2x2 max-pool
# (model summary: 56x56x12 after the conv, 28x28x12 after the pool).
conv_1 = Conv2D(filters=96 // 8, kernel_size=(11, 11), strides=(4, 4), padding='valid', activation='relu',
                kernel_initializer=RandomNormal(0, 0.01), kernel_regularizer=l2(0.00001))(input_)
maxp_1 = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(conv_1)

# Conv stage 2: 5x5 conv, then 2x2 max-pool (model summary: 24x24x32 after the conv).
conv_2 = Conv2D(filters=256 // 8, kernel_size=(5, 5), strides=(1, 1), padding='valid', activation='relu',
                kernel_initializer=RandomNormal(0, 0.01), kernel_regularizer=l2(0.00001))(maxp_1)
maxp_2 = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(conv_2)

# Conv stages 3-5: 3x3 'same' convs, so the spatial size is unchanged until the final pool.
# Conv stage 3
conv_3 = Conv2D(filters=512 // 8, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu',
                kernel_initializer=RandomNormal(0, 0.01), kernel_regularizer=l2(0.00001))(maxp_2)

# Conv stage 4
conv_4 = Conv2D(filters=1024 // 8, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu',
                kernel_initializer=RandomNormal(0, 0.01), kernel_regularizer=l2(0.00001))(conv_3)

# Conv stage 5, followed by the last 2x2 max-pool; maxp_5 feeds both heads.
conv_5 = Conv2D(filters=1024 // 8, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu',
                kernel_initializer=RandomNormal(0, 0.01), kernel_regularizer=l2(0.00001))(conv_4)
maxp_5 = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(conv_5)


KerasTensor(type_spec=TensorSpec(shape=(None, 231, 231, 1), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'")


In [5]:
# Localization head, built on the shared backbone output maxp_5.
# Filter counts use integer division so Conv2D receives ints rather than floats.

# loc header 1: 6x6 'valid' conv
conv_6 = Conv2D(filters=4096 // 8, kernel_size=(6, 6), strides=(1, 1), padding='valid', activation='relu',
                kernel_initializer=RandomNormal(0, 0.01), kernel_regularizer=l2(0.00001))(maxp_5)

# loc header 2: 1x1 conv
conv_7 = Conv2D(filters=1024 // 8, kernel_size=(1, 1), strides=(1, 1), padding='valid', activation='relu',
                kernel_initializer=RandomNormal(0, 0.01), kernel_regularizer=l2(0.00001))(conv_6)

# loc header 3: 4 output channels (the box regression values), no activation.
loc_output = Conv2D(filters=4, kernel_size=(1, 1), strides=(1, 1), padding='valid')(conv_7)


In [6]:

# Classification head, built on the shared backbone output maxp_5.
# Filter counts use integer division so Conv2D receives ints rather than floats.
# NOTE: conv_6 / conv_7 are rebound here to brand-new layers; loc_output was
# already wired to the localization head's own layers, so the two heads share
# no weights despite the reused variable names.

# cls header 1: 6x6 'valid' conv
conv_6 = Conv2D(filters=4096 // 8, kernel_size=(6, 6), strides=(1, 1), padding='valid', activation='relu',
                kernel_initializer=RandomNormal(0, 0.01), kernel_regularizer=l2(0.00001))(maxp_5)

# cls header 2: 1x1 conv
conv_7 = Conv2D(filters=1024 // 8, kernel_size=(1, 1), strides=(1, 1), padding='valid', activation='relu',
                kernel_initializer=RandomNormal(0, 0.01), kernel_regularizer=l2(0.00001))(conv_6)

# cls header 3: 11 output channels with a softmax over the channel axis.
cls_output = Conv2D(filters=11, kernel_size=(1, 1), strides=(1, 1), padding='valid', activation='softmax')(conv_7)


In [7]:
# Merge both heads along the channel axis: 4 box channels first, then 11 class
# channels -- the same order in which the training targets are concatenated.
head_outputs = [loc_output, cls_output]
output = Concatenate(axis=-1)(head_outputs)

In [8]:
def overfeat_mse(true, pred):
    """
    Mean squared error between box targets and box predictions.

    :param true: ndarray or tensor whose last axis holds the 4 box values (C=4)
    :param pred: ndarray or tensor with the same shape as `true`
    :return: mse_, scalar; 0 when the inputs contain no rows (for example a
        batch in which every sample was masked out as background) instead of
        the NaN that a mean over an empty tensor produces
    """

    # MSE averages the squared error over the last (box) axis.
    per_position = MSE(true, pred)

    # Average over everything that is left, written as sum / count so that an
    # empty input yields 0 rather than 0/0 = NaN (which would poison training).
    total = tf.math.reduce_sum(per_position)
    count = tf.cast(tf.size(per_position), per_position.dtype)
    mse_ = tf.math.divide_no_nan(total, count)

    return mse_

In [9]:
def overfeat_cee(true, pred):
    """
    Categorical cross-entropy between class targets and class predictions.

    :param true: ndarray, 4d tensor (NHWC) where C=11
    :param pred: ndarray or tensor, 4d tensor (NHWC) where C=11
    :return: cee_, the value returned by a default-configured
        CategoricalCrossentropy loss object for (true, pred)
    """
    # A fresh loss object with default settings is built on every call.
    return CategoricalCrossentropy()(true, pred)

In [10]:
def overfeat_loss(true, pred):
    """
    Combined localization + classification loss.

    :param true: ndarray, shape = (N, 1, 1, 15=(4+11)); channels 0-3 hold the
        box regression targets, channels 4-14 the class targets
    :param pred: ndarray, shape = (N, 1, 1, 15=(4+11)), same channel layout
    :return: 0.01 * box MSE (positive samples only) + 2 * class cross-entropy
    """
    box_true, cls_true = true[..., :4], true[..., 4:]
    box_pred, cls_pred = pred[..., :4], pred[..., 4:]

    # Only positive samples contribute to the localization loss: keep the
    # positions whose last class channel is not 1.
    # NOTE(review): the last class channel is presumably the background flag
    # (the data generator logs that background is class 11) -- confirm.
    is_object = cls_true[..., -1] != 1

    reg_term = overfeat_mse(box_true[is_object], box_pred[is_object])
    cls_term = overfeat_cee(cls_true, cls_pred)

    return reg_term * 0.01 + cls_term * 2


In [11]:
def metric_mse(true, pred):
    """
    Metric: box-regression MSE computed on positive samples only.

    :param true: ndarray, shape = (N, 1, 1, 15=(4+11)); channels 0-3 hold the
        box regression targets, channels 4-14 the class targets
    :param pred: ndarray, shape = (N, 1, 1, 15=(4+11)), same channel layout
    :return: the value of overfeat_mse over the positions kept by the mask
    """

    true_reg = true[:, :, :, :4]
    true_cls = true[:, :, :, 4:]
    pred_reg = pred[:, :, :, :4]

    # Only positive samples are scored: keep the positions whose last class
    # channel is not 1. The predicted class scores are not needed here.
    pos_mask = true_cls[:, :, :, -1] != 1
    pos_true_reg = true_reg[pos_mask]
    pos_pred_reg = pred_reg[pos_mask]

    return overfeat_mse(pos_true_reg, pos_pred_reg)


In [12]:
def metric_cee(true, pred):
    """
    Metric: classification cross-entropy on the class channels only.

    :param true: ndarray, shape = (N, 1, 1, 15=(4+11))
    :param pred: ndarray, shape = (N, 1, 1, 15=(4+11))
    :return: overfeat_cee evaluated on channels 4 onward of both inputs
    """
    # Channels 0-3 are the box values; everything after them is class scores.
    return overfeat_cee(true[..., 4:], pred[..., 4:])


In [13]:
# Assemble the Keras model from the input tensor and the concatenated heads,
# then compile it with the combined loss and the two per-task metrics.
model = Model(inputs=input_, outputs=output)
model.compile(
    optimizer='adam',
    loss=overfeat_loss,
    metrics=[metric_mse, metric_cee],
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 231, 231, 1) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 56, 56, 12)   1464        input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 28, 28, 12)   0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 24, 24, 32)   9632        max_pooling2d[0][0]              
______________________________________________________________________________________________

In [None]:
# Train on the packed (box + class) targets.
model.fit(
    x=train_images,
    y=train_reg_cls,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100

In [None]:
# Run inference on the first 10 test images only.
sample_images = test_images[:10]
pred_loc_cls = model.predict(sample_images)

In [None]:
# Split the packed prediction back into its two parts along the channel axis:
# the first 4 channels are the box values, the remaining ones the class scores.
pred_loc, pred_cls = pred_loc_cls[..., :4], pred_loc_cls[..., 4:]

In [None]:
# Drop only the two singleton spatial axes (1 and 2) so the batch axis always
# survives; a bare np.squeeze would also remove the batch axis when a single
# image is predicted, which breaks the per-sample indexing done later.
pred_loc = np.squeeze(pred_loc, axis=(1, 2))
# Class index = position of the largest score on the channel axis.
pred_cls = np.squeeze(np.argmax(pred_cls, axis=-1), axis=(1, 2))

In [None]:
# Convert the predicted boxes with the project helper xywh2xyxy.
# NOTE(review): presumably (x, y, w, h) -> corner format (x1, y1, x2, y2);
# confirm against utils. This cell rebinds pred_loc, so running it a second
# time without re-running the cells above would apply the conversion twice.
pred_loc = xywh2xyxy(pred_loc)

In [None]:
# Visual check of one prediction: draw the predicted box on the chosen test
# image and use the predicted class index as the plot title.
index = 5
sample_box = pred_loc[index]
sample_label = pred_cls[index]
rected_image = draw_rectangle(test_images[index, :, :, 0], sample_box)
print(pred_cls)
plt.imshow(rected_image)
plt.title(sample_label)

In [None]:
# Persist the trained model to an HDF5 file next to the notebook.
model.save(filepath='./best_model.h5')