### Single Class Bounding Box Regression
Reference: https://www.pyimagesearch.com/2020/10/05/object-detection-bounding-box-regression-with-keras-tensorflow-and-deep-learning/

In [1]:
import cv2
import numpy as np
import os

from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array, load_img

### Parameters

In [2]:
# Random seed used for the train/validation split
SEED: int = 1

In [3]:
# Training data locations: an image folder and a CSV of box annotations,
# both under the same root directory
DATA_PATH: str = 'data'
ANN_DATA_PATH: str = os.path.join(DATA_PATH, 'airplanes.csv')
IMG_DATA_PATH: str = os.path.join(DATA_PATH, 'images')

In [43]:
# Single image used to try out the trained detector
IMG_PATH: str = 'image_0262.jpg'

In [5]:
# Training hyper-parameters
INPUT_SHAPE = (224, 224, 3)  # shape of the tensor fed to the network
BATCH_SIZE = 32
EPOCHS = 20
LR = 1e-4                    # Adam learning rate
VAL_PERC = 0.1               # passed as test_size to train_test_split (a fraction)

In [6]:
# Where the trained model is written
OUT_PATH: str = 'out'
OUT_MODEL_PATH: str = os.path.join(OUT_PATH, 'detector.h5')

### Load Data

In [20]:
# Load Annotation Data
# List of (Filename, x0, y0, x1, y1), one comma-separated row per line.
# The context manager closes the file handle deterministically, and empty
# rows are dropped so an empty file yields [] instead of [''].
with open(ANN_DATA_PATH) as ann_file:
    ann_rows = [row for row in ann_file.read().strip().split('\n') if row]

In [21]:
# Load Image Data
# For every annotation row: read the image, express its bounding box as
# fractions of the original image width/height, and resize the image to the
# network input size. Results are collected in `data` and `targets`.
data, targets = [], []
for row in ann_rows:
    # Extract filename and bounding box
    (filename, x0, y0, x1, y1) = row.split(',')

    # Read Image
    # cv2.imread returns None (it does not raise) when the file is missing or
    # cannot be decoded, so fail here with the offending path.
    img_path = os.path.join(IMG_DATA_PATH, filename)
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f'Could not read image: {img_path}')

    # Normalize bounding box
    h, w = img.shape[:2]
    x0_n, x1_n = float(x0) / w, float(x1) / w
    y0_n, y1_n = float(y0) / h, float(y1) / h

    # Preprocessing
    # cv2.resize takes dsize as (width, height), while INPUT_SHAPE is
    # (height, width, channels); swap explicitly so non-square inputs work.
    img_p = cv2.resize(img, (INPUT_SHAPE[1], INPUT_SHAPE[0]))

    # Append data
    data.append(img_p)
    targets.append((x0_n, y0_n, x1_n, y1_n))

In [44]:
# Load Test Data
# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded, so fail here with the offending path.
img_test = cv2.imread(IMG_PATH)
if img_test is None:
    raise FileNotFoundError(f'Could not read test image: {IMG_PATH}')

# Preprocess Test Data
# Resize to the network input size, scale pixel values by 1/255 and add a
# leading batch axis. cv2.resize takes dsize as (width, height), while
# INPUT_SHAPE is (height, width, channels), hence the explicit swap.
img_test_p = cv2.resize(img_test, (INPUT_SHAPE[1], INPUT_SHAPE[0]))
img_test_p = img_test_p.astype(np.float32) / 255.0
img_test_p = img_test_p[np.newaxis, ...]

### Preprocess Data

In [23]:
# Convert the Python lists to float32 arrays and scale the pixels by 1/255.
# NOTE: `data` is rebound here, so re-running only this cell divides by 255
# a second time; re-run the loading cell first.
targets = np.array(targets, dtype=np.float32)
data = np.array(data, dtype=np.float32)
data = data / 255.0

### Split Data

In [25]:
# Hold out a VAL_PERC fraction of the samples for validation
split = train_test_split(
    data,
    targets,
    test_size=VAL_PERC,
    random_state=SEED,
)

# With two input arrays the result is ordered as
# [data_train, data_val, targets_train, targets_val]
data_train, data_val, targets_train, targets_val = split

### Create Model

In [26]:
# VGG16 convolutional base (ImageNet weights, no classifier head).
# Its weights are frozen so only the layers added on top are trained.
vgg_input = Input(shape=INPUT_SHAPE)
vgg = VGG16(weights='imagenet', include_top=False, input_tensor=vgg_input)
vgg.trainable = False

In [27]:
# Box regression head on top of the VGG16 features:
# Flatten -> Dense(128) -> Dense(64) -> Dense(32) -> Dense(4, sigmoid)
head = Flatten()(vgg.output)
for units in (128, 64, 32):
    head = Dense(units, activation='relu')(head)

# Four sigmoid outputs, one per box coordinate, each in [0, 1]
final = Dense(4, activation='sigmoid')(head)
model = Model(inputs=vgg.input, outputs=final)

In [28]:
# Mean-squared-error regression loss, optimized with Adam at learning rate LR
optimizer = Adam(learning_rate=LR)
model.compile(optimizer=optimizer, loss='mse')

### Train Model

In [None]:
# Train on the training split and evaluate on the validation split each epoch
model.fit(
    x=data_train,
    y=targets_train,
    validation_data=(data_val, targets_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

### Save Model

In [30]:
# Create the output directory if needed. exist_ok=True makes this a single
# call that is safe to re-run and avoids the check-then-create race.
os.makedirs(OUT_PATH, exist_ok=True)

In [31]:
# Persist the trained detector to OUT_MODEL_PATH
model.save(filepath=OUT_MODEL_PATH)

### Performance Evaluation

In [45]:
# Run the detector on the preprocessed test image (a batch of one)
preds = model.predict(x=img_test_p)

In [46]:
# Map the predicted relative box back to pixel coordinates of the test image
test_h, test_w = img_test.shape[0], img_test.shape[1]
x0_rel, y0_rel, x1_rel, y1_rel = preds[0]

# x coordinates scale with the width, y coordinates with the height;
# int() truncates to whole pixels
x0, x1 = int(x0_rel * test_w), int(x1_rel * test_w)
y0, y1 = int(y0_rel * test_h), int(y1_rel * test_h)

In [47]:
# Draw the predicted box on the test image as a green outline, 2 px thick
box_color = (0, 255, 0)
box_thickness = 2
img_test = cv2.rectangle(img_test, (x0, y0), (x1, y1), box_color, box_thickness)

In [48]:
# Show the detection inline with matplotlib. cv2.imshow/waitKey opens a
# native window and blocks the kernel until a key is pressed, and fails in
# headless or hosted notebook environments.
fig, ax = plt.subplots()
# OpenCV stores images as BGR; matplotlib expects RGB
ax.imshow(cv2.cvtColor(img_test, cv2.COLOR_BGR2RGB))
ax.set_title('Detection Result')
ax.axis('off')
plt.show()