In [1]:
import tensorflow as tf
import os
import json
import cv2
import numpy as np
import albumentations as A

In [3]:
# Target (height, width) that every image is resized to.
IMG_SIZE = (300, 300)
# Default number of samples per batch produced by the data generator.
BATCH_SIZE = 8
# Width of the classifier's final softmax layer.
NUM_CLASSES = 10

In [5]:
# Resize-only pipeline: scales an image to IMG_SIZE (height, width).
# NOTE(review): no bbox_params are configured, so boxes handed over via `bboxes=` are
# presumably not rescaled by this pipeline — confirm against the installed albumentations version.
AUGMENTATION = A.Compose([A.Resize(height=IMG_SIZE[0], width=IMG_SIZE[1])])

load the labels

In [7]:
def load_labels(label_file):
    """
    Load the annotation file and return its parsed content.

    Args:
    label_file: File path of the JSON label file.

    Returns:
    The deserialized JSON content (whatever top-level structure the file holds).
    """
    # JSON text is UTF-8; being explicit keeps a platform-specific default
    # encoding (e.g. cp1252 on Windows) from mis-decoding non-ASCII characters.
    with open(file=label_file, mode='r', encoding='utf-8') as f:
        return json.load(fp=f)

preprocess image pipeline

In [13]:
def preprocessing_image(image_path):
    """
    Read an image from disk and convert it from OpenCV's BGR channel order to RGB.

    Args:
    image_path: Training image file path.

    Returns:
    The color-corrected (RGB) image array.

    Raises:
    FileNotFoundError: If no file exists at image_path.
    ValueError: If the file exists but OpenCV cannot decode it as an image.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising; without this
    # check the problem only surfaces as an opaque cv2.error inside cvtColor.
    if image is None:
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")
        raise ValueError(f"Could not decode image file: {image_path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

preprocess a sample pipeline

In [15]:
#old ignore this code
def process_sample(sample, image_dir):
    """
    Load one sample's image, resize it, and collect its 2D boxes and labels.

    Args:
    sample: One annotation entry; the keys read are 'name' and 'labels'.
    image_dir: Directory containing the training images.

    Return:
    A tuple (image, boxes, labels):
      image  - RGB image resized by AUGMENTATION.
      boxes  - (N, 4) float array of [x1, y1, x2, y2] in resized-image pixels.
      labels - (N,) array with the category of each box.
    """
    image_path = os.path.join(image_dir, sample['name'])
    image = preprocessing_image(image_path=image_path)

    # Factors mapping original pixel coordinates onto the IMG_SIZE (height, width) image,
    # so the returned boxes line up with the resized image.
    height, width = image.shape[:2]
    scale_x = IMG_SIZE[1] / width
    scale_y = IMG_SIZE[0] / height

    # place holders for the bounding boxes dimensions and labels
    b_boxes = []
    labels = []

    # extract bounding box dimensions and labels from sample
    for item in sample['labels']:
        # Labels without a 'box2d' entry are skipped instead of raising KeyError.
        box = item.get('box2d')
        if box is None:
            continue
        b_boxes.append([box['x1'] * scale_x, box['y1'] * scale_y,
                        box['x2'] * scale_x, box['y2'] * scale_y])
        labels.append(item['category'])

    # Resize the image. Boxes are rescaled manually above, which is only valid while
    # AUGMENTATION is a pure resize.
    augmented = AUGMENTATION(image=image)

    # reshape keeps the (N, 4) layout even when the sample has no boxes.
    boxes = np.array(b_boxes, dtype=np.float64).reshape(-1, 4)
    return augmented['image'], boxes, np.array(labels)

image generator pipeline

In [40]:
def data_generator(image_dir, label_file, batch_size=BATCH_SIZE):
    """
    Endlessly generate batches of images, bounding boxes, labels and masks.

    Args:
    image_dir: Directory containing the images.
    label_file: Path of the JSON label file.
    batch_size: Number of samples per batch (the last batch of a pass may be smaller).

    Return:
    It yields tuples (images, boxes, labels, masks):
      images - array stacking the per-sample images.
      boxes  - ragged tensor with a variable number of 4-value boxes per sample.
      labels - ragged tensor with a variable number of labels per sample.
      masks  - array stacking the per-sample masks with a trailing channel axis.

    Raises:
    ValueError: If the label file contains no samples (raised on first iteration).
    """
    labels_data = load_labels(label_file)
    total_images = len(labels_data)
    if total_images == 0:
        # Without this guard the `while True` loop below would spin forever
        # without ever yielding a batch.
        raise ValueError(f"No samples found in label file: {label_file}")

    while True:
        for start in range(0, total_images, batch_size):
            batch_samples = labels_data[start:start + batch_size]
            images, b_boxes, labels, masks = [], [], [], []

            for sample in batch_samples:
                img, box, lbl, mask = process_sample(sample, image_dir)
                images.append(img)
                # Nested Python lists are the input form tf.ragged.constant expects;
                # reshape keeps a (0, 4) layout for samples without boxes.
                b_boxes.append(np.asarray(box).reshape(-1, 4).tolist())
                labels.append(np.asarray(lbl).tolist())
                # Append this sample's mask (the list itself was appended before).
                masks.append(mask)

            # Ragged tensors handle the varying number of boxes/labels per sample.
            images = np.array(images)
            # Only the per-sample box count is ragged; each box has exactly 4 values.
            boxes = tf.ragged.constant(b_boxes, dtype=tf.float32, ragged_rank=1, inner_shape=(4,))
            labels = tf.ragged.constant(labels)
            # Add a trailing channel axis without hard-coding the mask size.
            masks = np.expand_dims(np.array(masks), axis=-1)

            yield images, boxes, labels, masks

model

In [42]:
# Loading Data and file paths
# The dataset root can be overridden with the BDD100K_ROOT environment variable so the
# notebook is not tied to one machine; the default keeps the original location.
BDD100K_ROOT = os.environ.get(
    "BDD100K_ROOT", "D:/learning_desk/bosch_assignment_bdd_100k/data/bdd100k"
).rstrip("/\\")
IMAGE_DIR = f"{BDD100K_ROOT}/images/train"
LABEL_FILE = f"{BDD100K_ROOT}/labels/bdd100k_labels_images_train.json"
# Creating the generator is lazy: nothing is read until it is first iterated.
train_data = data_generator(IMAGE_DIR, LABEL_FILE, batch_size=BATCH_SIZE)

In [44]:
# Inspect the generator object; data_generator's body has not started running at this point.
train_data

<generator object data_generator at 0x000001E7C65CEAE0>

In [19]:
def process_sample(sample, image_dir):
    """
    Load one sample and return its resized image, 2D boxes, labels and polygon mask.

    Args:
    sample: One annotation entry; the keys read are 'name' and 'labels'.
    image_dir: Directory containing the training images.

    Return:
    A tuple (image, boxes, labels, mask):
      image  - RGB image resized by AUGMENTATION.
      boxes  - (N, 4) float array of [x1, y1, x2, y2] in resized-image pixels.
      labels - (N,) array with the category of each box.
      mask   - (IMG_SIZE[0], IMG_SIZE[1]) uint8 array, filled with 255 inside every
               'poly2d' polygon of the sample before resizing.
    """
    image_path = os.path.join(image_dir, sample['name'])
    image = preprocessing_image(image_path=image_path)

    height, width = image.shape[:2]
    target_height, target_width = IMG_SIZE
    # Factors mapping original pixel coordinates onto the resized image, so the
    # returned boxes line up with the returned image and mask.
    scale_x = target_width / width
    scale_y = target_height / height

    # place holders for the bounding boxes dimensions and labels
    b_boxes = []
    labels = []
    # The mask is drawn at the original resolution because polygon vertices are
    # given in original-image coordinates; it is resized afterwards.
    mask = np.zeros((height, width), dtype=np.uint8)

    # extract bounding box dimensions, labels and polygons from sample
    for item in sample.get('labels') or []:
        box = item.get('box2d')
        if box is not None:
            b_boxes.append([box['x1'] * scale_x, box['y1'] * scale_y,
                            box['x2'] * scale_x, box['y2'] * scale_y])
            labels.append(item['category'])

        for poly in item.get('poly2d') or []:
            poly_vertices = np.array(poly["vertices"], dtype=np.int32).reshape((-1, 1, 2))
            cv2.fillPoly(mask, [poly_vertices], color=255)  # Fill the polygon area in mask

    # Resize the image. Boxes are rescaled manually above, which is only valid while
    # AUGMENTATION is a pure resize.
    augmented = AUGMENTATION(image=image)
    # cv2.resize takes dsize as (width, height), the reverse of IMG_SIZE's order.
    # NOTE(review): the default interpolation blends 0/255 at polygon edges; consider
    # cv2.INTER_NEAREST if a strictly binary mask is required.
    mask_resized = cv2.resize(mask, (target_width, target_height))

    # reshape keeps the (N, 4) layout even when the sample has no boxes.
    boxes = np.array(b_boxes, dtype=np.float64).reshape(-1, 4)
    return augmented['image'], boxes, np.array(labels), mask_resized

In [23]:
# Smoke test: run the first annotated sample through the preprocessing pipeline.
labels_data = load_labels(LABEL_FILE)
sample = labels_data[0]
image, b_boxes, labels, mask = process_sample(sample, IMAGE_DIR)

# Report the shape of every array returned for that sample.
for _caption, _array in (
    ("Image shape:", image),
    ("Bounding Boxes:", b_boxes),
    ("Labels:", labels),
    ("Mask shape:", mask),
):
    print(_caption, _array.shape)

Image shape: (300, 300, 3)
Bounding Boxes: (7, 4)
Labels: (7,)
Mask shape: (300, 300)


model build

In [26]:
def build_model(num_classes=NUM_CLASSES, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)):
    """Build an image classifier on top of a frozen MobileNetV2 feature extractor.

    The network is MobileNetV2 (without its top) -> global average pooling ->
    Dense(128, relu) -> Dense(num_classes, softmax). It outputs class probabilities
    only; it has no box-regression head.

    Args:
    num_classes: Width of the final softmax layer.
    input_shape: (height, width, channels) of the input images.

    Returns:
    The compiled tf.keras.Sequential model.
    """
    # NOTE(review): the pretrained MobileNetV2 weights presumably expect inputs scaled
    # as done by tf.keras.applications.mobilenet_v2.preprocess_input; no such scaling
    # is visible in this notebook — confirm.
    base_model = tf.keras.applications.MobileNetV2(input_shape=input_shape, include_top=False)
    # Freeze the backbone so only the new head is trained.
    base_model.trainable = False

    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(num_classes, activation="softmax")
    ])

    # An explicit metric object is used because the "precision" string alias is not
    # resolvable in every Keras version.
    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=[tf.keras.metrics.Precision(name="precision")],
    )
    return model

In [28]:
# Instantiate the compiled model; the log below shows Keras downloading the
# MobileNetV2 weights when they are not cached yet.
model = build_model()

  base_model = tf.keras.applications.MobileNetV2(input_shape=(300, 300, 3), include_top=False)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [34]:
# Destination of the trained model file.
# NOTE(review): the file name says "ssd", while build_model in this notebook ends in a
# softmax classification head — confirm the intended name.
model_dir = 'D:/learning_desk/bosch_assignment_bdd_100k/models/'
model_name = 'ssd_mobilenet_v2_bdd.h5'
model_path = os.path.join(model_dir, model_name)
# Bare expression so the resolved path is shown as the cell output.
model_path

'D:/learning_desk/bosch_assignment_bdd_100k/models/ssd_mobilenet_v2_bdd.h5'

In [None]:
# Train for one epoch of 100 generator batches, then write the model to model_path.
# NOTE(review): train_data yields 4-element tuples (images, boxes, labels, masks), whereas
# Model.fit documents generators yielding (inputs, targets) or (inputs, targets,
# sample_weights) — confirm this call runs; an adapter producing targets is likely needed.
model.fit(train_data, epochs=1, steps_per_epoch=100)
model.save(filepath=model_path)