In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Run tf.function-wrapped code eagerly (op by op) instead of as a traced graph.
# NOTE(review): this looks like a debugging aid; eager execution is typically
# slower, so consider removing it once training works — TODO confirm intent.
tf.config.run_functions_eagerly(True)

def _detection_head_activation(head_outputs):
    """Activation for the detection head: linear boxes, softmax class scores.

    The last axis is laid out as ``[4 box values, num_classes class scores]``.
    Applying a single softmax over the whole axis would couple the box
    coordinates with the class scores (everything forced to sum to 1), so the
    two groups are activated separately and concatenated back together.
    """
    box_outputs = head_outputs[..., :4]
    class_probs = tf.nn.softmax(head_outputs[..., 4:], axis=-1)
    return tf.concat([box_outputs, class_probs], axis=-1)


def create_custom_object_detection_model(num_classes):
    """Build a small fully convolutional detection network.

    The backbone is four ``Conv2D(3x3, relu) -> MaxPooling2D(2x2)`` stages with
    32, 64, 128 and 256 filters. The head is a 1x1 convolution producing
    ``4 + num_classes`` channels per spatial cell: the first 4 channels are raw
    (linear) box regression outputs and the remaining ``num_classes`` channels
    are softmax class probabilities.

    Args:
        num_classes: Number of object classes predicted per spatial cell.

    Returns:
        A ``Sequential`` model accepting ``(batch, height, width, 3)`` inputs
        with unspecified height and width, and producing a
        ``(batch, h', w', 4 + num_classes)`` tensor.
    """
    model = models.Sequential()

    # Convolutional backbone. Only the first layer declares the input shape;
    # spatial dimensions are left as None so any image size is accepted.
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(None, None, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    for filters in (64, 128, 256):
        model.add(layers.Conv2D(filters, (3, 3), activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))

    # Detection head: box values stay linear, only class scores get softmax.
    model.add(layers.Conv2D(4 + num_classes, (1, 1), activation=_detection_head_activation))

    return model

# Example usage: build the detector for the three target classes and print
# its layer-by-layer summary.
num_classes = 3  # pen, paper, pencil
model = create_custom_object_detection_model(num_classes=num_classes)
model.summary()


2024-08-21 18:43:04.996244: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-08-21 18:43:05.458882: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/ros/noetic/lib:/usr/local/cuda-11.3/lib64
2024-08-21 18:43:05.458909: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-08-21 18:43:06.446149: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvrtc.so.11.

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, None, None, 32)    896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, None, None, 32)   0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, None, None, 64)    18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, None, None, 64)   0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, None, None, 128)   73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, None, None, 128)  0

2024-08-21 18:43:07.812031: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-08-21 18:43:07.812318: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/ros/noetic/lib:/usr/local/cuda-11.3/lib64
2024-08-21 18:43:07.812389: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/ros/noetic/lib:/usr/local/cuda-11.3/lib64
2024-08-21 18:43:07.812445: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'

In [2]:
def parse_tfrecord_fn(example, max_objects=10):
    """Parse one serialized ``tf.train.Example`` into an image and fixed-size targets.

    Args:
        example: Scalar string tensor holding a serialized example with the
            keys ``image/encoded``, ``image/object/bbox/{xmin,xmax,ymin,ymax}``
            and ``image/object/class/label``.
        max_objects: Fixed number of object slots per image. Records with fewer
            objects are zero-padded; records with more are truncated to the
            first ``max_objects`` entries. Adjust this to your dataset.

    Returns:
        ``(image, (bboxes, labels))`` where ``image`` is a float32 3-channel
        tensor, ``bboxes`` has shape ``(max_objects, 4)`` with columns
        ``[xmin, ymin, xmax, ymax]`` and ``labels`` has shape ``(max_objects,)``.
    """
    feature_description = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'image/object/class/label': tf.io.VarLenFeature(tf.int64),
    }

    example = tf.io.parse_single_example(example, feature_description)

    image = tf.image.decode_jpeg(example['image/encoded'], channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)

    xmin = tf.sparse.to_dense(example['image/object/bbox/xmin'])
    xmax = tf.sparse.to_dense(example['image/object/bbox/xmax'])
    ymin = tf.sparse.to_dense(example['image/object/bbox/ymin'])
    ymax = tf.sparse.to_dense(example['image/object/bbox/ymax'])
    labels = tf.sparse.to_dense(example['image/object/class/label'])

    bboxes = tf.stack([xmin, ymin, xmax, ymax], axis=-1)

    # Truncate first: without this, a record with more than `max_objects`
    # objects yields a negative padding amount and tf.pad raises.
    bboxes = bboxes[:max_objects]
    labels = labels[:max_objects]

    # Zero-pad up to the fixed slot count so every example has the same shape.
    bboxes = tf.pad(bboxes, [[0, max_objects - tf.shape(bboxes)[0]], [0, 0]])
    labels = tf.pad(labels, [[0, max_objects - tf.shape(labels)[0]]])

    # Pin the static shapes so downstream batching/layers see known dimensions.
    bboxes = tf.ensure_shape(bboxes, [max_objects, 4])
    labels = tf.ensure_shape(labels, [max_objects])

    return image, (bboxes, labels)

def load_dataset(tfrecord_paths, batch_size, shuffle=True, shuffle_buffer_size=1000):
    """Build a batched, prefetched ``tf.data`` pipeline from TFRecord file(s).

    Args:
        tfrecord_paths: Path or list of paths passed to ``TFRecordDataset``.
        batch_size: Number of parsed examples per batch. Images are not
            resized by ``parse_tfrecord_fn``, so a batch size above 1 only
            works when all images in a batch share the same shape.
        shuffle: Whether to shuffle examples before batching. Defaults to
            True, matching the previous unconditional behaviour; pass False
            for evaluation data where ordering should be stable.
        shuffle_buffer_size: Size of the shuffle buffer (previously the
            hard-coded value 1000).

    Returns:
        A ``tf.data.Dataset`` yielding ``(image, (bboxes, labels))`` batches.
    """
    raw_dataset = tf.data.TFRecordDataset(tfrecord_paths)
    # Parse records in parallel; output order stays deterministic by default.
    dataset = raw_dataset.map(parse_tfrecord_fn, num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        dataset = dataset.shuffle(shuffle_buffer_size)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Example usage: one pipeline per split, one example per batch.
# NOTE(review): these are absolute, machine-specific paths; consider a
# configurable data directory. load_dataset shuffles by default, so the test
# split is shuffled as well.
train_dataset = load_dataset(
    '/home/tugalinebacker/hms_surprise/data/raw/first_shot/train.record',
    batch_size=1,
)
test_dataset = load_dataset(
    '/home/tugalinebacker/hms_surprise/data/raw/first_shot/test.record',
    batch_size=1,
)




In [3]:
def custom_loss(y_true, y_pred):
    """Return the sum of a box-regression term and a classification term.

    Args:
        y_true: A pair ``(true_boxes, true_classes)``, unpacked positionally.
        y_pred: Tensor whose last axis holds 4 box values followed by the
            per-class scores.

    Returns:
        Scalar tensor: mean squared error over the boxes plus sparse
        categorical cross-entropy over the classes (the cross-entropy uses the
        default ``from_logits=False``, i.e. it expects probabilities).

    NOTE(review): the box term needs ``true_boxes`` and ``y_pred[..., :4]`` to
    have compatible shapes; verify the target layout matches the model output.
    NOTE(review): confirm Keras actually delivers ``y_true`` as a 2-tuple for
    this model — TODO confirm. Also check that every label value is a valid
    index into the class channels.
    """
    box_targets, class_targets = y_true
    box_preds, class_preds = y_pred[..., :4], y_pred[..., 4:]

    # Box term: plain mean squared error (this is not an IoU-based loss).
    per_box_mse = tf.keras.losses.MSE(box_targets, box_preds)
    bbox_loss = tf.reduce_mean(per_box_mse)

    # Class term: sparse labels against the predicted class distribution.
    cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy()
    class_loss = tf.reduce_mean(cross_entropy(class_targets, class_preds))

    return bbox_loss + class_loss

# Compile with Adam and the combined box + class loss defined above.
# NOTE(review): 'accuracy' is computed against the full 4 + num_classes output;
# verify that this metric is meaningful for a detection head — TODO confirm.
model.compile(optimizer='adam', loss=custom_loss, metrics=['accuracy'])


In [4]:
# Record the TensorFlow version used for this run.
# NOTE(review): tensorflow is already imported in the first cell; this
# re-import is redundant there but keeps the cell runnable on its own.
import tensorflow as tf
print(tf.__version__)

2.11.0


In [None]:
# Train for 10 epochs, validating on the test split after each epoch.
# NOTE(review): this cell has no execution count, so it was presumably never
# run to completion — TODO confirm it works on a fresh kernel.
history = model.fit(train_dataset, validation_data=test_dataset, epochs=10)


In [None]:
# Evaluate on the test split. With one loss and one compiled metric, the
# result is indexed as [loss, accuracy].
evaluation = model.evaluate(test_dataset)
print(f"Test Loss: {evaluation[0]}, Test Accuracy: {evaluation[1]}")