# Saturate VRAM using increasingly bigger datasets

This implementation uses https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 repository

In [1]:
import glob
import tensorflow as tf
from yolov3.yolov4 import Create_Yolo
from yolov3.utils import load_yolo_weights, detect_image, detect_realtime, detect_video, detect_video_realtime_mp

2023-07-13 01:01:37.010705: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Define image size
IMAGE_SIZE = 416

# Create a dataset of image paths
image_directory = "/media/uthy/M_2/CentroGEO/Image-Captioning-Tutorial/train2014"
image_paths = glob.glob(f"{image_directory}/*.jpg")
path_ds = tf.data.Dataset.from_tensor_slices(image_paths)

2023-07-13 01:01:38.399721: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-13 01:01:38.425242: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-13 01:01:38.425558: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [3]:
def load_and_preprocess_image(path):
    path = path.numpy().decode("utf-8")  # convert tensor to numpy array and decode bytes to string
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [IMAGE_SIZE, IMAGE_SIZE])  # YOLOv3 requires input images to be 416x416x3
    img = img / 255  # normalize to [0,1] range
    return img

def load_and_wrap_preprocess_image(path):
    return tf.py_function(load_and_preprocess_image, [path], tf.float32)

# Create dataset
image_ds = path_ds.map(load_and_wrap_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

In [4]:
# Create the YOLO model
IMAGE_SIZE = 416
model_size = (IMAGE_SIZE, IMAGE_SIZE, 3)
num_classes = 80  # adjust this to the number of classes in your dataset if it's not COCO
model = Create_Yolo(input_size=IMAGE_SIZE, channels=3, CLASSES="model_data/coco/coco.names")
load_yolo_weights(model, "yolov3.weights")  # specify the path to your YOLO weights file

In [5]:
# Test the model with increasingly larger batches
for power in range(0, 10):  # adjust this range based on what you think is reasonable for your GPU
    batch_size = 2 ** power
    try:
        # Take a batch from the dataset
        batch = next(iter(image_ds.batch(batch_size)))
        # Feed the batch to the model
        boxes, scores, classes = model.predict(batch)
        print(f"Batch size {batch_size} processed successfully")
    except tf.errors.ResourceExhaustedError:
        print(f"Batch size {batch_size} exceeded GPU memory limit")
        break

2023-07-13 01:01:40.655827: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [82783]
	 [[{{node Placeholder/_0}}]]
2023-07-13 01:01:42.276809: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8900
2023-07-13 01:01:42.790068: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Batch size 1 processed successfully


2023-07-13 01:01:43.192956: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [82783]
	 [[{{node Placeholder/_0}}]]


Batch size 2 processed successfully


2023-07-13 01:01:43.909247: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [82783]
	 [[{{node Placeholder/_0}}]]


Batch size 4 processed successfully


2023-07-13 01:01:44.853245: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [82783]
	 [[{{node Placeholder/_0}}]]


Batch size 8 processed successfully


2023-07-13 01:01:46.195052: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [82783]
	 [[{{node Placeholder/_0}}]]


Batch size 16 processed successfully


2023-07-13 01:01:48.566764: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [82783]
	 [[{{node Placeholder/_0}}]]
2023-07-13 01:01:49.749916: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.01GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-07-13 01:01:49.777599: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.01GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-07-13 01:01:49.777628: W tensorflow/tsl/fram

Batch size 32 processed successfully


2023-07-13 01:01:52.582486: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [82783]
	 [[{{node Placeholder/_0}}]]


Batch size 64 processed successfully


2023-07-13 01:01:53.569358: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [82783]
	 [[{{node Placeholder/_0}}]]




2023-07-13 01:02:04.995820: W tensorflow/tsl/framework/bfc_allocator.cc:485] Allocator (GPU_0_bfc) ran out of memory trying to allocate 679.25MiB (rounded to 712249344)requested by op model/zero_padding2d/Pad
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-07-13 01:02:04.995929: I tensorflow/tsl/framework/bfc_allocator.cc:1039] BFCAllocator dump for GPU_0_bfc
2023-07-13 01:02:04.995952: I tensorflow/tsl/framework/bfc_allocator.cc:1046] Bin (256): 	Total Chunks: 50, Chunks in use: 48. 12.5KiB allocated for chunks. 12.0KiB in use in bin. 5.2KiB client-requested in use in bin.
2023-07-13 01:02:04.995967: I tensorflow/tsl/framework/bfc_allocator.cc:1046] Bin (512): 	Total Chunks: 62, Chunks in use: 60. 31.2KiB allocated for chunks. 30.2KiB in use in bin. 30.0KiB client-requested in use in bin.
2023-07-13 01:02:04.995981: I tensor