Skip to content

Commit

Permalink
OpenImages returns correct dataset composition.
Browse files Browse the repository at this point in the history
  • Loading branch information
dekked authored and nagitsu committed Aug 24, 2018
1 parent 2934c0e commit 356ee67
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 15 deletions.
30 changes: 16 additions & 14 deletions luminoth/tools/dataset/readers/object_detection/openimages.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(self, data_dir, split, download_threads=25, **kwargs):
self.yielded_records = 0
self.errors = 0

self._total_queued = 0
# Flag to notify threads if the execution is halted.
self._alive = True

Expand Down Expand Up @@ -141,11 +142,14 @@ def image_ids(self):
self._image_ids = image_ids
return self._image_ids

def _queue_partial_record(self, records_queue, partial_record):
if not partial_record['gt_boxes']:
def _queue_record(self, records_queue, record):
if (self._limit_examples is not None and
self._total_queued >= self._limit_examples):
return

if not record['gt_boxes']:
tf.logging.debug(
'Dropping record {} without gt_boxes.'.format(
partial_record))
'Dropping record {} without gt_boxes.'.format(record))
return

# If asking for a limited number per class, only yield if the current
Expand All @@ -155,23 +159,23 @@ def _queue_partial_record(self, records_queue, partial_record):
# while an image containing both "Person" and "Bus" instances will.
if self._max_per_class:
labels_in_image = set([
self.classes[bbox['label']]
for bbox in partial_record['gt_boxes']
self.classes[bbox['label']] for bbox in record['gt_boxes']
])
not_maxed_out = labels_in_image - self._maxed_out_classes

if not not_maxed_out:
tf.logging.debug(
'Dropping record {} with maxed-out labels: {}'.format(
partial_record['filename'], labels_in_image))
record['filename'], labels_in_image))
return

tf.logging.debug(
'Queuing record {} with labels: {}'.format(
partial_record['filename'], labels_in_image))
record['filename'], labels_in_image))

self._will_add_record(partial_record)
records_queue.put(partial_record)
self._will_add_record(record)
self._total_queued += 1
records_queue.put(record)

def _queue_records(self, records_queue):
"""
Expand Down Expand Up @@ -214,9 +218,7 @@ def _queue_records(self, records_queue):
if line['ImageID'] != current_image_id:
# Yield if image changes and we have current image.
if current_image_id is not None:
self._queue_partial_record(
records_queue, partial_record
)
self._queue_record(records_queue, partial_record)

# Start new record.
current_image_id = line['ImageID']
Expand All @@ -235,7 +237,7 @@ def _queue_records(self, records_queue):
})

else:
self._queue_partial_record(records_queue, partial_record)
self._queue_record(records_queue, partial_record)

# Wait for all tasks to be consumed.
records_queue.join()
Expand Down
2 changes: 1 addition & 1 deletion luminoth/tools/dataset/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def transform(dataset_reader, data_dir, output_dir, splits, only_classes,
writer = ObjectDetectionWriter(split_reader, output_dir, split)
writer.save()

tf.logging.info('Dataset composition per class:')
tf.logging.info('Composition per class ({}):'.format(split))
for label, count in split_reader._per_class_counter.most_common():
tf.logging.info(
'\t%s: %d', split_reader.pretty_name(label), count
Expand Down

0 comments on commit 356ee67

Please sign in to comment.