# Load packages

In [2]:
# reload all modules
%reload_ext autoreload
%autoreload 2

import fiftyone as fo
# session = fo.launch_app(auto=False)

import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import utils
import json

fo.list_datasets()

['BDD100k', 'BelgiumTS', 'CCTSDB', 'GTSDB', 'TT100k', 'videos']

# Dataset Management

In [None]:
# Open the existing FiftyOne dataset named "GTSDB" and bind it to `dataset`,
# the variable the following cells operate on.
dataset = fo.load_dataset("GTSDB")

In [None]:
# Run the project helper on the currently loaded dataset with the arguments
# 640, 640 (presumably target width and height -- TODO confirm in utils).
# NOTE(review): whether the image files are rewritten in place is not visible
# from this notebook; check utils.resize_samples before running.
utils.resize_samples(dataset, 640, 640)

In [None]:
# NOTE(review): called without arguments; the name suggests it removes
# FiftyOne datasets -- verify the scope in utils.delete_datasets before running.
utils.delete_datasets()

In [None]:
# Ask the project loader to (re)load the listed datasets; only 'GTSDB' here.
utils.load_datasets(['GTSDB'])

# Mapillary

In [None]:
# Load the Mapillary dataset: create a FiftyOne image dataset from the image
# folder and attach the bounding boxes stored in the per-image JSON files.
# Nothing happens when a dataset called "Mapillary" already exists.

name = "Mapillary"

if not fo.dataset_exists(name):

    dataset_dir = "../datasets/mapillary/images"
    annotations_dir = "../datasets/mapillary/mtsd_v2_fully_annotated/annotations"

    dataset = fo.Dataset.from_dir(
        dataset_dir=dataset_dir,
        dataset_type=fo.types.ImageDirectory,
        name=name,
    )

    dataset.persistent = True
    # Width/height are needed below to convert pixel boxes to relative ones
    dataset.compute_metadata()

    dataset.tag_samples(name)

    with fo.ProgressBar() as pb:
        for sample in pb(dataset):
            # Swap only the file extension; a chained str.replace would also
            # rewrite '.jpg' / '.png' appearing elsewhere in the name
            stem, _ = os.path.splitext(sample.filename)
            json_file = os.path.join(annotations_dir, stem + '.json')
            if not os.path.exists(json_file):
                # Images without an annotation file stay in the dataset unlabeled
                continue

            metadata = sample.metadata
            if metadata is None or not metadata.width or not metadata.height:
                # Without the image size the boxes cannot be normalized; skip
                # this sample instead of aborting the whole import
                print(f"Skipping {sample.filepath}: image size unavailable")
                continue
            W, H = metadata.width, metadata.height

            with open(json_file, encoding="utf-8") as f:
                data = json.load(f)

            detections = []
            for obj in data['objects']:
                # Convert to [top-left-x, top-left-y, width, height]
                # in relative coordinates in [0, 1] x [0, 1]
                bbox = obj['bbox']
                x1, x2 = bbox['xmin'] / W, bbox['xmax'] / W
                y1, y2 = bbox['ymin'] / H, bbox['ymax'] / H
                detections.append(
                    fo.Detection(
                        label=obj['label'],
                        bounding_box=[x1, y1, x2 - x1, y2 - y1],
                    )
                )

            # Leave the field unset when the file lists no objects
            if detections:
                sample['detections'] = fo.Detections(detections=detections)
            sample.save()

# Master Dataset

In [None]:
# Build a persistent 'master_dataset' that gathers the samples of every image
# dataset registered in FiftyOne. Skipped when it already exists.
if not fo.dataset_exists('master_dataset'):
    # Snapshot the source names BEFORE creating the master dataset so that it
    # is never merged into itself
    source_names = fo.list_datasets()

    master_dataset = fo.Dataset('master_dataset')
    master_dataset.persistent = True

    for dataset_name in source_names:
        source = fo.load_dataset(dataset_name)
        # Only image datasets can be merged into the image master; this skips
        # video datasets such as 'videos' as well as empty datasets
        if source.media_type != 'image':
            continue
        master_dataset.merge_samples(source)

In [None]:
# #joining all datasets
# if not fo.dataset_exists('master_dataset'):
#     master_dataset = fo.Dataset('master_dataset')

#     for dataset_name in fo.list_datasets():
#         dataset = fo.load_dataset(dataset_name)
#         if dataset_name == 'master_dataset' or dataset_name == 'my-videos':
#             continue
#         for sample in dataset:
#             if len(master_dataset.match(F('filepath')==sample.filepath)) == 0:
#                 master_dataset.add_sample(sample)

#     master_dataset.persistent = True
#     master_dataset.save()

# else:
#     master_dataset = fo.load_dataset('master_dataset')



# BRTS Pierre2024

In [3]:
from tqdm import tqdm
import xml.etree.ElementTree as ET
from fiftyone import ViewField as F
from PIL import Image
# Absolute path of the BRTS image folder on the author's machine
images_dir = '/home/madu/GIT/FOdatasets/BRTS/Brazilian Vertical Traffic Signs and Lights Dataset/images'
# Annotation folder: same path with every 'images' occurrence replaced by
# 'annotations' (the path above contains it once, as the last component)
annotations_dir = images_dir.replace('images','annotations')
# Name under which the dataset is registered in FiftyOne
name = 'BRTS'

# Maps the class id found in each XML <name> element to the label stored on
# the detections. Ids missing from this table raise KeyError in parse_xml.
label_map = {
    '000000': 'pare',
    '000001': 'preferencia',
    '000003': 'proibido_virar_esquerda',
    # NOTE(review): 'direira' looks like a typo for 'direita'; left unchanged
    # because the string is the label written into the dataset and exports
    '000004': 'proibido_virar_direira',
    '000007': 'proibido_estacionar',
    '000008': 'permitido_estacionar',
    '000009': 'proibido_parar_estacionar',
    '000023': 'velocidade_80',
    '000025': 'lombada',
    '000028': 'sentido_obrigatorio',
    '000035': 'trafego_direita',
    '000040': 'exclusivo_onibus',
    '000042': 'exclusivo_ciclistas',
    '000051': 'amarelo',
    '000052': 'vermelho',
    '000053': 'verde',

}

# Function to parse XML and extract data
def parse_xml(xml_file):
    """Turn one XML annotation file into a list of FiftyOne detections.

    The image size is read from ``size/width`` and ``size/height``; every
    ``object`` element contributes one detection whose label is looked up in
    ``label_map`` and whose ``bndbox`` corners are truncated to whole pixels
    and then divided by the image size.

    Args:
        xml_file: path (or file object) accepted by ``ET.parse``.

    Returns:
        A list of ``fo.Detection`` with boxes given as relative
        ``[top-left-x, top-left-y, width, height]``.

    Raises:
        KeyError: if an object's ``name`` is not a key of ``label_map``.
    """
    root = ET.parse(xml_file).getroot()

    img_w = int(root.find('size/width').text)
    img_h = int(root.find('size/height').text)

    def _pixel(box_node, tag):
        # Coordinates may be written as floats; keep only the integer part
        return int(float(box_node.find(tag).text))

    detections = []
    for node in root.findall('object'):
        label = label_map[node.find('name').text]
        box_node = node.find('bndbox')

        left = _pixel(box_node, 'xmin') / img_w
        top = _pixel(box_node, 'ymin') / img_h
        right = _pixel(box_node, 'xmax') / img_w
        bottom = _pixel(box_node, 'ymax') / img_h

        # min/abs keep the box valid even when the two corners are swapped
        rel_box = [
            min(left, right),
            min(top, bottom),
            abs(right - left),
            abs(bottom - top),
        ]
        detections.append(fo.Detection(label=label, bounding_box=rel_box))

    return detections

# Permanently delete every entry of the image folder whose name contains '@'
# (the reason these files are unwanted is not visible in this notebook).
flagged_images = [entry for entry in os.listdir(images_dir) if '@' in entry]
for entry in flagged_images:
    os.remove(os.path.join(images_dir, entry))

# Same for the annotation folder, but restricted to '.xml' files.
flagged_xmls = [
    entry
    for entry in os.listdir(annotations_dir)
    if '@' in entry and entry.endswith('.xml')
]
for entry in flagged_xmls:
    os.remove(os.path.join(annotations_dir, entry))

# Create the BRTS dataset from the image folder and attach the detections
# parsed from each XML annotation file. Skipped when the dataset exists.
if not fo.dataset_exists(name):
    dataset = fo.Dataset.from_dir(
        dataset_dir=images_dir,
        dataset_type=fo.types.ImageDirectory,
        name=name,
    )

    for xml_file in tqdm(os.listdir(annotations_dir)):
        if not xml_file.endswith('.xml'):
            continue

        xml_file_path = os.path.join(annotations_dir, xml_file)
        # Swap only the extension; str.replace('xml', 'jpg') would also rewrite
        # an 'xml' appearing inside the file stem
        filename = os.path.splitext(xml_file)[0] + '.jpg'
        detections = parse_xml(xml_file_path)

        # Anchor the match on the path separator: a plain substring test lets
        # '1.jpg' also match '11.jpg' / '21.jpg' and label the wrong image
        view = dataset.match(F("filepath").ends_with(os.sep + filename))
        if len(view) > 0:
            sample = view.first()
            # Leave the field unset when the file lists no objects
            if detections:
                sample['detections'] = fo.Detections(detections=detections)
            sample.save()

    # Persist only once all annotations have been attached
    dataset.persistent = True
    #dataset.compute_metadata()


 100% |███████████████| 1364/1364 [128.0ms elapsed, 0s remaining, 10.8K samples/s] 


100%|██████████| 1363/1363 [00:08<00:00, 161.07it/s]


# Videos

In [None]:
# Register the local video folder as the persistent FiftyOne dataset 'videos'.
# Creating a dataset under a name that is already taken fails, so reuse the
# stored dataset when it exists (same guard as the other import cells).
if fo.dataset_exists('videos'):
    dataset = fo.load_dataset('videos')
else:
    dataset = fo.Dataset.from_dir(
        dataset_dir='/home/madu/GIT/FOdatasets/videos',
        dataset_type=fo.types.VideoDirectory,
        name='videos',
    )
    dataset.persistent = True

# Load dataset

In [None]:
# Select the dataset that the split and export cells below work on.
dataset = fo.load_dataset('GTSDB')

In [4]:
# (Re)assign the train/val split through sample tags.
VAL_FRACTION = 0.2
# Fixed seed so that re-running the cell reproduces the same validation set
# instead of reshuffling it on every execution
SPLIT_SEED = 51

# Drop the tags of any previous split, then mark everything as 'train'
dataset.untag_samples(['train','val'])
dataset.tag_samples('train')

# Move a random VAL_FRACTION of the samples from 'train' to 'val'
val = dataset.take(int(len(dataset) * VAL_FRACTION), seed=SPLIT_SEED)
val.tag_samples('val')
val.untag_samples('train')

# Export dataset
## Yolo

In [None]:
import yaml
# The splits to export
splits = ["train", "val"]

# When True, every label is collapsed into the single class 'sign'
one_class = False

# Folder name of the export; the one-class variant gets its own folder. The
# same name is reused for the YAML 'path' so both always agree.
export_name = dataset.name + ('-1class' if one_class else '')
export_dir = f'../datasets/{export_name}'

if one_class:
    map_dict = {
        label: 'sign'
        for label in dataset.distinct('detections.detections.label')
    }
    view = dataset.map_labels(
        'detections',
        map=map_dict,
    )
else:
    view = dataset

# All splits must use the same classes list
classes = sorted(view.distinct('detections.detections.label'))

# Export the splits
for split in splits:
    split_view = view.match_tags(split)
    split_view.export(
        export_dir=export_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        label_field='detections',
        split=split,
        classes=classes,
    )

# Rewrite the 'path' entry of the generated dataset.yaml
yaml_path = f'{export_dir}/dataset.yaml'
with open(yaml_path, 'r') as f:
    dataset_yaml = yaml.safe_load(f)

# Previously this always used dataset.name, so a one-class export pointed at
# the multi-class folder
dataset_yaml['path'] = f'./datasets/{export_name}'
with open(yaml_path, 'w') as f:
    yaml.dump(dataset_yaml, f)

## TFRecord

In [None]:
import fiftyone as fo

# Destination folder: '<dataset name>_TFRecord' under ../datasets
export_dir = f'../datasets/{dataset.name}_TFRecord'
# Sample field holding the detections to serialize
label_field = "detections"  # for example

# Write a random 200-sample subset of the dataset as TF object-detection records
subset = dataset.take(200)
subset.export(
    export_dir=export_dir,
    dataset_type=fo.types.TFObjectDetectionDataset,
    label_field=label_field,
)