# Creating a Voxel51 Dataset from SkyScan Images
This notebook will create a Voxel51 dataset from images captured by SkyScan. The images will be labeled with the ICAO24 identifier from the ADS-B broadcast.

In [1]:
# Authors: Luke Berndt, John Speed Meyers
# SkyScan Project
# IQT Labs, IQT

In [15]:
import glob
import os
import random #not needed in production build

import fiftyone as fo

In [12]:
# Voxel51 dataset name. Ideally no dataset with this name exists yet.
DATASET_NAME = "jsm-test-dataset"

# Where the images live inside the container; map the host directory here
# with the Docker volume option.
IMAGE_DIR = "/images"

# Tag attached to every sample added by this notebook.
DATASOURCE_NAME = "testPlaneDetect"

In [18]:
def build_image_list(file_path, fake_plane_ids=("ac760d", "abc123", "xyz456")):
    """Create a list of plane data dicts, one per .jpg file under file_path.

    The plane image filenames follow a strict naming convention.
    For instance, ac760d_194_50_11882_2021-05-13-14-13-42.jpg translates to
    ac760d - plane_id, aka ICAO 24
    194 - plane bearing
    50 - plane elevation
    11882 - plane distance
    The filename without its extension is used as the external_id.
    Bearing, elevation and distance are not extracted at the moment.

    FOR TESTING PURPOSES the real filename is ignored by default: every image
    is given a plane id picked at random from fake_plane_ids and an
    external_id of the form "<plane_id>_<n>", where n counts the images
    found so far. Pass fake_plane_ids=None to read the plane id and
    external_id from the actual filename instead.

    Args:
        file_path - Path to images; sub-directories are searched as well
        fake_plane_ids - Sequence of plane ids to assign at random, or
            None/empty to parse the real filenames

    Returns:
        (list) image_list - a list of plane dict objects with the keys
            "file_path", "external_id" and "icao24"
    """
    image_list = []
    image_num = 0
    for folder, subfolders, files in os.walk(file_path):
        # os.walk yields entries in arbitrary order; sorting keeps the
        # traversal, and therefore the image numbering, reproducible.
        subfolders.sort()
        for file in sorted(files):
            if not file.endswith(".jpg"):
                continue

            if fake_plane_ids:
                # testing mode: synthesize "<random plane id>_<n>"
                fake_id = random.choice(fake_plane_ids)
                external_id = "{}_{}".format(fake_id, image_num)
                image_num += 1
            else:
                # extract metadata for each plane from filename
                image_filename = os.path.basename(file)
                external_id = os.path.splitext(image_filename)[0]

            image_path = os.path.abspath(os.path.join(folder, file))
            plane_id = external_id.split("_")[0]

            # place plane image data in a dict
            item = {"file_path": image_path,
                    "external_id": external_id,
                    "icao24": plane_id}

            image_list.append(item)

    return image_list

## Create or load dataset

In [19]:
# Create dataset
# cell time estimate: 15 seconds

# Check for the dataset explicitly instead of treating any ValueError raised
# while creating it as "the dataset already exists".
if fo.dataset_exists(DATASET_NAME):
    # If the dataset already exists, load it instead
    dataset = fo.load_dataset(name=DATASET_NAME)
    print("Dataset already exists.Loaded {} dataset".format(DATASET_NAME))
else:
    dataset = fo.Dataset(name=DATASET_NAME)
    # mark the new dataset persistent so it is kept after this session
    dataset.persistent = True
    print("Created {} dataset".format(DATASET_NAME))

Dataset already exists.Loaded jsm-test-dataset dataset


## Add image samples to Voxel51 dataset

In [20]:
image_list = build_image_list(IMAGE_DIR)

# Build one sample per image, labeled with its external id and ICAO24 and
# tagged with the data source.
# NOTE: nothing here checks for existing samples, so re-running this cell
# against the same dataset adds the same images again.
samples = []
for image in image_list:
    sample = fo.Sample(filepath=image["file_path"])
    sample["external_id"] = fo.Classification(label=image["external_id"])
    sample["icao24"] = fo.Classification(label=image["icao24"])
    sample.tags.append(DATASOURCE_NAME)
    samples.append(sample)

# Add samples to the dataset in a single batch rather than one call per sample
if samples:
    dataset.add_samples(samples)

# print summary of dataset
print(dataset)

Name:        jsm-test-dataset
Media type:  None
Num samples: 0
Persistent:  True
Tags:        []
Sample fields:
    id:       fiftyone.core.fields.ObjectIdField
    filepath: fiftyone.core.fields.StringField
    tags:     fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
