# Creating a Voxel51 Dataset from SkyScan Images
This notebook will create a Voxel51 dataset from images captured by SkyScan. The images will be labeled with the ICAO24 identifer from the ADS-B broadcast.

In [1]:
# Authors: Luke Berndt, John Speed Meyers
# SkyScan Project
# IQT Labs, IQT

In [2]:
import glob
import os
import random #not needed in production build

import fiftyone as fo

In [3]:
# Name of the Voxel51 dataset. It should not exist already.
DATASET_NAME="jsm-test-dataset-1"
# Directory with the images. It should be mapped into the container using the Docker volume command.
IMAGE_DIR="./images/"
DATASOURCE_NAME="testPlaneDetect"

In [4]:
def build_image_list(file_path):
    """Create a list of plane data dicts.
    
    Extract plane data from from jpg filenames.

    The plane image filenames follow a strict naming convention.
    For instance, ac760d_194_50_11882_2021-05-13-14-13-42.jpg translates to
    ac760d - plane_id, aka ICAO 24
    194 - plane bearing
    50 - plane elevation
    11882 - plane distance
    TODO: external_id - ????
    
    Args:
        file_path - Path to images
        
    Returns:
        (list) image_list - a list of plane dict objects
    """
    print("running build_image_list")
    image_list = []
    imageNum = 0
    for folder, _, files in os.walk(file_path):
        for file in files:
            if file.endswith(".jpg"):
                print("fileFound")
                # extract metadata for each plane from filename
                
                ##### - added code
                # FOR TESTING PURPOSES
                # changes the path for all the images to be compatible with the notebooks (renames files)
                # randomly assigns a plane_id_num to each 
                plane_id_num = random.randint(0,2)
                plane_id_str = ""
                match plane_id_num:
                    case 0:
                        plane_id_str = "ac760d"
                    case 1:
                        plane_id_str = "abc123"
                    case 2:
                        plane_id_str = "xyz456"
                        
                image_filename_old = "./images/" + file
                image_filename_new = "./images/" + plane_id_str + "_" + str(imageNum) + ".jpg"
                os.rename(image_filename_old, image_filename_new)
                imageNum+=1
                
                
                #####
                image_filename = os.path.basename(plane_id_str + "_" + str(imageNum) + ".jpg")
                external_id = os.path.splitext(image_filename)[0]
                image_path = os.path.abspath(os.path.join(folder, file))
                plane_id = external_id.split("_")[0]
                #plane_bearing = external_id.split("_")[1]
                #plane_elevation = external_id.split("_")[2]
                #plane_distance = external_id.split("_")[3]
                # place plane image data in a dict
                item = {"file_path": image_path,
                        "external_id": external_id,
                        #"bearing": plane_bearing,
                        #"elevation": plane_elevation,
                        #"distance": plane_distance,
                        "icao24": plane_id}

                image_list.append(item)
                print("added filename: " + image_filename)

    return image_list

## Create or load dataset

In [7]:
# Create dataset
# cell time estimate: 15 seconds

# attempt to open dataset
try:
    dataset = fo.Dataset(name=DATASET_NAME)
    dataset.persistent = True
    print("Created {} dataset".format(DATASET_NAME))
# If the dataset already exists, load it instead 
except ValueError:
    dataset = fo.load_dataset(name=DATASET_NAME)
    print("Dataset already exists.Loaded {} dataset".format(DATASET_NAME))

Created jsm-test-dataset-1 dataset


## Add image samples to Voxel51 dataset

In [8]:
image_list = build_image_list(IMAGE_DIR)
print("len: " + str(len(image_list)))

# Add samples to the dataset
for image in image_list:
    sample = fo.Sample(filepath=image["file_path"])
    sample["external_id"] = fo.Classification(label=image["external_id"])
    #sample["bearing"] = fo.Classification(label=image["bearing"])
    #sample["elevation"] = fo.Classification(label=image["elevation"])
    #sample["distance"] = fo.Classification(label=image["distance"])
    sample["icao24"] = fo.Classification(label=image["icao24"])
    sample.tags.append(DATASOURCE_NAME)
    dataset.add_sample(sample)
    
# print summary of dataset
print(dataset)

running build_image_list
fileFound
added filename: ac760d_1.jpg
fileFound
added filename: xyz456_2.jpg
fileFound
added filename: ac760d_3.jpg
fileFound
added filename: ac760d_4.jpg
fileFound
added filename: xyz456_5.jpg
fileFound
added filename: xyz456_6.jpg
fileFound
added filename: abc123_7.jpg
fileFound
added filename: ac760d_8.jpg
fileFound
added filename: abc123_9.jpg
fileFound
added filename: xyz456_10.jpg
fileFound
added filename: xyz456_11.jpg
fileFound
added filename: ac760d_12.jpg
fileFound
added filename: ac760d_13.jpg
fileFound
added filename: ac760d_14.jpg
fileFound
added filename: xyz456_15.jpg
fileFound
added filename: abc123_16.jpg
fileFound
added filename: abc123_17.jpg
fileFound
added filename: abc123_18.jpg
fileFound
added filename: xyz456_19.jpg
fileFound
added filename: abc123_20.jpg
fileFound
added filename: xyz456_21.jpg
fileFound
added filename: xyz456_22.jpg
fileFound
added filename: ac760d_23.jpg
fileFound
added filename: xyz456_24.jpg
fileFound
added filename

In [12]:
# Diagnostics
print("Datasets:")
print(fo.list_datasets())

try :
    dataset = fo.load_dataset(name=DATASET_NAME)
    print("\nDataset Info for: " + DATASET_NAME)
    fo.pprint(dataset.stats(include_media=True))
    
except ValueError :
    print("No datasets found")
    


# for image in image_list:
#     print("filename: " + fo.Sample(filepath=image["file_path"]))


Datasets:
['jsm-test-dataset-1']

Dataset Info for: jsm-test-dataset-1
Computing metadata...
 100% |███████████████████| 48/48 [8.7s elapsed, 0s remaining, 5.5 samples/s]   
Failed to populate metadata on 48 samples. Use `dataset.exists("metadata", False)` to retrieve them
{
    'samples_count': 48,
    'samples_bytes': 21439,
    'samples_size': '20.9KB',
    'media_bytes': 0,
    'media_size': '0.0B',
    'total_bytes': 21439,
    'total_size': '20.9KB',
}


In [11]:
# ONLY RUN IF YOU WANT TO DELETE YOUR DATASET
# Delete Dataset by setting below variable to True
delete = False
if(delete):
    try:
        dataset = fo.load_dataset(name=DATASET_NAME)
        dataset.delete()
        print("Dataset deleted")
    except ValueError :
        print("No datasets found")
else :
    print("set delete to true to delete")


set delete to true to delete
