In [3]:
from fastai.vision.all import *
import params  
import wandb

In [14]:
# Print every non-dunder attribute of the params module with its value
for variable in dir(params):
    if variable.startswith('__'):
        continue  # skip dunder attributes
    print(variable, ':', getattr(params, variable))

BDD_CLASSES : {0: 'background', 1: 'road', 2: 'traffic light', 3: 'traffic sign', 4: 'person', 5: 'vehicle', 6: 'bicycle'}
ENTITY : None
PROCESSED_DATA_AT : bdd_simple_1k_split
RAW_DATA_AT : bdd_simple_1k
WANDB_PROJECT : mlops-course-001


In [15]:
# When True, only the first 10 image files are kept (see the DEBUG check after get_image_files)
DEBUG = False

In [16]:
# Download and extract the sample dataset with fastai's untar_data; the listing below
# shows it extracted under ~/.fastai/data/bdd_simple_1k (LICENSE.txt, images/, labels/).
# NOTE(review): force_download=True presumably re-fetches the archive on every run — confirm this is intended
URL = 'https://storage.googleapis.com/wandb_course/bdd_simple_1k.zip'
path = Path(untar_data(URL, force_download=True))
path.ls()

(#3) [Path('/home/yi/.fastai/data/bdd_simple_1k/LICENSE.txt'),Path('/home/yi/.fastai/data/bdd_simple_1k/images'),Path('/home/yi/.fastai/data/bdd_simple_1k/labels')]

In [22]:
def label_func(fname):
    "Return the mask path for image `fname`: `labels/<stem>_mask.png` next to the image's folder"
    labels_dir = fname.parent.parent / "labels"
    mask_name = f"{fname.stem}_mask.png"
    return labels_dir / mask_name

def get_classes_per_image(mask_data, class_labels):
    "Map each class name in `class_labels` to 1 if its id occurs in `mask_data`, else 0"
    ids_present = list(np.unique(mask_data))
    return {name: int(class_id in ids_present) for class_id, name in class_labels.items()}

def _create_table(image_files, class_labels):
    """Build a `wandb.Table` with one row per image file.

    Columns are "File_Name", "Images", "Split", followed by one column per
    class name in `class_labels`. Each row holds the file name, the image with
    its mask attached, a placeholder split, and a 0/1 flag per class taken
    from `get_classes_per_image`.
    """
    class_columns = [str(name) for name in class_labels.values()]
    table = wandb.Table(columns=["File_Name", "Images", "Split"] + class_columns)

    for image_file in progress_bar(image_files, total=len(image_files)):
        image = Image.open(image_file)
        # The mask file is located via label_func and loaded as an array
        mask_data = np.array(Image.open(label_func(image_file)))
        presence = get_classes_per_image(mask_data, class_labels)

        mask_overlay = {
            "predictions": {
                "mask_data": mask_data,
                "class_labels": class_labels,
            }
        }
        presence_flags = [presence[column] for column in class_columns]

        table.add_data(
            str(image_file.name),
            wandb.Image(image, masks=mask_overlay),
            "None",  # we don't have a dataset split yet
            *presence_flags,
        )

    return table

In [17]:
# Start a W&B run in the configured project with job_type "upload".
# NOTE(review): entity is presumably the W&B team/user; params.ENTITY printed as None above — confirm the default is intended.
run = wandb.init(project=params.WANDB_PROJECT, entity=params.ENTITY, job_type="upload")
# Create the artifact (type "raw_data") that the files and table below are added to
raw_data_at = wandb.Artifact(params.RAW_DATA_AT, type="raw_data")

[34m[1mwandb[0m: Currently logged in as: [33myihuanghz95[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [18]:
# Add the license file to the artifact
raw_data_at.add_file(path/'LICENSE.txt', name='LICENSE.txt')

<wandb.sdk.artifacts.artifact_manifest_entry.ArtifactManifestEntry at 0x7fbe21788a00>

In [19]:
# Add the images and labels directories to the artifact under the same names
raw_data_at.add_dir(path/'images', name='images')
raw_data_at.add_dir(path/'labels', name='labels')

[34m[1mwandb[0m: Adding directory to artifact (/home/yi/.fastai/data/bdd_simple_1k/images)... Done. 0.4s
[34m[1mwandb[0m: Adding directory to artifact (/home/yi/.fastai/data/bdd_simple_1k/labels)... Done. 0.4s


In [20]:
# List the image files under images/ (recurse=False)
image_files = get_image_files(path/"images", recurse=False)

# When DEBUG is set, keep only the first 10 files
if DEBUG: image_files = image_files[:10]

In [23]:
# Build the table from the image files using the class map in params.BDD_CLASSES
table = _create_table(image_files, params.BDD_CLASSES)

In [24]:
# Attach the table to the artifact under the name "eda_table"
raw_data_at.add(table, "eda_table")

<wandb.sdk.artifacts.artifact_manifest_entry.ArtifactManifestEntry at 0x7fbe1ce67160>

In [25]:
# Log the artifact (license, images, labels, and table) to the run
run.log_artifact(raw_data_at)
# Finish the run
run.finish()

<img src="wnb_eda.png">