In [1]:
# INSTALL WANDB
!pip install wandb -qq

In [2]:
from fastai.vision.all import *
import params

import wandb

In [3]:
# Location of the zipped dataset archive; it is passed to untar_data in the next cell.
URL = 'https://storage.googleapis.com/wandb_course/bdd_simple_1k.zip'

In [4]:
# Fetch and extract the archive; `path` is the root folder of the extracted dataset.
# NOTE(review): force_download=True asks fastai to fetch the archive again on every
# run instead of reusing a previously downloaded copy — confirm this is intended.
path = Path(untar_data(URL, force_download=True))

In [6]:
# What's the structure of the dataset we downloaded?
# List the contents of the `images` folder (the saved output shows 1000 .jpg files).
(path/'images').ls()

(#1000) [Path('/home/darek/.fastai/data/bdd_simple_1k/images/baa79505-43ff4345.jpg'),Path('/home/darek/.fastai/data/bdd_simple_1k/images/1ee2d0f9-c1f48031.jpg'),Path('/home/darek/.fastai/data/bdd_simple_1k/images/67ed7da0-b623e02f.jpg'),Path('/home/darek/.fastai/data/bdd_simple_1k/images/a91b7555-00000920.jpg'),Path('/home/darek/.fastai/data/bdd_simple_1k/images/20d3d92d-cb6dd6fc.jpg'),Path('/home/darek/.fastai/data/bdd_simple_1k/images/15f89ba0-2a7e079a.jpg'),Path('/home/darek/.fastai/data/bdd_simple_1k/images/5f6197ca-549a0001.jpg'),Path('/home/darek/.fastai/data/bdd_simple_1k/images/a4215cd6-00000000.jpg'),Path('/home/darek/.fastai/data/bdd_simple_1k/images/5f08b452-00000000.jpg'),Path('/home/darek/.fastai/data/bdd_simple_1k/images/0512a400-d2fa24da.jpg')...]

In [8]:
def label_func(fname):
    """Return the path of the mask that belongs to image `fname`.

    The mask is looked up in a `labels` folder that sits next to the folder
    containing the image, and is named `<image stem>_mask.png`.
    """
    labels_dir = fname.parent.parent / "labels"
    mask_name = f"{fname.stem}_mask.png"
    return labels_dir / mask_name

def get_classes_per_image(mask_data, class_labels):
    """Report which classes occur in a segmentation mask.

    Args:
        mask_data: array of class ids (any shape; only the distinct values matter).
        class_labels: mapping of class id -> class name.

    Returns:
        Dict mapping every class name to 1 if its id appears in `mask_data`,
        otherwise 0.
    """
    ids_present = list(np.unique(mask_data))
    return {
        class_name: int(class_id in ids_present)
        for class_id, class_name in class_labels.items()
    }

def _create_table(image_files, class_labels, dataset_name="bdd1k"):
    """Create a `wandb.Table` describing the dataset, one row per image.

    Every row contains: the file stem, the two parts of the stem around the
    first '-' separators ("P1", "P2"), the image with its mask attached, the
    dataset name, and one 0/1 column per class telling whether that class id
    occurs in the image's mask.

    Args:
        image_files: sized collection of image paths; the mask of each image
            is located with `label_func`.
        class_labels: mapping of class id -> class name.
        dataset_name: value written to the "Dataset" column of every row.

    Returns:
        The populated `wandb.Table`.
    """
    class_names = list(class_labels.values())
    # Column headers must be strings, but the presence dict returned by
    # get_classes_per_image is keyed by the raw class names, so keep both forms.
    columns = ["File_Name", "P1", "P2", "Images", "Dataset"] + [str(name) for name in class_names]
    table = wandb.Table(columns=columns)

    for image_file in progress_bar(image_files, total=len(image_files)):
        image = Image.open(image_file)
        mask_data = np.array(Image.open(label_func(image_file)))
        class_in_image = get_classes_per_image(mask_data, class_labels)

        # Stems look like "<p1>-<p2>"; tolerate a stem without '-' instead of
        # aborting the whole table with an IndexError.
        stem_parts = image_file.stem.split('-')
        part_one = stem_parts[0]
        part_two = stem_parts[1] if len(stem_parts) > 1 else ""

        table.add_data(
            image_file.stem,
            part_one,
            part_two,
            wandb.Image(
                image,
                masks={
                    # Mask group name as displayed by W&B; kept as "predictions".
                    "predictions": {
                        "mask_data": mask_data,
                        "class_labels": class_labels,
                    }
                },
            ),
            dataset_name,
            *[class_in_image[name] for name in class_names]
        )

    return table

In [7]:
# START A NEW WANDB RUN
# Project and entity are read from the local `params` module; job_type labels this run as "upload".
run = wandb.init(project=params.WANDB_PROJECT, entity=params.ENTITY, job_type="upload")

[34m[1mwandb[0m: Currently logged in as: [33mdarek[0m ([33mav-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
# CREATE AN ARTIFACT
# Named by params.RAW_DATA_AT with type "raw_data"; the cells below attach the
# license file, the image/label folders and the table to it.
artifact = wandb.Artifact(params.RAW_DATA_AT, type="raw_data")

In [10]:
# ADD FILE TO ARTIFACT
# The dataset's license file is stored inside the artifact under the name "LICENSE.txt".
artifact.add_file(path/"LICENSE.txt", name="LICENSE.txt")

<ManifestEntry digest: X+6ZFkDOlnKesJCNt20yRg==>

In [11]:
# ADD FOLDERS TO ARTIFACT
# Each dataset folder is stored inside the artifact under its own name.
for folder in ('images', 'labels'):
    artifact.add_dir(path/folder, name=folder)

[34m[1mwandb[0m: Adding directory to artifact (/home/darek/.fastai/data/bdd_simple_1k/images)... Done. 0.3s
[34m[1mwandb[0m: Adding directory to artifact (/home/darek/.fastai/data/bdd_simple_1k/labels)... Done. 0.2s


In [12]:
# Collect the image files located directly in the `images` folder (recurse=False).
image_files = get_image_files(path/"images", recurse=False)

In [13]:
# Build the exploration table: one row per image, with the class columns taken from params.BDD_CLASSES.
table = _create_table(image_files, params.BDD_CLASSES)

In [14]:
# ADD TABLE TO ARTIFACT
# The table is stored inside the artifact under the name "eda_table".
artifact.add(table, "eda_table")

<ManifestEntry digest: 6yfn9fJ4P7gcqbZFnSPbSQ==>

In [15]:
# LOG ARTIFACT
# Log the assembled artifact (license file, folders and table) to the current run.
run.log_artifact(artifact)

<wandb.sdk.wandb_artifacts.Artifact at 0x7fb52d7e8970>

In [16]:
# FINISH RUN
# Close the run that was opened with wandb.init above.
run.finish()