In [1]:
# Reload imported modules automatically before each cell executes, so edits
# to module source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
from pathlib import Path

import bb
import tt
import bbedit

# Logger for this notebook's own messages.
LOG = logging.getLogger(__name__)
# Logging setup is delegated to the tt helper; what it configures is defined
# in tt, not in this notebook.
tt.logging_init()

In [3]:
# Class labels used for the datasets in this notebook, kept in alphabetical order.
CATEGORIES = sorted(
    (
        "chicken",
        "cow",
        "creeper",
        "enderman",
        "pig",
        "sheep",
        "skeleton",
        "spider",
        "zombie",
    )
)


# Create dataset with set of images

In [None]:
# Create new Dataset with image files
# data_path = Path.home() / "src/data"
# new = bb.Dataset.new(file_path=data_path / "new/info.json", categories=CATEGORIES)
# new.save()

# Run new dataset through latest model

In [None]:
import ultralytics as ul

# Anchor every path at the per-user data directory instead of a hard-coded
# home directory, so the cell also works under a different user account.
data_path = Path.home() / "src/data"

# Trained detector weights; kept as a plain string, as before.
model_path = str(data_path / "yolo/runs/detect/keep/no_small/weights/best.pt")
model = ul.YOLO(model_path)

# Dataset the model is run against in the cells below.
new_path = data_path / "new/info.json"
new_dset = bb.Dataset.load(new_path)

def detect_single(ir: bb.ImageResult) -> bb.ImageResult:
    """Run the notebook-level ``model`` on one image and convert the result.

    Args:
        ir: Image record; only ``ir.full_path`` is read here.

    Returns:
        The first result returned by the model for that image, converted with
        ``bb.yr_to_ir``.
    """
    results = model(ir.full_path, verbose=False)
    first_result = results[0]
    return bb.yr_to_ir(first_result)

In [None]:
# Browse detection results one image at a time.
infer_viewer = bb.InferViewer[bb.ImageResult](
    detect_single,
    new_dset.images,
    new_dset.categories,
)
infer_viewer.show_widget()

In [None]:
# Replace each image's bboxes with the model's predictions for that image.
for image in new_dset.iter_images():
    image.bboxes = detect_single(image).bboxes

In [None]:
# Write new_dset out through its own save() method
# (presumably back to the file it was loaded from; confirm in bb.Dataset).
new_dset.save()

# Hand classify

In [4]:
# Clear the editor's debug output widget, then end the cell with it so the
# (now empty) widget is rendered below this cell.
bbedit.DEBUG.clear_output()
bbedit.DEBUG

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…

In [5]:
# Open the bounding-box editor on the "new" dataset.
data_path = Path.home().joinpath("src/data")
new_dset_path = data_path.joinpath("new/info.json")
bbe = bbedit.BBoxEdit(new_dset_path)
bbe.display()

VBox(children=(HBox(children=(BBoxWidget(bboxes=[{'x': 0.0, 'y': 433.0, 'width': 365.9817886352539, 'height': …

In [6]:
# Load the dataset from disk again and open its viewer to inspect what is
# actually stored in the file.
check_dset = bb.Dataset.load(new_dset_path)
check_dset.view()

interactive(children=(IntSlider(value=0, continuous_update=False, description='Image:', max=144), Output()), _…

# Merge

In [23]:
import shutil
import bb

# Layout shared by every dataset directory: one bbox file plus an image folder.
BBOX_FILE = "info.json"
IMAGE_DIR = "images"

# Source datasets to combine and the directory that receives the merged result.
data_dir = Path.home() / "src" / "data"
input_dirs = [data_dir / name for name in ("minecraft", "new")]
output_dir = data_dir / "mobs_merged"


def merge(input_dirs, output_dir):
    """Merge several dataset directories into one dataset under ``output_dir``.

    Each input directory must contain a ``BBOX_FILE`` loadable with
    ``bb.Dataset.load``. Every image record is copied into the merged dataset
    with its ``file`` field rewritten to ``IMAGE_DIR/<basename>``, and the
    image file itself is copied into ``output_dir / IMAGE_DIR``.

    All inputs are loaded and validated *before* anything is written, so a
    category mismatch or a filename collision does not leave a partially
    populated output directory behind.

    Args:
        input_dirs: Iterable of ``Path`` objects, one per source dataset.
        output_dir: ``Path`` of the directory that receives the merged
            ``BBOX_FILE`` and image folder; created if missing.

    Raises:
        AssertionError: If an input's categories differ from the first input's.
        FileExistsError: If two image records share the same file basename.
    """
    images_rel = Path(IMAGE_DIR)

    # Pass 1: load and validate every input, building the list of copies to make.
    categories = None
    file_set = set()
    planned = []  # (source image path, rewritten image record)
    for input_dir in input_dirs:
        dset = bb.Dataset.load(input_dir / BBOX_FILE)
        if categories is None:
            # The first dataset defines the category list all others must match.
            categories = dset.categories
        assert categories == dset.categories, f"Categories-Mismatch {input_dir}"
        print(f"Input dir={input_dir}, n_files={len(dset.images)}")
        for ir in dset.iter_images():
            fpath = Path(ir.file)
            # Images are flattened into one folder, so basenames must be unique.
            out_rel = str(images_rel / fpath.name)
            if out_rel in file_set:
                raise FileExistsError(f"Duplicate filename: {out_rel}")

            file_set.add(out_rel)
            new_ir = ir.model_copy(update={"file": out_rel}, deep=True)
            # A relative ir.file is resolved against input_dir; pathlib's "/"
            # leaves an absolute ir.file unchanged.
            planned.append((input_dir / fpath, new_ir))

    # Pass 2: everything validated, so it is now safe to touch the output directory.
    output_dir.mkdir(parents=True, exist_ok=True)
    images_subdir = output_dir / IMAGE_DIR
    images_subdir.mkdir(parents=True, exist_ok=True)

    new_dset = bb.Dataset(file_path=output_dir / BBOX_FILE)
    if categories is not None:
        new_dset.categories = categories
    for src_path, new_ir in planned:
        new_dset.images.append(new_ir)
        shutil.copy2(src_path, images_subdir)
    new_dset.save(output_dir / BBOX_FILE)
    print(f"Output dir={output_dir}, n_files={len(list(images_subdir.iterdir()))}")


# Run the merge with the configured input and output directories.
merge(input_dirs, output_dir)

Input dir=/Users/joe/src/data/minecraft, n_files=184
Input dir=/Users/joe/src/data/new, n_files=145
Output dir=/Users/joe/src/data/mobs_merged, n_files=329


In [24]:
# Inspect exactly what merge() wrote: reuse the merge configuration instead of
# re-typing the path, so this check cannot silently drift to a stale dataset.
merged_dset = bb.Dataset.load(output_dir / BBOX_FILE)
merged_dset.view()

interactive(children=(IntSlider(value=0, continuous_update=False, description='Image:', max=328), Output()), _…

# Check for small boxes

In [7]:
# Check for small boxes
import bb
dset_path = data_path / "minecraft/info.json"
dset = bb.Dataset.load(dset_path)

# Scale the box corners by 640 (presumably normalized coordinates on a
# 640-pixel image; confirm against the training configuration).
df = dset.to_df()
coord_cols = ["x1", "y1", "x2", "y2"]
df[coord_cols] = df[coord_cols] * 640

# Box area as width * height of the scaled corners.
df["area"] = df["x2"].sub(df["x1"]).mul(df["y2"].sub(df["y1"]))

# Show the boxes below each area threshold, tightest threshold first.
for max_area in (512, 1024):
    display(df[df["area"] < max_area])

Unnamed: 0,image_idx,file,category,x1,y1,x2,y2,area


Unnamed: 0,image_idx,file,category,x1,y1,x2,y2,area
1,1,/Users/joe/src/data/minecraft/images/f21b358a-...,cow,377.6,250.88,407.04,278.4,810.1888
18,8,/Users/joe/src/data/minecraft/images/cda62cfe-...,chicken,331.52,248.96,354.56,284.16,811.008
27,14,/Users/joe/src/data/minecraft/images/a8d00719-...,chicken,144.0,257.28,169.6,285.44,720.896
52,26,/Users/joe/src/data/minecraft/images/6ad77380-...,pig,487.0,148.0,509.0,192.0,968.0
155,97,/Users/joe/src/data/minecraft/images/frame_237...,zombie,405.76,268.8,428.8,307.2,884.736
173,110,/Users/joe/src/data/minecraft/images/frame_189...,spider,46.08,229.12,87.68,249.6,851.968
230,155,/Users/joe/src/data/minecraft/images/frame_121...,enderman,220.8,98.56,242.56,140.8,919.1424
243,167,/Users/joe/src/data/minecraft/images/frame_114...,cow,254.72,6.4,279.68,38.4,798.72


In [8]:
import bb
# Load the dataset from disk again, then show the per-category statistics.
dset = bb.Dataset.load(dset_path)
dset.category_stats()

Unnamed: 0_level_0,count,avg_width,avg_height,avg_area
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
zombie,43,0.1722,0.2955,0.0704
pig,39,0.1702,0.1515,0.0335
cow,37,0.1804,0.2107,0.0545
chicken,34,0.1367,0.1619,0.0286
sheep,30,0.2009,0.2237,0.061
skeleton,29,0.1374,0.265,0.0531
spider,25,0.2417,0.165,0.051
creeper,17,0.1257,0.2505,0.0364
enderman,11,0.1311,0.2282,0.0365


In [9]:
# Summary statistics for the dataset as a whole.
dset.dataset_stats()

num_images                    184
num_bboxes                    265
avg_bboxes_per_image     1.440217
num_categories                  9
most_common_category       zombie
least_common_category    enderman
avg_bbox_width           0.169803
avg_bbox_height          0.215856
avg_bbox_area            0.049007
min_bbox_area             0.00176
max_bbox_area            0.566857
dtype: object