In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to them are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
from pathlib import Path

import bb
import tt
import bbedit

# Logger named after this module/notebook.
LOG = logging.getLogger(__name__)
# Project helper from `tt`; presumably configures logging output — TODO confirm.
tt.logging_init()

In [3]:
# Category names, stored as a list in sorted order.
CATEGORIES = sorted(
    (
        "chicken",
        "cow",
        "creeper",
        "enderman",
        "pig",
        "sheep",
        "skeleton",
        "spider",
        "zombie",
    )
)


# Create dataset with set of images

In [None]:
# Create new Dataset with image files
# data_path = Path.home() / "src/data"
# new = bb.Dataset.new(file_path=data_path / "new/info.json", categories=CATEGORIES)
# new.save()

# Run new dataset through latest model

In [None]:
import ultralytics as ul

# Root of the local data tree. Every path in this cell is derived from it so
# the cell does not depend on one particular user's home directory.
data_path = Path.home() / "src/data"

# Weights of the trained detector, loaded once and reused by the cells below.
# Kept as a str, matching what was passed to ul.YOLO before.
model_path = str(data_path / "yolo/runs/detect/keep/no_small/weights/best.pt")
model = ul.YOLO(model_path)

# Dataset whose images are run through the model.
new_path = data_path / "new/info.json"
new_dset = bb.Dataset.load(new_path)

def detect_single(ir: bb.ImageResult) -> bb.ImageResult:
    """Run the module-level ``model`` on one image and convert its output.

    The model is called on ``ir.full_path`` with ``verbose=False``; the first
    element of what it returns is passed through ``bb.yr_to_ir`` and that
    converted value is returned.
    """
    results = model(ir.full_path, verbose=False)
    first_result = results[0]
    return bb.yr_to_ir(first_result)

In [None]:
# Display results of one query at a time.
# The viewer is given detect_single together with the dataset's images and
# category list (presumably it calls detect_single on the currently selected
# image — TODO confirm in bb.InferViewer).
bb.InferViewer[bb.ImageResult](
    detect_single, new_dset.images, new_dset.categories
).show_widget()

In [None]:
# Replace each image's bounding boxes with the ones detect_single returns for it.
for ir in new_dset.iter_images():
    ir.bboxes = detect_single(ir).bboxes

In [None]:
# Save new_dset. No path is given, so this presumably writes back to the file
# it was loaded from — TODO confirm in bb.Dataset.save.
new_dset.save()

# Hand classify

In [None]:
# Clear bbedit's DEBUG object, then leave it as the cell's last expression so
# it is shown as this cell's output.
# NOTE(review): DEBUG looks like an output widget that collects debug messages — confirm.
bbedit.DEBUG.clear_output()
bbedit.DEBUG

In [None]:
# Open the bbedit bounding-box editor on the "new" dataset's info.json.
# data_path and new_dset_path are (re)defined here; later cells also use them.
data_path = Path.home() / "src/data"
new_dset_path = data_path / "new/info.json"
bbe = bbedit.BBoxEdit(new_dset_path)
bbe.display()

In [None]:
# Load the dataset from disk again and view it (presumably to verify what the
# editor wrote — TODO confirm).
check_dset = bb.Dataset.load(new_dset_path)
check_dset.view()

# Merge

In [None]:
import shutil
import bb

# Directories to merge, and the directory that receives the merged result.
data_dir = Path.home() / "src/data"
input_dirs = [data_dir / "minecraft", data_dir / "new"]
output_dir = data_dir / "mobs_merged"

# BBOX_FILE: dataset file name read from each input dir and written to the output dir.
# IMAGE_DIR: subdirectory of the output dir that receives the copied images.
BBOX_FILE = "info.json"
IMAGE_DIR = "images"


def merge(input_dirs, output_dir):
    """Merge several dataset directories into one dataset directory.

    Each input directory must contain a ``BBOX_FILE`` that ``bb.Dataset.load``
    can read. Every image is copied into ``output_dir / IMAGE_DIR`` and one
    combined ``BBOX_FILE`` is written to ``output_dir``. Each merged image
    entry is a deep copy of the input entry with ``file`` rewritten to
    ``IMAGE_DIR/<original file name>``.

    All inputs are loaded and validated before anything is written, so a
    category mismatch or a duplicate file name raises without creating the
    output directory or copying any image.

    Args:
        input_dirs: Iterable of ``Path`` directories to merge, in order.
        output_dir: ``Path`` of the destination directory (created if missing).

    Raises:
        AssertionError: If an input's categories differ from the first input's.
        FileExistsError: If two images, within or across inputs, share a
            file name.
    """
    images_rel = Path(IMAGE_DIR)

    # Pass 1: load and validate every input, and plan the copies. Nothing is
    # written to disk in this pass.
    categories = None
    file_set = set()
    planned = []  # (source image path, image entry for the merged dataset)
    for input_dir in input_dirs:
        dset = bb.Dataset.load(input_dir / BBOX_FILE)
        if categories is None:
            # The first input defines the category list all others must match.
            categories = dset.categories
        assert categories == dset.categories, f"Categories-Mismatch {input_dir}"
        print(f"Input dir={input_dir}, n_files={len(dset.images)}")
        for ir in dset.iter_images():
            fpath = Path(ir.file)
            # Images are flattened into one directory, so only the file name
            # is kept and it must be unique across all inputs.
            out_rel = str(images_rel / fpath.name)
            if out_rel in file_set:
                raise FileExistsError(f"Duplicate filename: {out_rel}")

            file_set.add(out_rel)
            new_ir = ir.model_copy(
                update={"file": out_rel}, deep=True
            )
            planned.append((input_dir / fpath, new_ir))

    # Pass 2: every input is valid, so create the output and populate it.
    output_dir.mkdir(parents=True, exist_ok=True)
    images_subdir = output_dir / IMAGE_DIR
    images_subdir.mkdir(parents=True, exist_ok=True)

    new_dset = bb.Dataset(file_path=output_dir / BBOX_FILE)
    if categories is not None:
        # With no inputs at all, the new dataset keeps its default categories.
        new_dset.categories = categories

    for src_path, new_ir in planned:
        new_dset.images.append(new_ir)
        shutil.copy2(src_path, images_subdir)
    new_dset.save(output_dir / BBOX_FILE)
    print(f"Output dir={output_dir}, n_files={len(list(images_subdir.iterdir()))}")


# Run the merge using the input_dirs / output_dir defined earlier in this cell.
merge(input_dirs, output_dir)

In [6]:
# Load the merged dataset's info.json from disk and view it.
data_dir = Path.home() / "src/data"
merged_dset = bb.Dataset.load(data_dir / "mobs_merged/info.json")
merged_dset.view()

interactive(children=(IntSlider(value=0, continuous_update=False, description='Image:', max=328), Output()), _…

# Check for small boxes

In [None]:
# Check for small boxes
import bb
dset_path = data_path / "minecraft/info.json"

# Load the dataset as a DataFrame and scale the four corner columns by 640
# (assumes normalized coordinates and a 640-pixel image side — TODO confirm).
dset = bb.Dataset.load(dset_path)
df = dset.to_df()
corner_cols = ["x1", "y1", "x2", "y2"]
df[corner_cols] = df[corner_cols] * 640

# Box area in the scaled units.
box_w = df["x2"] - df["x1"]
box_h = df["y2"] - df["y1"]
df["area"] = box_w * box_h

# Show the rows below each area threshold, smaller threshold first.
for max_area in (512, 1024):
    display(df[df["area"] < max_area])

In [None]:
import bb
# Load the dataset at dset_path again and call its category_stats()
# (presumably per-category statistics — TODO confirm in bb.Dataset).
dset = bb.Dataset.load(dset_path)
dset.category_stats()

In [None]:
# Call dataset_stats() on the dataset loaded in the previous cell
# (presumably whole-dataset statistics — TODO confirm in bb.Dataset).
dset.dataset_stats()