## **Download WoodScape**

In [1]:
! pip install gdown scikit-learn pyyaml



In [2]:
import zipfile
from pathlib import Path

import gdown


def download_from_google_drive(output: Path, file_id: str):
    """Download a zip archive from Google Drive and unpack it into ``output``.

    The archive is stored temporarily as ``output / "archive.zip"``, extracted
    into ``output`` and then removed, so only the extracted contents remain.

    Args:
        output: Directory that receives the extracted files. It is created
            (including missing parents) when it does not exist yet.
        file_id: Google Drive file id, inserted into the ``uc?id=`` URL.

    Raises:
        FileNotFoundError: If no archive is on disk after the download call.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    url = f"https://drive.google.com/uc?id={file_id}"
    output_zip = output / "archive.zip"

    # The target directory has to exist before the archive can be written.
    output.mkdir(parents=True, exist_ok=True)

    try:
        downloaded = gdown.download(url, str(output_zip), quiet=False)

        # NOTE(review): some gdown versions appear to report failure by
        # returning None rather than raising, so verify the archive exists.
        if downloaded is None or not output_zip.is_file():
            raise FileNotFoundError(
                f"Download of Google Drive file '{file_id}' produced no archive "
                f"at {output_zip}"
            )

        with zipfile.ZipFile(output_zip, "r") as zip_ref:
            zip_ref.extractall(output)
    finally:
        # Remove the temporary archive even when download or extraction fails,
        # so a partial file is not left behind for the next run.
        if output_zip.exists():
            output_zip.unlink()

In [3]:
import os

# Switch the working directory to the project root, identified as the nearest
# directory that contains README.md.
# The search covers the current working directory itself and then each of its
# ancestors, so re-running this cell after the chdir below leaves the working
# directory unchanged instead of climbing past the project root.
# NOTE(review): if the notebook's own folder contains a README.md, that folder
# is selected — confirm README.md only exists at the project root.
start_dir = Path.cwd()
for current_dir in (start_dir, *start_dir.parents):
    if (current_dir / "README.md").exists():
        break
else:
    # No directory up to the filesystem root contains the marker file.
    raise FileNotFoundError("Project root not found!")

os.chdir(current_dir)

In [4]:
# Dataset directory, given relative to the working directory (which the
# preceding cell changes to the directory containing README.md).
WOODSCAPE_PATH = Path("src/datasets/WoodScape")

# Google Drive file ids passed to download_from_google_drive.
IMAGES_FILE_ID = "1xQ5J4huNmyK9WPoipHTnuZ7lw_J0xhvL"
ANNOTATIONS_FILE_ID = "1k9q1k8rh6hghSPFdmxmkZUd_Mip6pt6F"

In [None]:
# Fetch and unpack both archives (images first, then annotations) into the
# dataset directory.
for drive_file_id in (IMAGES_FILE_ID, ANNOTATIONS_FILE_ID):
    download_from_google_drive(WOODSCAPE_PATH, drive_file_id)

In [5]:
# Locations inside the dataset directory used by the later cells.
# NOTE(review): presumably these are produced by extracting the downloaded
# archives — confirm the archive layout.
IMAGES_PATH = WOODSCAPE_PATH / "rgb_images"  # first argument to split_dataset
ANNOTATIONS_PATH = WOODSCAPE_PATH / "instance_annotations"  # first argument to the converter
CLASSINFO_PATH = WOODSCAPE_PATH / "class_info.json"  # JSON file read for its "classes" entry

## **Convert WoodScape annotations to Ultralytics format**

In [6]:
# Passed as the second argument to the converter and as the annotations
# argument to split_dataset; presumably where converted labels are stored —
# confirm against the converter.
YOLO_ANNOTATIONS_PATH = WOODSCAPE_PATH / "yolo_annotations"

In [None]:
import shutil

from src.models.train.convert import convert_Woodscape_to_Ultralytics

# Run the project's converter on the WoodScape instance annotations.
# NOTE(review): the argument order looks like (source annotations, output
# directory, class-info JSON) — confirm against the converter's signature.
convert_Woodscape_to_Ultralytics(
    ANNOTATIONS_PATH, YOLO_ANNOTATIONS_PATH, CLASSINFO_PATH
)

## **Split dataset**

In [7]:
# Passed as the third argument to split_dataset and written as the "path"
# entry of the dataset YAML.
TRAIN_DATASET = WOODSCAPE_PATH / "train_dataset"

In [12]:
from src.models.train.split_dataset import split_dataset

# NOTE(review): presumably distributes the images and converted labels into
# train/val/test subsets under TRAIN_DATASET (the dataset YAML refers to
# images/train, images/val and images/test) — confirm against split_dataset.
split_dataset(IMAGES_PATH, YOLO_ANNOTATIONS_PATH, TRAIN_DATASET)

## **Create YAML file**

In [8]:
# File the dataset configuration is written to.
YAML_PATH = WOODSCAPE_PATH / "woodscape.yaml"

In [9]:
import json

import yaml

# Class names are taken from the "classes" entry of the class-info JSON file.
with CLASSINFO_PATH.open("r", encoding="utf-8") as f:
    class_info = json.load(f)
CLASSES = class_info["classes"]

# Nested settings stored under the "preprocessing" key of the config.
augmentation_settings = dict(
    auto_augment="none",
    erasing=0.0,
    mosaic=0.5,
    copy_paste=0.0,
)

# Dataset description: root path, image sub-folders per split, class
# count/names and the additional settings written alongside them.
data = dict(
    path=str(TRAIN_DATASET),
    train="images/train",
    val="images/val",
    test="images/test",
    nc=len(CLASSES),
    names=CLASSES,
    mask_ratio=2,
    overlap_mask=True,
    preprocessing=augmentation_settings,
)

# default_flow_style=False writes nested collections in block style.
with YAML_PATH.open("w", encoding="utf-8") as f:
    yaml.dump(data, f, default_flow_style=False)