## **Download WoodScape**

In [None]:
! pip install gdown scikit-learn pyyaml

In [2]:
import os
import zipfile

import gdown


def download_from_google_drive(output: str, file_id: str):
    """Download a zip archive from Google Drive and unpack it into ``output``.

    The archive is stored temporarily as ``<output>/archive.zip``, extracted
    into ``output`` and then removed, so only the extracted contents remain.

    Args:
        output: Directory that receives the extracted files. It is created
            (including parents) when it does not exist yet.
        file_id: Google Drive identifier of the zip archive to download.

    Raises:
        RuntimeError: If gdown signals a failed download by returning None.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    url = f"https://drive.google.com/uc?id={file_id}"
    output_zip = f"{output}/archive.zip"

    # The archive is written inside `output`, so the directory must exist
    # before the download starts.
    os.makedirs(output, exist_ok=True)

    # Some gdown versions return None instead of raising when the download
    # fails; fail loudly here rather than with a confusing error from zipfile.
    if gdown.download(url, output_zip, quiet=False) is None:
        raise RuntimeError(f"Failed to download Google Drive file {file_id!r}")

    try:
        with zipfile.ZipFile(output_zip, "r") as zip_ref:
            zip_ref.extractall(output)
    finally:
        # Never leave the temporary archive behind, even if extraction fails.
        os.remove(output_zip)

In [1]:
import os

# Move the working directory four levels up so that the relative "src/..."
# paths and the `src.*` imports used in later cells resolve (presumably this
# is the repository root — verify).
# NOTE: the path is relative, so re-running this cell moves up four more levels.
os.chdir("../../../..")

In [None]:
# Directory (relative to the working directory) that receives the dataset.
WOODSCAPE_PATH = "src/datasets/WoodScape"

# Google Drive identifiers of the two zip archives that are downloaded.
IMAGES_FILE_ID = "1xQ5J4huNmyK9WPoipHTnuZ7lw_J0xhvL"
ANNOTATIONS_FILE_ID = "1k9q1k8rh6hghSPFdmxmkZUd_Mip6pt6F"

# Fetch and unpack the images first, then the annotations, into the same
# directory.
download_from_google_drive(output=WOODSCAPE_PATH, file_id=IMAGES_FILE_ID)
download_from_google_drive(output=WOODSCAPE_PATH, file_id=ANNOTATIONS_FILE_ID)

In [3]:
# Root directory of the dataset, relative to the working directory.
WOODSCAPE_PATH = "src/datasets/WoodScape"

# Locations inside the dataset root that the following cells read from.
IMAGES_PATH = WOODSCAPE_PATH + "/rgb_images"
ANNOTATIONS_PATH = WOODSCAPE_PATH + "/instance_annotations"
CLASSINFO_PATH = WOODSCAPE_PATH + "/class_info.json"

## **Convert WoodScape annotations to Ultralytics format**

In [4]:
# Output directory for the converted annotations, inside the dataset root.
YOLO_ANNOTATIONS_PATH = WOODSCAPE_PATH + "/yolo_annotations"

In [None]:
import shutil

from src.models.train.convert import convert_Woodscape_to_Ultralytics

# Run the project's converter on ANNOTATIONS_PATH with YOLO_ANNOTATIONS_PATH
# and CLASSINFO_PATH (presumably: input directory, output directory, class
# definitions — verify against convert_Woodscape_to_Ultralytics).
convert_Woodscape_to_Ultralytics(
    ANNOTATIONS_PATH, YOLO_ANNOTATIONS_PATH, CLASSINFO_PATH
)
# The original annotation directory is deleted once the conversion call
# returns, so re-running this cell requires downloading the annotations again.
shutil.rmtree(ANNOTATIONS_PATH)

## **Split dataset**

In [5]:
# Directory for the split dataset, inside the dataset root.
TRAIN_DATASET = WOODSCAPE_PATH + "/train_dataset"

In [12]:
from src.models.train.split_dataset import split_dataset

# Split the images and converted annotations into TRAIN_DATASET (presumably
# producing the images/train, images/val and images/test sub-directories that
# the YAML file below refers to — verify against split_dataset).
split_dataset(IMAGES_PATH, YOLO_ANNOTATIONS_PATH, TRAIN_DATASET)

In [18]:
# Delete the converted annotations and the source images (in that order) now
# that the split step above has been run.
for _stale_dir in (YOLO_ANNOTATIONS_PATH, IMAGES_PATH):
    shutil.rmtree(_stale_dir)

## **Create YAML file**

In [None]:
import json

import yaml

# Read the class definitions stored under the "classes" key of the class-info
# file.
with open(CLASSINFO_PATH, "r", encoding="utf-8") as f:
    CLASSES = json.load(f)["classes"]

# Dataset description written to woodscape.yaml. The train/val/test entries
# are sub-directory names (presumably relative to "path" — verify against the
# Ultralytics dataset format).
data = {
    "path": TRAIN_DATASET,
    "train": "images/train",
    "val": "images/val",
    "test": "images/test",
    "nc": len(CLASSES),
    "names": CLASSES,
    "mask_ratio": 2,
    "overlap_mask": True,
    "preprocessing": {
        "auto_augment": "none",
        "erasing": 0.0,
        "mosaic": 0.5,
        "copy_paste": 0.0,
    },
}

# Write with an explicit encoding (matching the read above) instead of the
# platform default; safe_dump emits only plain YAML types, which is all this
# JSON-derived data contains.
with open(f"{WOODSCAPE_PATH}/woodscape.yaml", "w", encoding="utf-8") as file:
    yaml.safe_dump(data, file, default_flow_style=False)