YOLO-World
======


**YOLO-World: Real-Time Open-Vocabulary Object Detection**

 * Paper: https://arxiv.org/abs/2401.17270


## Installation

```bash

# Create and activate a dedicated conda environment (Python 3.10).
conda create -n yolow python=3.10 -y
conda activate yolow

# Install PyTorch for CUDA 11.6 from the official channels
conda install -y pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 \
                pytorch-cuda=11.6 -c pytorch -c nvidia

# Optional: install GCC 9 for compatibility when compiling detectron2
conda install -y gcc=9 gxx=9 -c conda-forge

# Install the CUDA 11.6 toolkit (cudatoolkit-dev package from conda-forge).
conda install -y cudatoolkit-dev=11.6 -c conda-forge

# Point CUDA_HOME at the conda environment and expose its bin/ and lib/
# directories to the shell:
export CUDA_HOME=$CONDA_PREFIX  # since cudatoolkit-dev installs CUDA here
export PATH=$CUDA_HOME/bin:$PATH
export LD_LIBRARY_PATH=$CUDA_HOME/lib:$LD_LIBRARY_PATH

# Verify the nvcc version.
# It should report "Cuda compilation tools, release 11.6".
nvcc -V

# Clone the YOLO-World repo (with submodules) into YOLOWorld_repo and install
# it in editable mode:
git clone --recursive https://github.com/AILab-CVC/YOLO-World.git YOLOWorld_repo
cd YOLOWorld_repo
pip install -e .

# OpenMMLab dependencies, installed through openmim.
pip install -U openmim
mim install mmengine
mim install "mmcv>=2.0.0"


# Install mmdet from source: https://github.com/open-mmlab/mmdetection
# Note: the current directory is still YOLOWorld_repo, so mmdetection is
# cloned inside it.
git clone https://github.com/open-mmlab/mmdetection.git
# If the `git clone` command fails, deactivate conda, run the clone again,
# and re-activate conda afterwards. (Something in the environment appears to
# get messed up after compiling the previous libraries.)
cd mmdetection
pip install -v -e .

# Downgrade numpy to a version below 2.0.
# Note: the shell is now in YOLOWorld_repo/mmdetection, while the later
# commands in this README use paths relative to the directory that contains
# YOLOWorld_repo.
pip install "numpy<2"
```

## Download model

```bash
wget https://huggingface.co/wondervictor/YOLO-World-V2.1/resolve/main/m_stage2-9987dcb1.pth
```

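### Additional Setup

Copy the model config into the working directory; the notebook below loads it by its bare file name.
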
```bash
cp YOLOWorld_repo/configs/pretrain/yolo_world_v2_m_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py .
```

In [1]:
import os
import sys
import cv2
import argparse
from typing import List, Tuple
import os.path as osp

import cv2
import torch
from mmengine.config import Config, DictAction
from mmengine.runner.amp import autocast
from mmengine.dataset import Compose
from mmengine.utils import ProgressBar
from mmdet.apis import init_detector
from mmdet.utils import get_test_pipeline_cfg

sys.path.append("YOLOWorld_repo")

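[cell output, not code: this appears to be the tail of a warning printed to stderr while importing tqdm]
  from .autonotebook import tqdm as notebook_tqdm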


In [2]:
# Model config, referenced by bare file name: the "Additional Setup" step
# copies it out of YOLOWorld_repo/configs/pretrain/ into the working directory.
CONFIG_PATH: str = "yolo_world_v2_m_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py"

# Checkpoint file fetched by the "Download model" step.
CHECKPOINT_PATH: str = "m_stage2-9987dcb1.pth"

# Detection limits. NOTE(review): their consumers are not visible in this
# part of the file; presumably the minimum confidence to keep a detection and
# the maximum number of detections kept per image. Confirm against the
# inference code.
SCORE_THRESHOLD: float = 0.1
TOPK: int = 100

def prepare_pipeline(cfg: Config) -> Compose:
    """Build the test-time data pipeline described by ``cfg``.

    Args:
        cfg: Model config that is handed to ``get_test_pipeline_cfg``.

    Returns:
        A ``Compose`` wrapping the pipeline config returned by
        ``get_test_pipeline_cfg``.
    """
    # The pipeline config is passed through unmodified.
    # NOTE(review): the original author notes that ``get_test_pipeline_cfg``
    # converts MMYOLO specifics into an MMEngine-compatible format and that
    # the ``LoadImageFromFile`` step (which expects a file path) is kept as
    # is -- neither is verifiable from this file; confirm against mmdet.
    return Compose(get_test_pipeline_cfg(cfg=cfg))
