In [1]:
import argparse
import os
import warnings

import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import TQDMProgressBar

from nanodet.data.collate import naive_collate
from nanodet.data.dataset import build_dataset
from nanodet.evaluator import build_evaluator
from nanodet.trainer.task import TrainingTask
from nanodet.util import (
    NanoDetLightningLogger,
    cfg,
    convert_old_model,
    env_utils,
    load_config,
    load_model_weight,
    mkdir,
)

  __import__("pkg_resources").declare_namespace(__name__)


In [3]:
# Stand-in for command-line parsing: create the argument namespace by hand,
# then pass the chosen config path to load_config along with the shared cfg.
args = argparse.Namespace(config='../config/nanodet-plus-m_320.yml')
load_config(cfg, args.config)

In [4]:
# The logger is constructed from cfg.save_dir and asked to dump the config.
logger = NanoDetLightningLogger(cfg.save_dir)
logger.dump_cfg(cfg)

logger.info("Setting up data...")
train_dataset = build_dataset(cfg.data.train, "train")
val_dataset = build_dataset(cfg.data.val, "test")

# The evaluator is built against the validation dataset.
evaluator = build_evaluator(cfg.evaluator, val_dataset)

# Settings common to both loaders; only shuffling and drop_last differ.
shared_loader_kwargs = dict(
    batch_size=cfg.device.batchsize_per_gpu,
    num_workers=cfg.device.workers_per_gpu,
    pin_memory=True,
    collate_fn=naive_collate,
)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, drop_last=True, **shared_loader_kwargs
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, shuffle=False, drop_last=False, **shared_loader_kwargs
)

[1m[35m[NanoDet][0m[34m[06-09 11:12:06][0m[32mINFO:[0m[97mSetting up data...[0m


creating index...
index created!
creating index...
index created!


In [5]:
# Log the stage, then build the training task from the loaded config and the
# evaluator; `task` is what is later handed to trainer.fit.
logger.info("Creating model...")
task = TrainingTask(cfg, evaluator)

[1m[35m[NanoDet][0m[34m[06-09 11:12:12][0m[32mINFO:[0m[97mCreating model...[0m
INFO:NanoDet:Creating model...


model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


In [6]:
# Resume from the last checkpoint in save_dir only when the schedule section
# contains a "resume" entry; otherwise start without a checkpoint.
model_resume_path = None
if "resume" in cfg.schedule:
    model_resume_path = os.path.join(cfg.save_dir, "model_last.ckpt")

# gpu_ids == [-1] selects CPU training; any other value is used as the
# device list for the GPU accelerator.
if cfg.device.gpu_ids == [-1]:
    logger.info("Using CPU training")
    accelerator = "cpu"
    devices = None
else:
    accelerator = "gpu"
    devices = cfg.device.gpu_ids

# Identical in both branches: no strategy yet, precision taken from config.
strategy = None
precision = cfg.device.precision

[1m[35m[NanoDet][0m[34m[06-09 11:12:13][0m[32mINFO:[0m[97mUsing CPU training[0m
INFO:NanoDet:Using CPU training


In [None]:
# With more than one device, switch the strategy to DDP and enable the
# distributed multi-processing setup.
use_ddp = bool(devices) and len(devices) > 1
if use_ddp:
    strategy = "ddp"
    env_utils.set_multi_processing(distributed=True)

# Trainer options gathered in one mapping, in the same order as before.
trainer_options = {
    "default_root_dir": cfg.save_dir,
    "max_epochs": cfg.schedule.total_epochs,
    "check_val_every_n_epoch": cfg.schedule.val_intervals,
    "accelerator": accelerator,
    "devices": devices,
    "log_every_n_steps": cfg.log.interval,
    "num_sanity_val_steps": 0,
    "callbacks": [TQDMProgressBar(refresh_rate=0)],  # disable tqdm bar
    "logger": logger,
    "benchmark": cfg.get("cudnn_benchmark", True),
    "gradient_clip_val": cfg.get("grad_clip", 0.0),
    "strategy": strategy,
    "precision": precision,
}
trainer = pl.Trainer(**trainer_options)

# Start training; ckpt_path is None unless a resume checkpoint was selected.
trainer.fit(
    task,
    train_dataloader,
    val_dataloader,
    ckpt_path=model_resume_path,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



  | Name      | Type        | Params
------------------------------------------
0 | model     | NanoDetPlus | 4.2 M 
1 | avg_model | NanoDetPlus | 4.2 M 
------------------------------------------
8.3 M     Trainable params
0         Non-trainable params
8.3 M     Total params
33.309    Total estimated model params size (MB)
[1m[35m[NanoDet][0m[34m[06-09 11:12:13][0m[32mINFO:[0m[97mWeight Averaging is enabled[0m
INFO:NanoDet:Weight Averaging is enabled
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
[1m[35m[NanoDet][0m[34m[06-09 11:13:29][0m[32mINFO:[0m[97mTrain|Epoch1/5|Iter0(1/162)| mem:0G| lr:1.00e-07| loss_qfl:0.0101| loss_bbox:0.1730| loss_dfl:0.0508| aux_loss_qfl:0.0104| aux_loss_bbox:0.1864| aux_loss_dfl:0.0579| [0m
INFO:NanoDet:Train|Epoch1/5|Iter0(1/162)| mem:0G| lr:1.00e-07| loss_qfl:0.0101| loss_bbox:0.1730| loss_dfl:0.0508| aux_loss_qfl:0.0104| aux_loss_bbox:0.1864| aux_loss_dfl:0.0579| 
[1m[35m[NanoDet][0m[34m[06-09 11:13:31][