In [1]:
"""
Author: Wouter Van Gansbeke

Main file for training auto-encoders and vaes
Licensed under the CC BY-NC 4.0 license (https://creativecommons.org/licenses/by-nc/4.0/)
"""

import os
import sys
sys.argv = sys.argv[:1]
import json
import hydra
import wandb
import builtins
from termcolor import colored
from datetime import datetime
from omegaconf import OmegaConf, DictConfig
from typing import Dict, Any
from termcolor import colored
import torch
import torch.multiprocessing as mp
import torch.distributed as dist
from diffusers import AutoencoderKL

from ldmseg.models import GeneralVAESeg
from ldmseg.trainers import TrainerAE
from ldmseg.utils import prepare_config, Logger, is_main_process

from hydra import initialize, compose
from main_worker_ae import main_worker

In [2]:
# -------------------------------------------------------------------------------
# Step 0: Drop the extra command-line arguments that the notebook passes in
# automatically, keeping only the program name.
sys.argv = sys.argv[:1]

# -------------------------------------------------------------------------------
# Step 1: Load the configuration through the Hydra compose API.
# Adjust `config_path` to your layout, e.g. if your config files live in "configs/".
# `version_base="1.1"` is the level Hydra was already assuming when the argument
# was omitted; passing it explicitly only silences the start-up warning.
with initialize(version_base="1.1", config_path="tools/configs/", job_name="config"):
    cfg = compose(config_name="config")
# Convert the OmegaConf object into plain Python containers.
cfg = OmegaConf.to_object(cfg)

# -------------------------------------------------------------------------------
# Step 2: Split, merge and pre-process the configuration.
wandb.config = cfg
# The keys below are expected to exist in the config file; add them there (or use
# .get() with a default) if they are missing.
cfg_dist = cfg['distributed']
cfg_base = cfg['base']
project_dir = cfg['setup']

# Merge the base config with the dataset-specific one; dataset entries override
# base entries that share the same key.
cfg_dataset = {**cfg_base, **cfg['datasets']}

root_dir = os.path.join(cfg['env']['root_dir'], project_dir)
data_dir = cfg['env']['data_dir']

# prepare_config returns the updated dataset config together with a project name.
cfg_dataset, project_name = prepare_config(cfg_dataset, root_dir, data_dir, run_idx=cfg['run_idx'])
project_name = f"{cfg_dataset['train_db_name']}_{project_name}"
print(colored(f"Project name: {project_name}", 'red'))

# -------------------------------------------------------------------------------
# Step 3: Configure distributed training.
# When world_size is -1 and dist_url is "env://", take the world size from the
# WORLD_SIZE environment variable (falling back to 1 when it is unset).
if cfg_dist['dist_url'] == "env://" and cfg_dist['world_size'] == -1:
    cfg_dist['world_size'] = int(os.environ.get("WORLD_SIZE", 1))
cfg_dist['distributed'] = cfg_dist['world_size'] > 1 or cfg_dist['multiprocessing_distributed']

# -------------------------------------------------------------------------------
# Step 4: Notebook-level training overrides.
# These are applied BEFORE the debug block so that the debug settings below are
# not clobbered (previously train_num_steps was reset to 500 after debug had set
# it to 1, which made the debug value dead code).
TRAIN_NUM_STEPS = 500
WARMUP_ITERS = 100
cfg_dataset['train_kwargs']['train_num_steps'] = TRAIN_NUM_STEPS
cfg_dataset['lr_scheduler_kwargs']['warmup_iters'] = WARMUP_ITERS

# -------------------------------------------------------------------------------
# Step 5: Special settings for debug mode.
# Read the flag once so both the setup and the launch branch agree.
# NOTE(review): a missing 'debug' key falls back to True (debug mode) — confirm
# that this default is intended.
debug_mode = cfg.get('debug', True)
if debug_mode:
    print(colored("Running in debug mode!", "red"))
    cfg_dist['world_size'] = 1
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    cfg_dataset['train_kwargs']['num_workers'] = 0
    cfg_dataset['train_kwargs']['train_num_steps'] = 1

# -------------------------------------------------------------------------------
# Step 6: Scale the world size by the number of visible GPUs.
# Queried after CUDA_VISIBLE_DEVICES may have been set by the debug block above.
ngpus_per_node = torch.cuda.device_count()
cfg_dist['world_size'] = ngpus_per_node * cfg_dist['world_size']
print(colored(f"World size: {cfg_dist['world_size']}", 'blue'))

# -------------------------------------------------------------------------------
# Step 7: Launch training.
if debug_mode:
    # Debug mode: call main_worker directly in this process.
    main_worker(0, ngpus_per_node, cfg_dist, cfg_dataset, project_name)
else:
    # Otherwise start one process per GPU with mp.spawn.
    # Note: multiprocessing inside a notebook may cause additional problems.
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, cfg_dist, cfg_dataset, project_name))

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  with initialize(config_path="tools/configs/", job_name="config"):


[31mUsing current time as run identifier: 20250415_215902[0m
[31mProject name: kitti_20250415_215902[0m
[34mWorld size: 2[0m
Use GPU: 0 for printing
[33mStarting distributed training[0m
[33mInitialized distributed training[0m
[31m{
    "backbone": "unet",
    "data_dir": "/root/autodl-tmp/video_sequence",
    "ema_kwargs": {
        "decay": 0.9999,
        "device": "cuda"
    },
    "ema_on": false,
    "eval_kwargs": {
        "batch_size": 16,
        "count_th": 512,
        "mask_th": 0.5,
        "num_workers": 2,
        "overlap_th": 0.5,
        "print_freq": 100,
        "vis_every": 5000
    },
    "eval_only": false,
    "fill_value": 0.5,
    "has_bg": false,
    "ignore_label": 0,
    "image_scaling_factor": 0.18215,
    "inpainting_strength": 0.0,
    "load_path": null,
    "log_file": "/root/autodl-tmp/Video-latent-diffusion-panoptic-segmentation/simple_diffusion/kitti/run_20250415_215902/logger.txt",
    "loss_kwargs": {
        "cost_class": 1.0,
        "

7it [00:04,  1.44it/s]
7it [00:05,  1.39it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.79it/s]
100%|██████████| 7/7 [00:03<00:00,  1.78it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[34m-------------------------[0m
[34mStarting epoch 0[0m


0it [00:00, ?it/s]

Learning rate is set to: 0.000e+00
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.77it/s]
7it [00:03,  1.77it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:04<00:00,  1.75it/s]
100%|██████████| 7/7 [00:04<00:00,  1.73it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.625406[0m
[33mETA: 0:57:03.535767[0m
[34m-------------------------[0m
[34mStarting epoch 1[0m


0it [00:00, ?it/s]

Learning rate is set to: 3.030e-06
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.79it/s]
7it [00:03,  1.75it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.79it/s]
100%|██████████| 7/7 [00:03<00:00,  1.76it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.636567[0m
[33mETA: 0:44:21.479976[0m
[34m-------------------------[0m
[34mStarting epoch 2[0m


0it [00:00, ?it/s]

Learning rate is set to: 6.061e-06
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.79it/s]
7it [00:04,  1.75it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.79it/s]
100%|██████████| 7/7 [00:03<00:00,  1.77it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.562929[0m
[33mETA: 0:39:55.677859[0m
[34m-------------------------[0m
[34mStarting epoch 3[0m


0it [00:00, ?it/s]

Learning rate is set to: 9.091e-06
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.76it/s]
7it [00:03,  1.75it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.82it/s]
100%|██████████| 7/7 [00:03<00:00,  1.80it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.411560[0m
[33mETA: 0:37:30.827081[0m
[34m-------------------------[0m
[34mStarting epoch 4[0m


0it [00:00, ?it/s]

Learning rate is set to: 1.212e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.82it/s]
7it [00:03,  1.78it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.79it/s]
100%|██████████| 7/7 [00:03<00:00,  1.77it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.338343[0m
[33mETA: 0:35:56.979443[0m
[34m-------------------------[0m
[34mStarting epoch 5[0m


0it [00:00, ?it/s]

Learning rate is set to: 1.515e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.77it/s]
7it [00:04,  1.75it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.77it/s]
100%|██████████| 7/7 [00:03<00:00,  1.76it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.509883[0m
[33mETA: 0:34:55.237830[0m
[34m-------------------------[0m
[34mStarting epoch 6[0m


0it [00:00, ?it/s]

Learning rate is set to: 1.818e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.77it/s]
7it [00:04,  1.75it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:04<00:00,  1.75it/s]
100%|██████████| 7/7 [00:04<00:00,  1.74it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.623252[0m
[33mETA: 0:34:10.439415[0m
[34m-------------------------[0m
[34mStarting epoch 7[0m


0it [00:00, ?it/s]

Learning rate is set to: 2.121e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.75it/s]
7it [00:04,  1.73it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:04<00:00,  1.75it/s]
100%|██████████| 7/7 [00:04<00:00,  1.74it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.588434[0m
[33mETA: 0:33:33.243225[0m
[34m-------------------------[0m
[34mStarting epoch 8[0m


0it [00:00, ?it/s]

Learning rate is set to: 2.424e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.77it/s]
7it [00:03,  1.75it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.80it/s]
100%|██████████| 7/7 [00:03<00:00,  1.75it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.523594[0m
[33mETA: 0:33:00.599532[0m
[34m-------------------------[0m
[34mStarting epoch 9[0m


0it [00:00, ?it/s]

Learning rate is set to: 2.727e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.78it/s]
7it [00:04,  1.75it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.76it/s]



KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.507817[0m
[33mETA: 0:32:31.931772[0m
[34m-------------------------[0m
[34mStarting epoch 10[0m


0it [00:00, ?it/s]

Learning rate is set to: 3.030e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.78it/s]
7it [00:04,  1.70it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:04<00:00,  1.73it/s]
100%|██████████| 7/7 [00:04<00:00,  1.72it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.821774[0m
[33mETA: 0:32:10.836173[0m
[34m-------------------------[0m
[34mStarting epoch 11[0m


0it [00:00, ?it/s]

Learning rate is set to: 3.333e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:04,  1.68it/s]
7it [00:04,  1.70it/s]


images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.76it/s]
100%|██████████| 7/7 [00:03<00:00,  1.75it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.881859[0m
[33mETA: 0:31:52.062327[0m
[34m-------------------------[0m
[34mStarting epoch 12[0m


0it [00:00, ?it/s]

Learning rate is set to: 3.636e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:03,  1.77it/s]
7it [00:03,  1.76it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.76it/s]
100%|██████████| 7/7 [00:03<00:00,  1.76it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.618328[0m
[33mETA: 0:31:31.226928[0m
[34m-------------------------[0m
[34mStarting epoch 13[0m


0it [00:00, ?it/s]

Learning rate is set to: 3.939e-05
[33mModel saved for run kitti_20250415_215902[0m
[34mDistributed evaluation on the validation set[0m
Thresholding output


7it [00:04,  1.74it/s]
7it [00:03,  1.75it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

images saved
Evaluation for semantic segmentation - val set
mIoU is 0.00


100%|██████████| 7/7 [00:03<00:00,  1.79it/s]
100%|██████████| 7/7 [00:03<00:00,  1.78it/s]


KITTI 全景评价结果： {'pq': 0.0, 'sq': 0.0, 'rq': 0.0, 'iou_sum': 0.0, 'tp': 0, 'fp': 4050, 'fn': 0}
[33mAverage loss: nan[0m
[33mEpoch took 0:00:11.591492[0m
[33mETA: 0:31:11.414963[0m
[34m-------------------------[0m
[34mStarting epoch 14[0m


0it [00:00, ?it/s]

Learning rate is set to: 4.242e-05


ProcessRaisedException: 

-- Process 0 terminated with the following error:
Traceback (most recent call last):
  File "/root/miniconda3/envs/LDMSeg/lib/python3.11/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
    fn(i, *args)
  File "/root/autodl-tmp/Video-latent-diffusion-panoptic-segmentation/main_worker_ae.py", line 108, in main_worker
    trainer.train_loop()
  File "/root/autodl-tmp/Video-latent-diffusion-panoptic-segmentation/ldmseg/trainers/trainers_ae.py", line 406, in train_loop
    self.save(epoch)
  File "/root/autodl-tmp/Video-latent-diffusion-panoptic-segmentation/ldmseg/trainers/trainers_ae.py", line 507, in save
    torch.save(data, str(self.results_folder / 'model.pt'))
  File "/root/miniconda3/envs/LDMSeg/lib/python3.11/site-packages/torch/serialization.py", line 440, in save
    with _open_zipfile_writer(f) as opened_zipfile:
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/LDMSeg/lib/python3.11/site-packages/torch/serialization.py", line 315, in _open_zipfile_writer
    return container(name_or_buffer)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/LDMSeg/lib/python3.11/site-packages/torch/serialization.py", line 288, in __init__
    super().__init__(torch._C.PyTorchFileWriter(str(name)))
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Parent directory /root/autodl-tmp/Video-latent-diffusion-panoptic-segmentation/simple_diffusion/kitti/run_20250415_215902 does not exist.
