In [1]:
import sys
!{sys.executable} -m pip install import-ipynb



In [3]:
# Train the dataset

import datetime
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from functools import partial

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
from torch import nn
from torch.utils.data import DataLoader

import import_ipynb
from darknet import YoloBody
from yolo_training import (YOLOLoss, get_lr_scheduler, set_optimizer_lr, weights_init)
from callbacks import EvalCallback, LossHistory
from dataloader import YoloDataset, yolo_dataset_collate
from utils import (get_anchors, get_classes, seed_everything, show_config, worker_init_fn)
from utils_fit import fit_one_epoch

'''
When training your own object detection model, make sure to pay attention to the following key points:
Before training, carefully check whether your dataset format meets the requirements.
This library requires the dataset to be in VOC format, which means you need to prepare:

Input images: These should be .jpg files. The size does not need to be fixed, as resizing will be handled automatically before training.
Grayscale images will be automatically converted to RGB, so you don’t need to convert them manually.
If the image extension is not .jpg, you must batch convert them to .jpg before starting training.

Labels: These should be .xml files containing the target information to be detected.
Each label file should correspond to one input image file.

The trained weight files are saved in the logs folder.
Each training epoch consists of several training steps (Steps), and one gradient descent update is performed per step.
If you only train for a few steps, the weights will not be saved.
'''
if __name__ == "__main__":
    # Cuda — GPU switch. It is forwarded to YOLOLoss, EvalCallback and fit_one_epoch,
    #        and when True the model is wrapped and moved with `.cuda()` further down.
    #        Set it to False on a machine without a CUDA device.
    Cuda            = True
    # seed — Fixed random seed, handed to seed_everything() and to every DataLoader's
    #        worker_init_fn. The intent is that independent training runs produce the
    #        same results.
    seed            = 11
    # distributed — When True, an NCCL process group is initialised and LOCAL_RANK / RANK
    #               are read from the environment; when False a single process is used.
    distributed     = False
    # sync_bn — Convert the model to SyncBatchNorm. Only applied when `distributed` is
    #           True and more than one GPU is visible; otherwise a notice is printed.
    sync_bn         = False
    # fp16 — When True, a torch.cuda.amp GradScaler is created and passed to fit_one_epoch.
    fp16            = False
    # classes_path — Text file read by get_classes() to obtain the class names and their count.
    classes_path    = 'model_data/voc_classes.txt'
    # anchors_path — Text file read by get_anchors() to obtain the anchors and their count.
    anchors_path    = 'model_data/yolo_anchors.txt'
    # anchors_mask — Passed to YoloBody, YOLOLoss and EvalCallback.
    #                NOTE(review): presumably the anchor indices used by each output head — confirm in darknet.py.
    anchors_mask    = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # The most important part of the model's pre-trained weights is the backbone feature extraction network,
    # which is used for extracting features.
    # Here, the weights of the entire model are used; they are loaded further down in this cell
    # (only keys whose name and shape match the model are copied).
    # The `pretrained` setting below does not affect the loading of these weights.
    # Setting model_path = '' skips the loading step entirely.
    model_path      = 'model_data/yolo_weights.pth'
    # input_shape — The input shape size, must be a multiple of 32
    input_shape     = [416, 416]
    # pretrained — Forwarded to YoloBody(pretrained=...). If model_path is set, the backbone weights do not
    #              need to be loaded separately, so the value of pretrained becomes meaningless.
    #              When it is False, weights_init(model) is called before model_path is loaded.
    pretrained      = False
    #----------------------------------------------------------------------------------------------------------------------------#
    # The training is divided into two phases: the frozen phase and the unfrozen phase.
    # Frozen training requires less GPU memory. If your GPU is very weak, you can set `Freeze_Epoch` equal to `UnFreeze_Epoch`,
    # which means only frozen training will be performed.

    # Here are some suggested parameter settings. You can flexibly adjust them according to your own needs:
    # (1) Training from the pre-trained weights of the whole model:
    #     Adam optimizer:
    #         Init_Epoch = 0, Freeze_Epoch = 50, UnFreeze_Epoch = 100, Freeze_Train = True,
    #         optimizer_type = 'adam', Init_lr = 1e-3, weight_decay = 0.  (Frozen)
    #         Init_Epoch = 0, UnFreeze_Epoch = 100, Freeze_Train = False,
    #         optimizer_type = 'adam', Init_lr = 1e-3, weight_decay = 0.  (Unfrozen)
    #     SGD optimizer:
    #         Init_Epoch = 0, Freeze_Epoch = 50, UnFreeze_Epoch = 300, Freeze_Train = True,
    #         optimizer_type = 'sgd', Init_lr = 1e-2, weight_decay = 5e-4.  (Frozen)
    #         Init_Epoch = 0, UnFreeze_Epoch = 300, Freeze_Train = False,
    #         optimizer_type = 'sgd', Init_lr = 1e-2, weight_decay = 5e-4.  (Unfrozen)
    #         Note: UnFreeze_Epoch can be adjusted between 100 and 300.
    # (2) Setting `batch_size`:
    #     Use the largest possible value that your GPU can handle.
    #     Insufficient GPU memory (errors like OOM or CUDA out of memory) is unrelated to dataset size — reduce `batch_size` if this occurs.
    #     Due to the influence of BatchNorm layers, the minimum `batch_size` is 2 and cannot be 1.
    #     Normally, it is recommended that `Freeze_batch_size` be 1–2 times the `Unfreeze_batch_size`.
    #     Avoid setting them too far apart, as it affects automatic learning rate adjustment.
    #----------------------------------------------------------------------------------------------------------------------------#
    #------------------------------------------------------------------#
    # Frozen phase training parameters
    # During this phase, the model's backbone is frozen (requires_grad is set to False on
    # model.backbone parameters), meaning the feature extraction network does not update.
    # This phase uses less GPU memory and only fine-tunes the rest of the network.

    # Init_Epoch — The starting epoch of the model training. It can be set higher than `Freeze_Epoch`, e.g.:
    #              Init_Epoch = 60, Freeze_Epoch = 50, UnFreeze_Epoch = 100
    #              This skips the frozen phase and starts directly from epoch 60 with the adjusted learning rate.
    #              (Used for resuming training from a checkpoint)

    # Freeze_Epoch — The number of epochs for the frozen training phase
    #                (This setting is ignored if `Freeze_Train=False`)
    # Freeze_batch_size — The batch size used during frozen training
    #                (This setting is ignored if `Freeze_Train=False`)
    #------------------------------------------------------------------#
    Init_Epoch          = 0
    Freeze_Epoch        = 50
    Freeze_batch_size   = 16
    #------------------------------------------------------------------#
    # Unfrozen phase training parameters
    # During this phase, the model's backbone is no longer frozen, so the feature extraction network will be updated.
    # This phase consumes more GPU memory, and all network parameters are trained.

    # UnFreeze_Epoch — Total number of epochs for the entire training process
    #                  (the epoch loop runs over range(Init_Epoch, UnFreeze_Epoch)).
    #                  SGD usually requires a longer convergence time, so a larger `UnFreeze_Epoch` is recommended.
    #                  Adam optimizer can use a relatively smaller `UnFreeze_Epoch`.

    # Unfreeze_batch_size — The batch size used during the unfrozen phase
    #------------------------------------------------------------------#
    UnFreeze_Epoch      = 300
    Unfreeze_batch_size = 8

    # Freeze_Train — Whether to start with the frozen phase; when False the whole
    #                run uses Unfreeze_batch_size and the backbone is never frozen.
    Freeze_Train        = True

    # Other training parameters: learning rate, optimizer, and learning rate decay settings

    # Init_lr — The maximum learning rate of the model
    # Min_lr  — The minimum learning rate of the model, defaulting to 1% of the maximum learning rate
    # Both values are rescaled by batch_size / 64 and clamped before use (see the training section).
    Init_lr             = 1e-2
    Min_lr              = Init_lr * 0.01
    #------------------------------------------------------------------#
    # optimizer_type — Type of optimizer to use; options are 'adam' and 'sgd'
    #                  Recommended settings:
    #                  - For Adam optimizer: Init_lr = 1e-3
    #                  - For SGD optimizer:  Init_lr = 1e-2
    # momentum — Momentum parameter used internally by the optimizer
    #            (SGD momentum, or the first Adam beta)
    # weight_decay — Weight decay used to prevent overfitting
    # Adam may cause issues with weight_decay, so it is recommended to set it to 0 when using Adam.
    #------------------------------------------------------------------#
    optimizer_type      = "sgd"
    momentum            = 0.937
    weight_decay        = 5e-4
    # lr_decay_type — Type of learning rate decay; options are 'step' and 'cos'
    lr_decay_type       = "cos"
    # save_period — Save the model weights every specified number of epochs
    save_period         = 10
    # save_dir — Folder where the model weights and log files will be saved
    save_dir            = 'logs'
    #------------------------------------------------------------------#
    # eval_flag — Whether to perform evaluation during training; the evaluation is done on the validation set
    # eval_period — Number of epochs between each evaluation
    #------------------------------------------------------------------#
    eval_flag           = True
    eval_period         = 10
    # num_workers — Passed to both DataLoaders as `num_workers` (number of data-loading worker processes).
    # A larger value speeds up data loading but uses more memory.
    # For computers with limited memory, it can be set to 2 or 0
    num_workers         = 0

    # Annotation list files for the training and validation splits.
    # Each file is read line by line below; the number of lines is used as the sample count.
    train_annotation_path   = '2007_train.txt'
    val_annotation_path     = '2007_val.txt'

    # Apply the configured random seed before anything else is built.
    seed_everything(seed)

    # Decide which device this process drives and what its ranks are.
    ngpus_per_node = torch.cuda.device_count()
    if not distributed:
        # Single-process run: both ranks are 0; use the GPU only if one is available.
        local_rank = 0
        rank = 0
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        # Multi-process run: the launcher is expected to export LOCAL_RANK and RANK.
        dist.init_process_group(backend="nccl")
        local_rank, rank = (int(os.environ[name]) for name in ("LOCAL_RANK", "RANK"))
        device = torch.device("cuda", local_rank)
        if local_rank == 0:
            print(f"[{os.getpid()}] (rank = {rank}, local_rank = {local_rank}) training...")
            print("Gpu Device Count : ", ngpus_per_node)

    # Read the class list and the anchor set from their text files.
    class_names, num_classes = get_classes(classes_path)
    anchors, num_anchors = get_anchors(anchors_path)

    # Instantiate the network; without `pretrained` it gets a fresh initialisation first.
    model = YoloBody(anchors_mask, num_classes, pretrained=pretrained)
    if not pretrained:
        weights_init(model)

    if model_path != '':
        if local_rank == 0:
            print('Load weights {}.'.format(model_path))

        # Copy over only those checkpoint entries whose key exists in the model
        # with an identical shape; everything else is reported as not loaded.
        model_dict = model.state_dict()
        pretrained_dict = torch.load(model_path, map_location = device)
        load_key, no_load_key, temp_dict = [], [], {}
        for name, tensor in pretrained_dict.items():
            compatible = name in model_dict and np.shape(model_dict[name]) == np.shape(tensor)
            if not compatible:
                no_load_key.append(name)
                continue
            temp_dict[name] = tensor
            load_key.append(name)
        model_dict.update(temp_dict)
        model.load_state_dict(model_dict)

        # Summarise which keys were and were not taken from the checkpoint.
        if local_rank == 0:
            print("\nSuccessful Load Key:", str(load_key)[:500], "……\nSuccessful Load Key Num:", len(load_key))
            print("\nFail To Load Key:", str(no_load_key)[:500], "……\nFail To Load Key num:", len(no_load_key))
            print("\n\033[1;33;44mFriendly reminder: It is normal for the head part not to be loaded; however, it is an error if the backbone part is not loaded.\033[0m")

    # Loss object for the YOLO heads.
    yolo_loss = YOLOLoss(anchors, num_classes, input_shape, Cuda, anchors_mask)

    # Only local rank 0 keeps a loss log; its directory is stamped with the start time.
    loss_history = None
    if local_rank == 0:
        time_str = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
        log_dir = os.path.join(save_dir, "loss_" + time_str)
        loss_history = LossHistory(log_dir, model, input_shape=input_shape)

    # A gradient scaler is only needed for mixed-precision training.
    scaler = None
    if fp16:
        from torch.cuda.amp import GradScaler
        scaler = GradScaler()

    # `model_train` is the handle used for the forward/backward passes.
    model_train = model.train()

    # SyncBatchNorm needs a distributed run on more than one GPU.
    if sync_bn:
        if ngpus_per_node > 1 and distributed:
            model_train = nn.SyncBatchNorm.convert_sync_batchnorm(model_train)
        else:
            print("Sync_bn is not support in one gpu or not distributed.")

    if Cuda and distributed:
        # One process per GPU: place the model on this process's device, then wrap it.
        model_train = model_train.cuda(local_rank)
        model_train = nn.parallel.DistributedDataParallel(model_train, device_ids=[local_rank], find_unused_parameters=True)
    elif Cuda:
        # Single process: wrap with DataParallel and enable the cuDNN autotuner flag.
        model_train = nn.DataParallel(model)
        cudnn.benchmark = True
        model_train = model_train.cuda()

    # Load the annotation lists; every line of a file counts as one sample.
    with open(train_annotation_path) as train_file:
        train_lines = list(train_file)
    with open(val_annotation_path) as val_file:
        val_lines = list(val_file)
    num_train, num_val = len(train_lines), len(val_lines)

    # Only local rank 0 prints the configuration table and the step-count advice.
    if local_rank == 0:
        show_config(
            classes_path = classes_path, anchors_path = anchors_path, anchors_mask = anchors_mask, model_path = model_path, input_shape = input_shape,
            Init_Epoch = Init_Epoch, Freeze_Epoch = Freeze_Epoch, UnFreeze_Epoch = UnFreeze_Epoch, Freeze_batch_size = Freeze_batch_size, Unfreeze_batch_size = Unfreeze_batch_size, Freeze_Train = Freeze_Train,
            Init_lr = Init_lr, Min_lr = Min_lr, optimizer_type = optimizer_type, momentum = momentum, lr_decay_type = lr_decay_type,
            save_period = save_period, save_dir = save_dir, num_workers = num_workers, num_train = num_train, num_val = num_val
        )

        # Recommended minimum number of optimizer steps for the chosen optimizer,
        # compared with what this configuration will actually run (counted with
        # the unfrozen batch size over all UnFreeze_Epoch epochs).
        wanted_step = 5e4 if optimizer_type == "sgd" else 1.5e4
        total_step  = num_train // Unfreeze_batch_size * UnFreeze_Epoch
        if total_step <= wanted_step:
            # Fewer samples than one batch: not even a single step per epoch is possible.
            if num_train // Unfreeze_batch_size == 0:
                raise ValueError('The dataset is too small to train the model. Please expand the dataset.')
            # Number of epochs that would reach the recommended step count.
            wanted_epoch = wanted_step // (num_train // Unfreeze_batch_size) + 1
            print("\n\033[1;33;44m[Warning] When using the %s optimizer, it is recommended to set the total training steps to at least %d.\033[0m" % (optimizer_type, wanted_step))
            print("\033[1;33;44m[Warning] This run has a total of %d training samples, Unfreeze_batch_size is %d, training for %d epochs, resulting in %d total training steps.\033[0m" % (num_train, Unfreeze_batch_size, UnFreeze_Epoch, total_step))
            # The three placeholders need three values; an empty tuple here raised
            # "TypeError: not enough arguments for format string".
            print("\033[1;33;44m[Warning] Since the total training steps is %d, which is less than the recommended %d, it is advised to set the total epochs to %d.\033[0m" % (total_step, wanted_step, wanted_epoch))

    if True:
        # Becomes True once the backbone has been unfrozen inside the epoch loop.
        UnFreeze_flag = False

        # With frozen training the backbone stops receiving gradients and the
        # frozen-phase batch size applies; otherwise go straight to the unfrozen one.
        if Freeze_Train:
            batch_size = Freeze_batch_size
            for param in model.backbone.parameters():
                param.requires_grad = False
        else:
            batch_size = Unfreeze_batch_size

        # Scale the learning rates with the batch size (relative to a nominal
        # batch of `nbs`) and clamp them to optimizer-specific limits.
        nbs = 64
        if optimizer_type == 'adam':
            lr_limit_max, lr_limit_min = 1e-3, 3e-4
        else:
            lr_limit_max, lr_limit_min = 5e-2, 5e-4
        lr_scale    = batch_size / nbs
        Init_lr_fit = min(max(lr_scale * Init_lr, lr_limit_min), lr_limit_max)
        Min_lr_fit  = min(max(lr_scale * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)

        # Split the model's tensors into three optimizer parameter groups:
        #   pg0 — weights of BatchNorm2d modules (or of modules whose name contains "bn");
        #         this group is created without a weight_decay argument
        #   pg1 — every other module weight; the only group given `weight_decay`
        #   pg2 — all module biases
        pg0, pg1, pg2 = [], [], []
        for k, v in model.named_modules():
            if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
                pg2.append(v.bias)
            if isinstance(v, nn.BatchNorm2d) or "bn" in k:
                pg0.append(v.weight)
            elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
                pg1.append(v.weight)

        # Build only the optimizer that was asked for. A plain dict of instances
        # would construct both Adam and SGD on every run and throw one away, and
        # argument validation in the unused constructor could abort the run.
        optimizer_builders = {
            'adam'  : lambda: optim.Adam(pg0, Init_lr_fit, betas = (momentum, 0.999)),
            'sgd'   : lambda: optim.SGD(pg0, Init_lr_fit, momentum = momentum, nesterov=True),
        }
        # An unsupported optimizer_type still surfaces as a KeyError, as before.
        optimizer = optimizer_builders[optimizer_type]()
        optimizer.add_param_group({"params": pg1, "weight_decay": weight_decay})
        optimizer.add_param_group({"params": pg2})

        # Learning-rate schedule spanning the whole run.
        lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch)

        # Number of full batches per epoch for each split.
        epoch_step = num_train // batch_size
        epoch_step_val = num_val // batch_size
        if 0 in (epoch_step, epoch_step_val):
            raise ValueError("The dataset is too small to continue training. Please expand the dataset.")

        # Datasets for the two splits.
        train_dataset = YoloDataset(train_lines, input_shape, num_classes, train = True)
        val_dataset = YoloDataset(val_lines, input_shape, num_classes, train = False)

        # In a distributed run the samplers own the shuffling and each process
        # gets its share of the batch; otherwise the loaders shuffle themselves.
        train_sampler = None
        val_sampler = None
        shuffle = True
        if distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset, shuffle=True)
            val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False)
            batch_size = batch_size // ngpus_per_node
            shuffle = False

        # Both loaders share every option except the dataset and its sampler.
        loader_options = dict(
            shuffle=shuffle,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=True,
            drop_last=True,
            collate_fn=yolo_dataset_collate,
            worker_init_fn=partial(worker_init_fn, rank=rank, seed=seed),
        )
        gen = DataLoader(train_dataset, sampler=train_sampler, **loader_options)
        gen_val = DataLoader(val_dataset, sampler=val_sampler, **loader_options)

        # Evaluation callback (mAP tracking), created on local rank 0 only.
        # It receives the validation lines and writes into the same log_dir as LossHistory.
        if local_rank == 0:
            eval_callback   = EvalCallback(model, input_shape, anchors, anchors_mask, class_names, num_classes, val_lines, log_dir, Cuda, \
                                            eval_flag=eval_flag, period=eval_period)
        else:
            eval_callback   = None
        
        # Main training loop: one iteration per epoch, from Init_Epoch up to (not including) UnFreeze_Epoch.
        for epoch in range(Init_Epoch, UnFreeze_Epoch):
            # One-shot switch from the frozen to the unfrozen phase.
            # The branch is entered only when all three conditions hold:
            #   - the current epoch is greater than or equal to Freeze_Epoch;
            #   - unfreezing has not yet been performed (UnFreeze_flag == False);
            #   - the user has set Freeze_Train = True.
            if epoch >= Freeze_Epoch and not UnFreeze_flag and Freeze_Train:
                batch_size = Unfreeze_batch_size
                #-------------------------------------------------------------------#
                # Re-derive the batch-size-scaled learning rates for the new batch_size,
                # using the same nominal batch (nbs) and clamping limits as before the loop.
                #-------------------------------------------------------------------#
                nbs             = 64
                lr_limit_max    = 1e-3 if optimizer_type == 'adam' else 5e-2
                lr_limit_min    = 3e-4 if optimizer_type == 'adam' else 5e-4
                Init_lr_fit     = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)
                Min_lr_fit      = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)
                # Rebuild the learning rate schedule from the new limits
                lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch)
                
                # Let the backbone receive gradients again.
                for param in model.backbone.parameters():
                    param.requires_grad = True

                # Steps per epoch change with the batch size (computed before the per-GPU split below).
                epoch_step      = num_train // batch_size
                epoch_step_val  = num_val // batch_size

                if epoch_step == 0 or epoch_step_val == 0:
                    raise ValueError("The dataset is too small to continue training. Please expand the dataset.")

                # In distributed mode each process loads its share of the batch.
                if distributed:
                    batch_size = batch_size // ngpus_per_node
                    
                # Recreate both loaders with the new batch size; datasets and samplers are reused.
                gen             = DataLoader(train_dataset, shuffle = shuffle, batch_size = batch_size, num_workers = num_workers, pin_memory=True,
                                            drop_last=True, collate_fn=yolo_dataset_collate, sampler=train_sampler, 
                                            worker_init_fn=partial(worker_init_fn, rank=rank, seed=seed))
                gen_val         = DataLoader(val_dataset  , shuffle = shuffle, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 
                                            drop_last=True, collate_fn=yolo_dataset_collate, sampler=val_sampler, 
                                            worker_init_fn=partial(worker_init_fn, rank=rank, seed=seed))

                # Make sure this branch is never taken again.
                UnFreeze_flag = True
                
            # Tell the distributed sampler which epoch this is so its shuffling differs per epoch.
            if distributed:
                train_sampler.set_epoch(epoch)
            # Presumably writes this epoch's scheduled learning rate into the optimizer — see yolo_training.
            set_optimizer_lr(optimizer, lr_scheduler_func, epoch)

            # Run one epoch with the current loaders.
            # NOTE(review): training, validation, logging and checkpointing are presumably handled inside fit_one_epoch — confirm in utils_fit.
            fit_one_epoch(model_train, model, yolo_loss, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, UnFreeze_Epoch, Cuda, fp16, scaler, save_period, save_dir, local_rank)
                        
            # Keep all processes in step before the next epoch starts.
            if distributed:
                dist.barrier()

        # loss_history only exists on local rank 0; close its writer once training is done.
        if local_rank == 0:
            loss_history.writer.close()

initialize network with normal type
Load weights model_data/yolo_weights.pth.


  pretrained_dict = torch.load(model_path, map_location = device)



Successful Load Key: ['backbone.conv1.weight', 'backbone.bn1.weight', 'backbone.bn1.bias', 'backbone.bn1.running_mean', 'backbone.bn1.running_var', 'backbone.layer1.ds_conv.weight', 'backbone.layer1.ds_bn.weight', 'backbone.layer1.ds_bn.bias', 'backbone.layer1.ds_bn.running_mean', 'backbone.layer1.ds_bn.running_var', 'backbone.layer1.residual_0.conv1.weight', 'backbone.layer1.residual_0.bn1.weight', 'backbone.layer1.residual_0.bn1.bias', 'backbone.layer1.residual_0.bn1.running_mean', 'backbone.layer1.residual_0.bn1. ……
Successful Load Key Num: 360

Fail To Load Key: ['last_layer0.6.weight', 'last_layer0.6.bias', 'last_layer1.6.weight', 'last_layer1.6.bias', 'last_layer2.6.weight', 'last_layer2.6.bias'] ……
Fail To Load Key num: 6

[1;33;44mFriendly reminder: It is normal for the head part not to be loaded; however, it is an error if the backbone part is not loaded.[0m
Configurations:
----------------------------------------------------------------------
|                     keys |  

KeyboardInterrupt: 