ultralytics · hulkds · Mar 3, 2024 · Mar 3, 2024 · Mar 4, 2024 · Mar 4, 2024
diff --git a/docs/en/modes/train.md b/docs/en/modes/train.md
@@ -216,6 +216,7 @@ The training settings for YOLO models encompass various hyperparameters and conf
 | `box`             | `7.5`    | Weight of the box loss component in the loss function, influencing how much emphasis is placed on accurately predicting bounding box coordinates.                                                                    |
 | `cls`             | `0.5`    | Weight of the classification loss in the total loss function, affecting the importance of correct class prediction relative to other components.                                                                     |
 | `dfl`             | `1.5`    | Weight of the distribution focal loss, used in certain YOLO versions for fine-grained classification.                                                                                                                |
+| `pos_weight`      | `[1]`    | Weight of positive examples passed to `BCEWithLogitsLoss`, used to address class imbalance. When utilized, its length must match the length of the class dimension.                                                |
 | `pose`            | `12.0`   | Weight of the pose loss in models trained for pose estimation, influencing the emphasis on accurately predicting pose keypoints.                                                                                     |
 | `kobj`            | `2.0`    | Weight of the keypoint objectness loss in pose estimation models, balancing detection confidence with pose accuracy.                                                                                                 |
 | `label_smoothing` | `0.0`    | Applies label smoothing, softening hard labels to a mix of the target label and a uniform distribution over labels, can improve generalization.                                                                      |

diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml
@@ -97,6 +97,7 @@ warmup_bias_lr: 0.1 # (float) warmup initial bias lr
 box: 7.5 # (float) box loss gain
 cls: 0.5 # (float) cls loss gain (scale with pixels)
 dfl: 1.5 # (float) dfl loss gain
+pos_weight: [1] # (list) weight of positive examples, passed as pos_weight to PyTorch BCEWithLogitsLoss
 pose: 12.0 # (float) pose loss gain
 kobj: 1.0 # (float) keypoint obj loss gain
 label_smoothing: 0.0 # (float) label smoothing (fraction)

diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py
@@ -154,7 +154,7 @@
         h = model.args  # hyperparameters
 
         m = model.model[-1]  # Detect() module
-        self.bce = nn.BCEWithLogitsLoss(reduction="none")
+
         self.hyp = h
         self.stride = m.stride  # model strides
         self.nc = m.nc  # number of classes
@@ -168,6 +168,9 @@
         self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device)
         self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)
 
+        self.pos_weight = torch.Tensor(self.hyp.pos_weight).to(torch.device(self.device))
+        self.bce = nn.BCEWithLogitsLoss(reduction="none", pos_weight=self.pos_weight)
+
     def preprocess(self, targets, batch_size, scale_tensor):
         """Preprocesses the target counts and matches with the input batch size to output a tensor."""
         if targets.shape[0] == 0: