Move docs to vanilla YOLO
zhiqwang committed Jul 12, 2021
1 parent 8867bdb commit 1c9431e
Showing 2 changed files with 25 additions and 25 deletions.
yolort/models/box_head.py: 2 changes (1 addition, 1 deletion)
@@ -157,7 +157,7 @@ def forward(
"""
batch_size, _, _, _, K = head_outputs[0].shape

all_pred_logits: List[Tensor] = []
all_pred_logits = []
for pred_logits in head_outputs:
pred_logits = pred_logits.reshape(batch_size, -1, K) # Size=(NN, HWA, K)
all_pred_logits.append(pred_logits)
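
For context, the surrounding ``forward`` in box_head.py flattens each detection layer's output to ``(batch_size, H*W*A, K)`` and concatenates the levels before decoding. Below is a minimal sketch of that flattening, assuming plain ``torch`` tensors with made-up level sizes and ``K = 85`` outputs per anchor; it is illustration only, not code from this commit.

```python
import torch
from typing import List
from torch import Tensor

# Fake multi-scale head outputs, one tensor per detection layer,
# shaped (N, A, H, W, K) to mirror head_outputs[0].shape in box_head.py.
batch_size, num_anchors, num_outputs = 2, 3, 85
head_outputs: List[Tensor] = [
    torch.rand(batch_size, num_anchors, s, s, num_outputs) for s in (80, 40, 20)
]

all_pred_logits = []
for pred_logits in head_outputs:
    # Flatten each level to (N, H*W*A, K) so all levels can be concatenated.
    all_pred_logits.append(pred_logits.reshape(batch_size, -1, num_outputs))

merged = torch.cat(all_pred_logits, dim=1)
print(merged.shape)  # torch.Size([2, 25200, 85]) for the made-up sizes above
```

The ``reshape(batch_size, -1, K)`` lets each level contribute however many anchor positions it has, so the concatenation along dim 1 yields one prediction tensor per batch regardless of the feature-map sizes.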
yolort/models/yolo.py: 48 changes (24 additions, 24 deletions)
@@ -21,6 +21,28 @@
class YOLO(nn.Module):
"""
Implements YOLO series model.
The input to the model is expected to be a batched tensor of shape ``[N, C, H, W]``, one entry per
image, with values in the ``0-1`` range. Different images can have different sizes.
The behavior of the model changes depending on whether it is in training or evaluation mode.
During training, the model expects both the input tensors and the targets (a list of dictionaries),
each containing:
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values
of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the class label for each ground-truth box
The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
losses.
During inference, the model requires only the input tensors, and returns the post-processed
predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
follows:
- boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values
of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the predicted labels for each image
- scores (``Tensor[N]``): the scores of each prediction
"""
__annotations__ = {
'compute_loss': SetCriterion,
@@ -35,13 +57,13 @@ def __init__(
anchor_generator: Optional[nn.Module] = None,
head: Optional[nn.Module] = None,
# Training parameter
criterion: Optional[Callable[..., Dict[str, Tensor]]] = None,
iou_thresh: float = 0.5,
criterion: Optional[Callable[..., Dict[str, Tensor]]] = None,
# Post Process parameter
post_process: Optional[nn.Module] = None,
score_thresh: float = 0.05,
nms_thresh: float = 0.5,
detections_per_img: int = 300,
post_process: Optional[nn.Module] = None,
):
super().__init__()
if not hasattr(backbone, "out_channels"):
@@ -169,28 +191,6 @@ def _yolov5_darknet_pan(
"""
Constructs a YOLO model.
The input to the model is expected to be a batched tensor of shape ``[N, C, H, W]``, one entry per
image, with values in the ``0-1`` range. Different images can have different sizes.
The behavior of the model changes depending on whether it is in training or evaluation mode.
During training, the model expects both the input tensors and the targets (a list of dictionaries),
each containing:
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values
of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the class label for each ground-truth box
The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
losses.
During inference, the model requires only the input tensors, and returns the post-processed
predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
follows:
- boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values
of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the predicted labels for each image
- scores (``Tensor[N]``): the scores of each prediction
Example::
>>> model = yolov5(pretrained=True)
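
To make the documented training/inference contract concrete, here is a minimal usage sketch. It follows the ``Example::`` above but uses ``yolov5s`` as a stand-in builder name, with made-up image sizes and targets; the exact builders and signatures exposed by ``yolort.models`` may differ from this sketch.

```python
import torch
from yolort.models import yolov5s  # builder name is an assumption; see the Example above

model = yolov5s(pretrained=True)

# Training mode: inputs plus targets, returns a dict of losses
# (classification and box regression), as described in the docstring.
model.train()
images = torch.rand(2, 3, 640, 640)  # batched tensor in [0, 1], shape [N, C, H, W]
targets = [
    {"boxes": torch.tensor([[10.0, 20.0, 200.0, 300.0]]), "labels": torch.tensor([1])},
    {"boxes": torch.tensor([[50.0, 60.0, 120.0, 180.0]]), "labels": torch.tensor([3])},
]
loss_dict = model(images, targets)

# Evaluation mode: inputs only, returns one dict per image with
# "boxes", "labels" and "scores" fields.
model.eval()
with torch.no_grad():
    detections = model(images)
print(detections[0]["boxes"].shape, detections[0]["scores"].shape)
```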
