diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 7a3e1b7ddfbc..f25b017ace8b 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,3 +1,4 @@
+
diff --git a/.github/SECURITY.md b/.github/SECURITY.md
new file mode 100644
index 000000000000..aa3e8409da6b
--- /dev/null
+++ b/.github/SECURITY.md
@@ -0,0 +1,7 @@
+# Security Policy
+
+We aim to make YOLOv5 🚀 as secure as possible! If you find potential vulnerabilities or have any concerns please let us know so we can investigate and take corrective action if needed.
+
+### Reporting a Vulnerability
+
+To report vulnerabilities please email us at hello@ultralytics.com or visit https://ultralytics.com/contact. Thank you!
diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index 5cf1613ab0cd..f2096ce17a17 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -25,9 +25,9 @@ jobs:
     # Timeout: https://stackoverflow.com/a/59076067/4521646
     timeout-minutes: 60
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
          python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 67f51f0e8bce..8bc88e957a36 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -22,7 +22,7 @@ jobs:
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v2
+      uses: actions/checkout@v3
 
     # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
diff --git a/.github/workflows/rebase.yml b/.github/workflows/rebase.yml
index a4db1efb2971..75c57546166b 100644
--- a/.github/workflows/rebase.yml
+++ b/.github/workflows/rebase.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout the latest code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
         with:
           token: ${{ secrets.ACTIONS_TOKEN }}
           fetch-depth: 0 # otherwise, you will fail to push refs to dest repo
diff --git a/Dockerfile b/Dockerfile
index 489dd04ce5c9..304e8b2801a9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,7 +11,7 @@ COPY requirements.txt .
 RUN python -m pip install --upgrade pip
 RUN pip uninstall -y torch torchvision torchtext
 RUN pip install --no-cache -r requirements.txt albumentations wandb gsutil notebook \
-    torch==1.10.2+cu113 torchvision==0.11.3+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
+    torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
 # RUN pip install --no-cache -U torch torchvision
 
 # Create working directory
@@ -19,7 +19,8 @@ RUN mkdir -p /usr/src/app
 WORKDIR /usr/src/app
 
 # Copy contents
-COPY . /usr/src/app
+RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app
+# COPY . /usr/src/app
 
 # Downloads to user config dir
 ADD https://ultralytics.com/assets/Arial.ttf /root/.config/Ultralytics/
diff --git a/README.md b/README.md
index 9f61157f4d5f..3ebc085b6c33 100644
--- a/README.md
+++ b/README.md
@@ -245,7 +245,7 @@ We are super excited about our first-ever Ultralytics YOLOv5 🚀 EXPORT Competi
 |[YOLOv5x][assets] |640 |50.7 |68.9 |766 |12.1 |4.8 |86.7 |205.7
 | | | | | | | | |
 |[YOLOv5n6][assets] |1280 |36.0 |54.4 |153 |8.1 |2.1 |3.2 |4.6
-|[YOLOv5s6][assets] |1280 |44.8 |63.7 |385 |8.2 |3.6 |16.8 |12.6
+|[YOLOv5s6][assets] |1280 |44.8 |63.7 |385 |8.2 |3.6 |12.6 |16.8
 |[YOLOv5m6][assets] |1280 |51.3 |69.3 |887 |11.1 |6.8 |35.7 |50.0
 |[YOLOv5l6][assets] |1280 |53.7 |71.3 |1784 |15.8 |10.5 |76.8 |111.4
 |[YOLOv5x6][assets]<br>+ [TTA][TTA]|1280<br>1536 |55.0<br>**55.8** |72.7<br>**72.7** |3136<br>- |26.2<br>- |19.4<br>- |140.7<br>- |209.8<br>-
diff --git a/data/hyps/hyp.VOC.yaml b/data/hyps/hyp.VOC.yaml
index aa952c501969..0aa4e7d9f8f5 100644
--- a/data/hyps/hyp.VOC.yaml
+++ b/data/hyps/hyp.VOC.yaml
@@ -4,37 +4,37 @@
 # See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
 
 # YOLOv5 Hyperparameter Evolution Results
-# Best generation: 319
-# Last generation: 434
+# Best generation: 467
+# Last generation: 996
 # metrics/precision, metrics/recall, metrics/mAP_0.5, metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss
-# 0.86236, 0.86184, 0.91274, 0.72647, 0.0077056, 0.0042449, 0.0013846
+# 0.87729, 0.85125, 0.91286, 0.72664, 0.0076739, 0.0042529, 0.0013865
 
-lr0: 0.0033
-lrf: 0.15184
-momentum: 0.74747
+lr0: 0.00334
+lrf: 0.15135
+momentum: 0.74832
 weight_decay: 0.00025
-warmup_epochs: 3.4278
-warmup_momentum: 0.59032
-warmup_bias_lr: 0.18742
+warmup_epochs: 3.3835
+warmup_momentum: 0.59462
+warmup_bias_lr: 0.18657
 box: 0.02
-cls: 0.21563
+cls: 0.21638
 cls_pw: 0.5
-obj: 0.50843
-obj_pw: 0.6729
+obj: 0.51728
+obj_pw: 0.67198
 iou_t: 0.2
-anchor_t: 3.4172
+anchor_t: 3.3744
 fl_gamma: 0.0
-hsv_h: 0.01032
-hsv_s: 0.5562
-hsv_v: 0.28255
+hsv_h: 0.01041
+hsv_s: 0.54703
+hsv_v: 0.27739
 degrees: 0.0
-translate: 0.04575
-scale: 0.73711
+translate: 0.04591
+scale: 0.75544
 shear: 0.0
 perspective: 0.0
 flipud: 0.0
 fliplr: 0.5
-mosaic: 0.87158
-mixup: 0.04294
+mosaic: 0.85834
+mixup: 0.04266
 copy_paste: 0.0
-anchors: 3.3556
+anchors: 3.412
diff --git a/detect.py b/detect.py
index 76f67bea1b90..ccb9fbf5103f 100644
--- a/detect.py
+++ b/detect.py
@@ -89,15 +89,10 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
 
     # Load model
     device = select_device(device)
-    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
-    stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
+    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
+    stride, names, pt = model.stride, model.names, model.pt
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
-    # Half
-    half &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16 supported on limited backends with CUDA
-    if pt or jit:
-        model.model.half() if half else model.model.float()
-
     # Dataloader
     if webcam:
         view_img = check_imshow()
@@ -110,12 +105,12 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
         vid_path, vid_writer = [None] * bs, [None] * bs
 
     # Run inference
-    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half)  # warmup
+    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
     dt, seen = [0.0, 0.0, 0.0], 0
     for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
         im = torch.from_numpy(im).to(device)
-        im = im.half() if half else im.float()  # uint8 to fp16/32
+        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
         im /= 255  # 0 - 255 to 0.0 - 1.0
         if len(im.shape) == 3:
             im = im[None]  # expand for batch dim
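
The detect.py hunk above moves the FP16 decision out of the caller and into DetectMultiBackend itself. A minimal usage sketch of the new API, not from the diff itself (assumes a CUDA device is available; weights path is illustrative):

import torch
from models.common import DetectMultiBackend
from utils.torch_utils import select_device

device = select_device('0')  # CUDA:0
model = DetectMultiBackend('yolov5s.pt', device=device, fp16=True)  # fp16 only honored on pt/jit/onnx/engine + CUDA
model.warmup(imgsz=(1, 3, 640, 640))  # note: no half= argument anymore
im = torch.zeros(1, 3, 640, 640, device=device)
im = im.half() if model.fp16 else im.float()  # callers now check model.fp16 instead of a local flag
pred = model(im)
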
diff --git a/export.py b/export.py
index 15e92a784a50..2d4a68e62f89 100644
--- a/export.py
+++ b/export.py
@@ -75,18 +75,18 @@
 def export_formats():
     # YOLOv5 export formats
-    x = [['PyTorch', '-', '.pt'],
-         ['TorchScript', 'torchscript', '.torchscript'],
-         ['ONNX', 'onnx', '.onnx'],
-         ['OpenVINO', 'openvino', '_openvino_model'],
-         ['TensorRT', 'engine', '.engine'],
-         ['CoreML', 'coreml', '.mlmodel'],
-         ['TensorFlow SavedModel', 'saved_model', '_saved_model'],
-         ['TensorFlow GraphDef', 'pb', '.pb'],
-         ['TensorFlow Lite', 'tflite', '.tflite'],
-         ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite'],
-         ['TensorFlow.js', 'tfjs', '_web_model']]
-    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix'])
+    x = [['PyTorch', '-', '.pt', True],
+         ['TorchScript', 'torchscript', '.torchscript', True],
+         ['ONNX', 'onnx', '.onnx', True],
+         ['OpenVINO', 'openvino', '_openvino_model', False],
+         ['TensorRT', 'engine', '.engine', True],
+         ['CoreML', 'coreml', '.mlmodel', False],
+         ['TensorFlow SavedModel', 'saved_model', '_saved_model', True],
+         ['TensorFlow GraphDef', 'pb', '.pb', True],
+         ['TensorFlow Lite', 'tflite', '.tflite', False],
+         ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False],
+         ['TensorFlow.js', 'tfjs', '_web_model', False]]
+    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'GPU'])
 
 
 def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
@@ -218,6 +218,7 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
     builder = trt.Builder(logger)
     config = builder.create_builder_config()
     config.max_workspace_size = workspace * 1 << 30
+    # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)  # fix TRT 8.4 deprecation notice
 
     flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
     network = builder.create_network(flag)
@@ -233,9 +234,8 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
     for out in outputs:
         LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
 
-    half &= builder.platform_has_fast_fp16
-    LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
-    if half:
+    LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}')
+    if builder.platform_has_fast_fp16:
         config.set_flag(trt.BuilderFlag.FP16)
     with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
         t.write(engine.serialize())
@@ -260,9 +260,9 @@ def export_saved_model(model, im, file, dynamic,
 
     batch_size, ch, *imgsz = list(im.shape)  # BCHW
     tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
-    im = tf.zeros((batch_size, *imgsz, 3))  # BHWC order for TensorFlow
+    im = tf.zeros((batch_size, *imgsz, ch))  # BHWC order for TensorFlow
     _ = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
-    inputs = tf.keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
+    inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if dynamic else batch_size)
     outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
     keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
     keras_model.trainable = False
@@ -275,7 +275,7 @@ def export_saved_model(model, im, file, dynamic,
             m = m.get_concrete_function(spec)
             frozen_func = convert_variables_to_constants_v2(m)
             tfm = tf.Module()
-            tfm.__call__ = tf.function(lambda x: frozen_func(x), [spec])
+            tfm.__call__ = tf.function(lambda x: frozen_func(x)[0], [spec])
             tfm.__call__(im)
             tf.saved_model.save(
                 tfm,
@@ -331,7 +331,7 @@ def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('Te
         converter.target_spec.supported_types = []
         converter.inference_input_type = tf.uint8  # or tf.int8
         converter.inference_output_type = tf.uint8  # or tf.int8
-        converter.experimental_new_quantizer = False
+        converter.experimental_new_quantizer = True
         f = str(file).replace('.pt', '-int8.tflite')
 
     tflite_model = converter.convert()
@@ -494,7 +494,7 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
         if int8 or edgetpu:  # TFLite --int8 bug https://github.com/ultralytics/yolov5/issues/5707
             check_requirements(('flatbuffers==1.12',))  # required before `import tensorflow`
         assert not (tflite and tfjs), 'TFLite and TF.js models must be exported separately, please pass only one type.'
-        model, f[5] = export_saved_model(model, im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs,
+        model, f[5] = export_saved_model(model.cpu(), im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs,
                                          agnostic_nms=agnostic_nms or tfjs, topk_per_class=topk_per_class,
                                          topk_all=topk_all, conf_thres=conf_thres, iou_thres=iou_thres)  # keras model
         if pb or tfjs:  # pb prerequisite to tfjs
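
export_formats() now returns a fourth boolean 'GPU' column that downstream tools can filter on. A quick sketch of querying the table, using only pandas semantics and the flags listed above:

from export import export_formats

fmts = export_formats()  # pandas DataFrame: Format, Argument, Suffix, GPU
print(fmts[fmts.GPU].Format.tolist())  # formats flagged GPU-capable above
for name, arg, suffix, gpu in fmts.values:
    print(f'{name}: python export.py --include {arg} -> {suffix}')
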
diff --git a/models/common.py b/models/common.py
index 0dae0244e932..066f8774d3c3 100644
--- a/models/common.py
+++ b/models/common.py
@@ -31,7 +31,7 @@ def autopad(k, p=None):  # kernel, padding
     # Pad to 'same'
     if p is None:
-        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
+        p = k // 2 if isinstance(k, int) else (x // 2 for x in k)  # auto-pad
     return p
@@ -133,7 +133,7 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, nu
         self.cv2 = Conv(c1, c_, 1, 1)
         self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
         self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
-        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
+        # self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
 
     def forward(self, x):
         return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
@@ -194,7 +194,7 @@ def forward(self, x):
             warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
             y1 = self.m(x)
             y2 = self.m(y1)
-            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
+            return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
 
 
 class Focus(nn.Module):
@@ -205,7 +205,7 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, k
         # self.contract = Contract(gain=2)
 
     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
-        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
+        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
         # return self.conv(self.contract(x))
@@ -219,7 +219,7 @@ def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, s
 
     def forward(self, x):
         y = self.cv1(x)
-        return torch.cat([y, self.cv2(y)], 1)
+        return torch.cat((y, self.cv2(y)), 1)
 
 
 class GhostBottleneck(nn.Module):
@@ -277,7 +277,7 @@ def forward(self, x):
 
 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for python inference on various backends
-    def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
+    def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False):
         # Usage:
         #   PyTorch: weights = *.pt
         #   TorchScript: *.torchscript
@@ -297,6 +297,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
         pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w)  # get backend
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
         w = attempt_download(w)  # download if not local
+        fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16
         if data:  # data.yaml path (optional)
             with open(data, errors='ignore') as f:
                 names = yaml.safe_load(f)['names']  # class names
@@ -305,11 +306,13 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
             model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
             stride = max(int(model.stride.max()), 32)  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
         elif jit:  # TorchScript
             LOGGER.info(f'Loading {w} for TorchScript inference...')
             extra_files = {'config.txt': ''}  # model metadata
             model = torch.jit.load(w, _extra_files=extra_files)
+            model.half() if fp16 else model.float()
             if extra_files['config.txt']:
                 d = json.loads(extra_files['config.txt'])  # extra_files dict
                 stride, names = int(d['stride']), d['names']
@@ -342,12 +345,15 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
             with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                 model = runtime.deserialize_cuda_engine(f.read())
             bindings = OrderedDict()
+            fp16 = False  # default updated below
             for index in range(model.num_bindings):
                 name = model.get_binding_name(index)
                 dtype = trt.nptype(model.get_binding_dtype(index))
                 shape = tuple(model.get_binding_shape(index))
                 data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
                 bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+                if model.binding_is_input(index) and dtype == np.float16:
+                    fp16 = True
             binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
             context = model.create_execution_context()
             batch_size = bindings['images'].shape[0]
@@ -435,7 +441,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
         else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
             im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
             if self.saved_model:  # SavedModel
-                y = (self.model(im, training=False) if self.keras else self.model(im)[0]).numpy()
+                y = (self.model(im, training=False) if self.keras else self.model(im)).numpy()
             elif self.pb:  # GraphDef
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             else:  # Lite or Edge TPU
@@ -452,15 +458,17 @@ def forward(self, im, augment=False, visualize=False, val=False):
                     y = (y.astype(np.float32) - zero_point) * scale  # re-scale
             y[..., :4] *= [w, h, w, h]  # xywh normalized to pixels
 
-        y = torch.tensor(y) if isinstance(y, np.ndarray) else y
+        if isinstance(y, np.ndarray):
+            y = torch.tensor(y, device=self.device)
         return (y, []) if val else y
 
-    def warmup(self, imgsz=(1, 3, 640, 640), half=False):
+    def warmup(self, imgsz=(1, 3, 640, 640)):
         # Warmup model by running inference once
-        if self.pt or self.jit or self.onnx or self.engine:  # warmup types
-            if isinstance(self.device, torch.device) and self.device.type != 'cpu':  # only warmup GPU models
-                im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float)  # input image
-                self.forward(im)  # warmup
+        if any((self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb)):  # warmup types
+            if self.device.type != 'cpu':  # only warmup GPU models
+                im = torch.zeros(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
+                for _ in range(2 if self.jit else 1):  # extra warmup pass for JIT models
+                    self.forward(im)  # warmup
 
    @staticmethod
    def model_type(p='path/to/model.pt'):
@@ -541,10 +549,9 @@ def forward(self, imgs, size=640, augment=False, profile=False):
                 g = (size / max(s))  # gain
                 shape1.append([y * g for y in s])
                 imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
-            shape1 = [make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)]  # inference shape
-            x = [letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs]  # pad
-            x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
-            x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
+            shape1 = [make_divisible(x, self.stride) if self.pt else size for x in np.array(shape1).max(0)]  # inf shape
+            x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
+            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
             x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
             t.append(time_sync())
diff --git a/models/experimental.py b/models/experimental.py
index 463e5514a06e..1230f4656c8f 100644
--- a/models/experimental.py
+++ b/models/experimental.py
@@ -94,21 +94,22 @@ def attempt_load(weights, map_location=None, inplace=True, fuse=True):
     model = Ensemble()
     for w in weights if isinstance(weights, list) else [weights]:
         ckpt = torch.load(attempt_download(w), map_location=map_location)  # load
-        if fuse:
-            model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval())  # FP32 model
-        else:
-            model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval())  # without layer fuse
+        ckpt = (ckpt.get('ema') or ckpt['model']).float()  # FP32 model
+        model.append(ckpt.fuse().eval() if fuse else ckpt.eval())  # fused or un-fused model in eval mode
 
     # Compatibility updates
     for m in model.modules():
-        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]:
-            m.inplace = inplace  # pytorch 1.7.0 compatibility
-        if type(m) is Detect:
+        t = type(m)
+        if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
+            m.inplace = inplace  # torch 1.7.0 compatibility
+        if t is Detect:
             if not isinstance(m.anchor_grid, list):  # new Detect Layer compatibility
                 delattr(m, 'anchor_grid')
                 setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
-        elif type(m) is Conv:
-            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
+        elif t is Conv:
+            m._non_persistent_buffers_set = set()  # torch 1.6.0 compatibility
+        elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
+            m.recompute_scale_factor = None  # torch 1.11.0 compatibility
 
     if len(model) == 1:
         return model[-1]  # return model
diff --git a/models/tf.py b/models/tf.py
index 74681e403afd..728907f8fb47 100644
--- a/models/tf.py
+++ b/models/tf.py
@@ -222,19 +222,21 @@ def call(self, inputs):
             x.append(self.m[i](inputs[i]))
             # x(bs,20,20,255) to x(bs,3,20,20,85)
             ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
-            x[i] = tf.transpose(tf.reshape(x[i], [-1, ny * nx, self.na, self.no]), [0, 2, 1, 3])
+            x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
 
             if not self.training:  # inference
                 y = tf.sigmoid(x[i])
-                xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
-                wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
+                grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
+                anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
+                xy = (y[..., 0:2] * 2 + grid) * self.stride[i]  # xy
+                wh = y[..., 2:4] ** 2 * anchor_grid
                 # Normalize xywh to 0-1 to reduce calibration error
                 xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                 wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                 y = tf.concat([xy, wh, y[..., 4:]], -1)
                 z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
 
-        return x if self.training else (tf.concat(z, 1), x)
+        return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), x)
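
The refactored attempt_load() above collapses the ema/model selection into one expression; the pattern it relies on is plain dict access with an or-fallback. A standalone illustration, assuming a checkpoint saved by train.py with a 'model' key and an optional 'ema' key:

import torch

ckpt = torch.load('yolov5s.pt', map_location='cpu')  # dict with 'model' and (optionally) 'ema'
model = (ckpt.get('ema') or ckpt['model']).float()  # prefer EMA weights when present, else raw model
model = model.fuse().eval()  # fuse Conv+BN for inference and set eval mode
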
 
     @staticmethod
     def _make_grid(nx=20, ny=20):
diff --git a/models/yolo.py b/models/yolo.py
index f659a04545b9..09215101e8a0 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -62,9 +62,10 @@ def forward(self, x):
                     y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
-                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
-                    y = torch.cat((xy, wh, y[..., 4:]), -1)
+                    xy, wh, conf = y.tensor_split((2, 4), 4)
+                    xy = (xy * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
+                    y = torch.cat((xy, wh, conf), 4)
                 z.append(y.view(bs, -1, self.no))
 
         return x if self.training else (torch.cat(z, 1), x)
@@ -110,8 +111,8 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, i
             s = 256  # 2x min stride
             m.inplace = self.inplace
             m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
+            check_anchor_order(m)  # must be in pixel-space (not grid-space)
             m.anchors /= m.stride.view(-1, 1, 1)
-            check_anchor_order(m)
             self.stride = m.stride
             self._initialize_biases()  # only run once
diff --git a/train.py b/train.py
index d8df31b72282..60be962d447f 100644
--- a/train.py
+++ b/train.py
@@ -268,7 +268,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
 
     # Start training
     t0 = time.time()
-    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
+    nw = max(round(hyp['warmup_epochs'] * nb), 100)  # number of warmup iterations, max(3 epochs, 100 iterations)
     # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
     last_opt_step = -1
     maps = np.zeros(nc)  # mAP per class
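
The yolo.py swap is deliberate: check_anchor_order() compares mean anchor areas against strides, so it has to run while anchors are still in pixel-space, before the divide by stride. A toy check of the invariant it enforces, with two hypothetical layers (P3 and P5, one anchor each):

import torch

anchors = torch.tensor([[[10., 13.]], [[116., 90.]]])  # (nl=2, na=1, 2) pixel-space anchors, P3 then P5
stride = torch.tensor([8., 32.])
a = anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per layer, as in check_anchor_order()
assert ((a[-1] - a[0]).sign() == (stride[-1] - stride[0]).sign()).item()  # areas must ascend with stride
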
diff --git a/tutorial.ipynb b/tutorial.ipynb
index 09b2b33bda6f..1479a164cd8e 100644
--- a/tutorial.ipynb
+++ b/tutorial.ipynb
@@ -420,7 +420,7 @@
           "name": "stdout",
           "text": [
             "YOLOv5 🚀 v6.0-48-g84a8099 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n",
-            "Setup complete ✅\n"
+            "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 42.2/166.8 GB disk)\n"
           ]
         }
       ]
@@ -731,7 +731,7 @@
           "output_type": "stream",
           "name": "stdout",
           "text": [
-            "\u001b[34m\u001b[1mtrain: \u001b[0mweights=yolov5s.pt, cfg=, data=coco128.yaml, hyp=data/hyps/hyp.scratch.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=0, save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest\n",
+            "\u001b[34m\u001b[1mtrain: \u001b[0mweights=yolov5s.pt, cfg=, data=coco128.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=0, save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest\n",
             "\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n",
             "YOLOv5 🚀 v6.0-48-g84a8099 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n",
             "\n",
@@ -1078,7 +1078,7 @@
       "source": [
         "# VOC\n",
         "for b, m in zip([64, 64, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']):  # zip(batch_size, model)\n",
-        "  !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
+        "  !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.VOC.yaml --project VOC --name {m}"
       ],
       "execution_count": null,
      "outputs": []
diff --git a/utils/__init__.py b/utils/__init__.py
index 4658ed6473cd..a63c473a4340 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -21,14 +21,13 @@ def notebook_init(verbose=True):
     if is_colab():
         shutil.rmtree('/content/sample_data', ignore_errors=True)  # remove colab /sample_data directory
 
+    # System info
     if verbose:
-        # System info
-        # gb = 1 / 1000 ** 3  # bytes to GB
-        gib = 1 / 1024 ** 3  # bytes to GiB
+        gb = 1 << 30  # bytes to GiB (1024 ** 3)
         ram = psutil.virtual_memory().total
         total, used, free = shutil.disk_usage("/")
         display.clear_output()
-        s = f'({os.cpu_count()} CPUs, {ram * gib:.1f} GB RAM, {(total - free) * gib:.1f}/{total * gib:.1f} GB disk)'
+        s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)'
     else:
         s = ''
diff --git a/utils/autoanchor.py b/utils/autoanchor.py
index 27d6fb68bb38..77518abe9889 100644
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
@@ -17,10 +17,10 @@
 def check_anchor_order(m):
     # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
-    a = m.anchors.prod(-1).view(-1)  # anchor area
+    a = m.anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per output layer
     da = a[-1] - a[0]  # delta a
     ds = m.stride[-1] - m.stride[0]  # delta s
-    if da.sign() != ds.sign():  # same order
+    if da and (da.sign() != ds.sign()):  # same order
         LOGGER.info(f'{PREFIX}Reversing anchor order')
         m.anchors[:] = m.anchors.flip(0)
@@ -40,7 +40,8 @@ def metric(k):  # compute metric
         bpr = (best > 1 / thr).float().mean()  # best possible recall
         return bpr, aat
 
-    anchors = m.anchors.clone() * m.stride.to(m.anchors.device).view(-1, 1, 1)  # current anchors
+    stride = m.stride.to(m.anchors.device).view(-1, 1, 1)  # model strides
+    anchors = m.anchors.clone() * stride  # current anchors
     bpr, aat = metric(anchors.cpu().view(-1, 2))
     s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). '
     if bpr > 0.98:  # threshold to recompute
@@ -55,11 +56,13 @@ def metric(k):  # compute metric
         new_bpr = metric(anchors)[0]
         if new_bpr > bpr:  # replace anchors
             anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
-            m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1)  # loss
-            check_anchor_order(m)
-            LOGGER.info(f'{PREFIX}New anchors saved to model. Update model *.yaml to use these anchors in the future.')
+            m.anchors[:] = anchors.clone().view_as(m.anchors)
+            check_anchor_order(m)  # must be in pixel-space (not grid-space)
+            m.anchors /= stride
+            s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)'
         else:
-            LOGGER.info(f'{PREFIX}Original anchors better than new anchors. Proceeding with original anchors.')
+            s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)'
+        LOGGER.info(emojis(s))
 
 
 def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
@@ -120,19 +123,21 @@ def print_results(k, verbose=True):
 
     # Filter
     i = (wh0 < 3.0).any(1).sum()
     if i:
-        LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
+        LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found: {i} of {len(wh0)} labels are < 3 pixels in size')
     wh = wh0[(wh0 >= 2.0).any(1)]  # filter > 2 pixels
     # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1
 
-    # Kmeans calculation
-    LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
-    s = wh.std(0)  # sigmas for whitening
-    k = kmeans(wh / s, n, iter=30)[0] * s  # points
-    if len(k) != n:  # kmeans may return fewer points than requested if wh is insufficient or too similar
-        LOGGER.warning(f'{PREFIX}WARNING: scipy.cluster.vq.kmeans returned only {len(k)} of {n} requested points')
+    # Kmeans init
+    try:
+        LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
+        assert n <= len(wh)  # apply overdetermined constraint
+        s = wh.std(0)  # sigmas for whitening
+        k = kmeans(wh / s, n, iter=30)[0] * s  # points
+        assert n == len(k)  # kmeans may return fewer points than requested if wh is insufficient or too similar
+    except Exception:
+        LOGGER.warning(f'{PREFIX}WARNING: switching strategies from kmeans to random init')
         k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size  # random init
-    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
-    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
+    wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
     k = print_results(k, verbose=False)
 
     # Plot
@@ -149,7 +154,7 @@ def print_results(k, verbose=True):
 
     # Evolve
     f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, generations, mutation prob, sigma
-    pbar = tqdm(range(gen), desc=f'{PREFIX}Evolving anchors with Genetic Algorithm:')  # progress bar
+    pbar = tqdm(range(gen), bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
     for _ in pbar:
         v = np.ones(sh)
         while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
diff --git a/utils/autobatch.py b/utils/autobatch.py
index cb94f041e95d..e53b4787b87d 100644
--- a/utils/autobatch.py
+++ b/utils/autobatch.py
@@ -34,11 +34,12 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
         LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
         return batch_size
 
+    gb = 1 << 30  # bytes to GiB (1024 ** 3)
     d = str(device).upper()  # 'CUDA:0'
     properties = torch.cuda.get_device_properties(device)  # device properties
-    t = properties.total_memory / 1024 ** 3  # (GiB)
-    r = torch.cuda.memory_reserved(device) / 1024 ** 3  # (GiB)
-    a = torch.cuda.memory_allocated(device) / 1024 ** 3  # (GiB)
+    t = properties.total_memory / gb  # (GiB)
+    r = torch.cuda.memory_reserved(device) / gb  # (GiB)
+    a = torch.cuda.memory_allocated(device) / gb  # (GiB)
     f = t - (r + a)  # free inside reserved
     LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')
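
The shifted constant used in notebook_init() and autobatch() above is exactly 1024 ** 3, so dividing a byte count by it yields GiB; utils/general.py below applies the same idea with 1 << 20 for MiB. A one-line sanity check:

gb, mb = 1 << 30, 1 << 20
assert gb == 1024 ** 3 and mb == 1024 ** 2
print(f'{16160 * mb / gb:.1f} GiB')  # 15.8 GiB, e.g. a V100's 16160 MiB
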
diff --git a/utils/benchmarks.py b/utils/benchmarks.py
index 962df812a9d3..bdbbdc43b639 100644
--- a/utils/benchmarks.py
+++ b/utils/benchmarks.py
@@ -19,6 +19,7 @@
 Requirements:
     $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu  # CPU
     $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow  # GPU
+    $ pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com  # TensorRT
 
 Usage:
     $ python utils/benchmarks.py --weights yolov5s.pt --img 640
@@ -41,20 +42,29 @@
 import val
 from utils import notebook_init
 from utils.general import LOGGER, print_args
+from utils.torch_utils import select_device
 
 
 def run(weights=ROOT / 'yolov5s.pt',  # weights path
         imgsz=640,  # inference size (pixels)
         batch_size=1,  # batch size
         data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
+        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+        half=False,  # use FP16 half-precision inference
         ):
     y, t = [], time.time()
     formats = export.export_formats()
-    for i, (name, f, suffix) in formats.iterrows():  # index, (name, file, suffix)
+    device = select_device(device)
+    for i, (name, f, suffix, gpu) in formats.iterrows():  # index, (name, file, suffix, gpu-capable)
         try:
-            w = weights if f == '-' else export.run(weights=weights, imgsz=[imgsz], include=[f], device='cpu')[-1]
+            if device.type != 'cpu':
+                assert gpu, f'{name} inference not supported on GPU'
+            if f == '-':
+                w = weights  # PyTorch format
+            else:
+                w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1]  # all others
             assert suffix in str(w), 'export failed'
-            result = val.run(data, w, batch_size, imgsz=imgsz, plots=False, device='cpu', task='benchmark')
+            result = val.run(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half)
             metrics = result[0]  # metrics (mp, mr, map50, map, *losses(box, obj, cls))
             speeds = result[2]  # times (preprocess, inference, postprocess)
             y.append([name, metrics[3], speeds[1]])  # mAP, t_inference
@@ -78,6 +88,8 @@ def parse_opt():
     parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
     parser.add_argument('--batch-size', type=int, default=1, help='batch size')
     parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
     opt = parser.parse_args()
     print_args(FILE.stem, opt)
     return opt
diff --git a/utils/datasets.py b/utils/datasets.py
index e132e04f6d9d..8627344af7b4 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -15,6 +15,7 @@
 from multiprocessing.pool import Pool, ThreadPool
 from pathlib import Path
 from threading import Thread
+from urllib.parse import urlparse
 from zipfile import ZipFile
 
 import cv2
@@ -33,8 +34,9 @@
 # Parameters
 HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
-IMG_FORMATS = ['bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp']  # include image suffixes
-VID_FORMATS = ['asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'wmv']  # include video suffixes
+IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp'  # include image suffixes
+VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv'  # include video suffixes
+BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}'  # tqdm bar format
 
 # Get orientation exif tag
 for orientation in ExifTags.TAGS.keys():
@@ -300,7 +302,7 @@ def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True):
         for i, s in enumerate(sources):  # index, source
             # Start thread to read frames from video stream
             st = f'{i + 1}/{n}: {s}... '
-            if 'youtube.com/' in s or 'youtu.be/' in s:  # if source is YouTube video
+            if urlparse(s).hostname in ('youtube.com', 'youtu.be'):  # if source is YouTube video
                 check_requirements(('pafy', 'youtube_dl==2020.12.2'))
                 import pafy
                 s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
@@ -407,19 +409,19 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
             #     f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
             else:
                 raise Exception(f'{prefix}{p} does not exist')
-            self.img_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
+            self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
             # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS])  # pathlib
-            assert self.img_files, f'{prefix}No images found'
+            assert self.im_files, f'{prefix}No images found'
         except Exception as e:
             raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}')
 
         # Check cache
-        self.label_files = img2label_paths(self.img_files)  # labels
+        self.label_files = img2label_paths(self.im_files)  # labels
         cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
         try:
             cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
             assert cache['version'] == self.cache_version  # same version
-            assert cache['hash'] == get_hash(self.label_files + self.img_files)  # same hash
+            assert cache['hash'] == get_hash(self.label_files + self.im_files)  # same hash
         except Exception:
             cache, exists = self.cache_labels(cache_path, prefix), False  # cache
 
@@ -427,7 +429,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
         nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupt, total
         if exists:
             d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupt"
-            tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
+            tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=BAR_FORMAT)  # display cache results
             if cache['msgs']:
                 LOGGER.info('\n'.join(cache['msgs']))  # display warnings
         assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'
 
@@ -437,7 +439,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
         labels, shapes, self.segments = zip(*cache.values())
         self.labels = list(labels)
         self.shapes = np.array(shapes, dtype=np.float64)
-        self.img_files = list(cache.keys())  # update
+        self.im_files = list(cache.keys())  # update
         self.label_files = img2label_paths(cache.keys())  # update
         n = len(shapes)  # number of images
         bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
@@ -466,7 +468,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
             s = self.shapes  # wh
             ar = s[:, 1] / s[:, 0]  # aspect ratio
             irect = ar.argsort()
-            self.img_files = [self.img_files[i] for i in irect]
+            self.im_files = [self.im_files[i] for i in irect]
             self.label_files = [self.label_files[i] for i in irect]
             self.labels = [self.labels[i] for i in irect]
             self.shapes = s[irect]  # wh
@@ -485,24 +487,20 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
             self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
 
         # Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources)
-        self.imgs, self.img_npy = [None] * n, [None] * n
+        self.ims = [None] * n
+        self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
         if cache_images:
-            if cache_images == 'disk':
-                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
-                self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
-                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
             gb = 0  # Gigabytes of cached images
-            self.img_hw0, self.img_hw = [None] * n, [None] * n
-            results = ThreadPool(NUM_THREADS).imap(self.load_image, range(n))
-            pbar = tqdm(enumerate(results), total=n)
+            self.im_hw0, self.im_hw = [None] * n, [None] * n
+            fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image
+            results = ThreadPool(NUM_THREADS).imap(fcn, range(n))
+            pbar = tqdm(enumerate(results), total=n, bar_format=BAR_FORMAT)
             for i, x in pbar:
                 if cache_images == 'disk':
-                    if not self.img_npy[i].exists():
-                        np.save(self.img_npy[i].as_posix(), x[0])
-                    gb += self.img_npy[i].stat().st_size
+                    gb += self.npy_files[i].stat().st_size
                 else:  # 'ram'
-                    self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
-                    gb += self.imgs[i].nbytes
+                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
+                    gb += self.ims[i].nbytes
                 pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
             pbar.close()
 
@@ -512,8 +510,8 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
         nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
         desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
         with Pool(NUM_THREADS) as pool:
-            pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
-                        desc=desc, total=len(self.img_files))
+            pbar = tqdm(pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix))),
+                        desc=desc, total=len(self.im_files), bar_format=BAR_FORMAT)
             for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                 nm += nm_f
                 nf += nf_f
@@ -530,8 +528,8 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
             LOGGER.info('\n'.join(msgs))
         if nf == 0:
             LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
-        x['hash'] = get_hash(self.label_files + self.img_files)
-        x['results'] = nf, nm, ne, nc, len(self.img_files)
+        x['hash'] = get_hash(self.label_files + self.im_files)
+        x['results'] = nf, nm, ne, nc, len(self.im_files)
         x['msgs'] = msgs  # warnings
         x['version'] = self.cache_version  # cache version
         try:
@@ -543,7 +541,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
         return x
 
     def __len__(self):
-        return len(self.img_files)
+        return len(self.im_files)
 
     # def __iter__(self):
     #     self.count = -1
@@ -622,17 +620,15 @@ def __getitem__(self, index):
             img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
             img = np.ascontiguousarray(img)
 
-        return torch.from_numpy(img), labels_out, self.img_files[index], shapes
+        return torch.from_numpy(img), labels_out, self.im_files[index], shapes
 
     def load_image(self, i):
-        # loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
-        im = self.imgs[i]
+        # Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
+        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i],
         if im is None:  # not cached in RAM
-            npy = self.img_npy[i]
-            if npy and npy.exists():  # load npy
-                im = np.load(npy)
+            if fn.exists():  # load npy
+                im = np.load(fn)
             else:  # read image
-                f = self.img_files[i]
                 im = cv2.imread(f)  # BGR
                 assert im is not None, f'Image Not Found {f}'
             h0, w0 = im.shape[:2]  # orig hw
@@ -643,7 +639,13 @@ def load_image(self, i):
                                 interpolation=cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA)
             return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
         else:
-            return self.imgs[i], self.img_hw0[i], self.img_hw[i]  # im, hw_original, hw_resized
+            return self.ims[i], self.im_hw0[i], self.im_hw[i]  # im, hw_original, hw_resized
+
+    def cache_images_to_disk(self, i):
+        # Saves an image as an *.npy file for faster loading
+        f = self.npy_files[i]
+        if not f.exists():
+            np.save(f.as_posix(), cv2.imread(self.im_files[i]))
 
     def load_mosaic(self, index):
         # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
@@ -777,16 +779,16 @@ def load_mosaic9(self, index):
 
     @staticmethod
     def collate_fn(batch):
-        img, label, path, shapes = zip(*batch)  # transposed
+        im, label, path, shapes = zip(*batch)  # transposed
         for i, lb in enumerate(label):
             lb[:, 0] = i  # add target image index for build_targets()
-        return torch.stack(img, 0), torch.cat(label, 0), path, shapes
+        return torch.stack(im, 0), torch.cat(label, 0), path, shapes
 
     @staticmethod
     def collate_fn4(batch):
         img, label, path, shapes = zip(*batch)  # transposed
         n = len(shapes) // 4
-        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
+        im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
 
         ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
         wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
@@ -800,13 +802,13 @@ def collate_fn4(batch):
             else:
                 im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                 lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
-            img4.append(im)
+            im4.append(im)
             label4.append(lb)
 
         for i, lb in enumerate(label4):
             lb[:, 0] = i  # add target image index for build_targets()
 
-        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
+        return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4
 
 
 # Ancillary functions --------------------------------------------------------------------------------------------------
@@ -906,7 +908,7 @@ def verify_image_label(args):
             nf = 1  # label found
             with open(lb_file) as f:
                 lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
-                if any([len(x) > 8 for x in lb]):  # is segment
+                if any(len(x) > 6 for x in lb):  # is segment
                     classes = np.array([x[0] for x in lb], dtype=np.float32)
                     segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
                     lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
@@ -999,12 +1001,12 @@ def hub_ops(f, max_dim=1920):
             'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
                             'per_class': (x > 0).sum(0).tolist()},
             'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
-                       zip(dataset.img_files, dataset.labels)]}
+                       zip(dataset.im_files, dataset.labels)]}
 
     if hub:
         im_dir = hub_dir / 'images'
         im_dir.mkdir(parents=True, exist_ok=True)
-        for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):
+        for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.im_files), total=dataset.n, desc='HUB Ops'):
             pass
 
     # Profile
diff --git a/utils/general.py b/utils/general.py
index 3044b9c1ae78..e8b3b05c5fe1 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -15,6 +15,7 @@
 import signal
 import time
 import urllib
+from datetime import datetime
 from itertools import repeat
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
@@ -45,6 +46,7 @@
 pd.options.display.max_columns = 10
 cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
 os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS)  # NumExpr max threads
+os.environ['OMP_NUM_THREADS'] = str(NUM_THREADS)  # OpenMP max threads (PyTorch and SciPy)
 
 
 def is_kaggle():
@@ -121,13 +123,15 @@ def _timeout_handler(self, signum, frame):
         raise TimeoutError(self.timeout_message)
 
     def __enter__(self):
-        signal.signal(signal.SIGALRM, self._timeout_handler)  # Set handler for SIGALRM
-        signal.alarm(self.seconds)  # start countdown for SIGALRM to be raised
+        if platform.system() != 'Windows':  # not supported on Windows
+            signal.signal(signal.SIGALRM, self._timeout_handler)  # Set handler for SIGALRM
+            signal.alarm(self.seconds)  # start countdown for SIGALRM to be raised
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        signal.alarm(0)  # Cancel SIGALRM if it's scheduled
-        if self.suppress and exc_type is TimeoutError:  # Suppress TimeoutError
-            return True
+        if platform.system() != 'Windows':
+            signal.alarm(0)  # Cancel SIGALRM if it's scheduled
+            if self.suppress and exc_type is TimeoutError:  # Suppress TimeoutError
+                return True
 
 
 class WorkingDirectory(contextlib.ContextDecorator):
@@ -220,13 +224,26 @@ def emojis(str=''):
     return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
 
 
+def file_age(path=__file__):
+    # Return days since last file update
+    dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime))  # delta
+    return dt.days  # + dt.seconds / 86400  # fractional days
+
+
+def file_update_date(path=__file__):
+    # Return human-readable file modification date, i.e. '2021-3-26'
+    t = datetime.fromtimestamp(Path(path).stat().st_mtime)
+    return f'{t.year}-{t.month}-{t.day}'
+
+
 def file_size(path):
     # Return file/dir size (MB)
+    mb = 1 << 20  # bytes to MiB (1024 ** 2)
     path = Path(path)
     if path.is_file():
-        return path.stat().st_size / 1E6
+        return path.stat().st_size / mb
     elif path.is_dir():
-        return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / 1E6
+        return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb
     else:
         return 0.0
 
@@ -241,6 +258,14 @@ def check_online():
         return False
 
 
+def git_describe(path=ROOT):  # path must be a directory
+    # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
+    try:
+        return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1]
+    except Exception:
+        return ''
+
+
 @try_except
 @WorkingDirectory(ROOT)
 def check_git_status():
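
With the guards above, Timeout silently becomes a no-op on Windows, where signal.SIGALRM does not exist, instead of raising at __enter__. A usage sketch; only the seconds argument is visible in the hunk, so other constructor kwargs are omitted here:

import platform
from utils.general import Timeout

with Timeout(5):  # arms SIGALRM for 5 s on Unix; __enter__/__exit__ now do nothing on Windows
    result = sum(range(10 ** 6))
print(platform.system(), result)
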
diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py
index 86ccf38443a9..866bdc4be2f5 100644
--- a/utils/loggers/__init__.py
+++ b/utils/loggers/__init__.py
@@ -47,7 +47,7 @@ def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None,
                      'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',  # metrics
                      'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
                      'x/lr0', 'x/lr1', 'x/lr2']  # params
-        self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95',]
+        self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95']
         for k in LOGGERS:
             setattr(self, k, None)  # init empty logger dictionary
         self.csv = True  # always log to csv
@@ -56,7 +56,7 @@ def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None,
         if not wandb:
             prefix = colorstr('Weights & Biases: ')
             s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)"
-            print(emojis(s))
+            self.logger.info(emojis(s))
 
         # TensorBoard
         s = self.save_dir
diff --git a/utils/loggers/wandb/sweep.yaml b/utils/loggers/wandb/sweep.yaml
index c7790d75f6b2..688b1ea0285f 100644
--- a/utils/loggers/wandb/sweep.yaml
+++ b/utils/loggers/wandb/sweep.yaml
@@ -88,7 +88,7 @@ parameters:
   fl_gamma:
     distribution: uniform
     min: 0.0
-    max: 0.1
+    max: 4.0
   hsv_h:
     distribution: uniform
     min: 0.0
diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py
index 3835436543d2..786e58a19972 100644
--- a/utils/loggers/wandb/wandb_utils.py
+++ b/utils/loggers/wandb/wandb_utils.py
@@ -403,7 +403,7 @@ def create_dataset_table(self, dataset: LoadImagesAndLabels, class_to_id: Dict[i
         # TODO: Explore multiprocessing to split this loop in parallel. This is essential for speeding up the logging
         artifact = wandb.Artifact(name=name, type="dataset")
         img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
-        img_files = tqdm(dataset.img_files) if not img_files else img_files
+        img_files = tqdm(dataset.im_files) if not img_files else img_files
         for img_file in img_files:
             if Path(img_file).is_dir():
                 artifact.add_dir(img_file, name='data/images')
diff --git a/utils/loss.py b/utils/loss.py
index 5aa9f017d2af..0f0137817955 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -89,9 +89,10 @@ def forward(self, pred, true):
 
 class ComputeLoss:
+    sort_obj_iou = False
+
     # Compute losses
     def __init__(self, model, autobalance=False):
-        self.sort_obj_iou = False
         device = next(model.parameters()).device  # get model device
         h = model.hyp  # hyperparameters
@@ -111,26 +112,28 @@ def __init__(self, model, autobalance=False):
         self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
         self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
         self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
+        self.device = device
         for k in 'na', 'nc', 'nl', 'anchors':
             setattr(self, k, getattr(det, k))
 
-    def __call__(self, p, targets):  # predictions, targets, model
-        device = targets.device
-        lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
+    def __call__(self, p, targets):  # predictions, targets
+        lcls = torch.zeros(1, device=self.device)  # class loss
+        lbox = torch.zeros(1, device=self.device)  # box loss
+        lobj = torch.zeros(1, device=self.device)  # object loss
         tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets
 
         # Losses
         for i, pi in enumerate(p):  # layer index, layer predictions
             b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
-            tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
+            tobj = torch.zeros(pi.shape[:4], device=self.device)  # target obj
 
             n = b.shape[0]  # number of targets
             if n:
-                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
+                pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1)  # target-subset of predictions
 
                 # Regression
-                pxy = ps[:, :2].sigmoid() * 2 - 0.5
-                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
+                pxy = pxy.sigmoid() * 2 - 0.5
+                pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
                 pbox = torch.cat((pxy, pwh), 1)  # predicted box
                 iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
                 lbox += (1.0 - iou).mean()  # iou loss
@@ -144,9 +147,9 @@ def __call__(self, p, targets):  # predictions, targets, model
 
                 # Classification
                 if self.nc > 1:  # cls loss (only if multiple classes)
-                    t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
+                    t = torch.full_like(pcls, self.cn, device=self.device)  # targets
                     t[range(n), tcls[i]] = self.cp
-                    lcls += self.BCEcls(ps[:, 5:], t)  # BCE
+                    lcls += self.BCEcls(pcls, t)  # BCE
 
                 # Append targets to text file
                 # with open('targets.txt', 'a') as file:
@@ -170,15 +173,15 @@ def build_targets(self, p, targets):
         # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
         na, nt = self.na, targets.shape[0]  # number of anchors, targets
         tcls, tbox, indices, anch = [], [], [], []
-        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
-        ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
+        gain = torch.ones(7, device=self.device)  # normalized to gridspace gain
+        ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
         targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices
 
         g = 0.5  # bias
         off = torch.tensor([[0, 0],
                             [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
                             # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
-                            ], device=targets.device).float() * g  # offsets
+                            ], device=self.device).float() * g  # offsets
 
         for i in range(self.nl):
             anchors = self.anchors[i]
@@ -206,14 +209,12 @@ def build_targets(self, p, targets):
                 offsets = 0
 
             # Define
-            b, c = t[:, :2].long().T  # image, class
-            gxy = t[:, 2:4]  # grid xy
-            gwh = t[:, 4:6]  # grid wh
+            bc, gxy, gwh, a = t.unsafe_chunk(4, dim=1)  # (image, class), grid xy, grid wh, anchors
+            a, (b, c) = a.long().view(-1), bc.long().T  # anchors, image, class
             gij = (gxy - offsets).long()
-            gi, gj = gij.T  # grid xy indices
+            gi, gj = gij.T  # grid indices
 
             # Append
-            a = t[:, 6].long()  # anchor indices
             indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
             tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
             anch.append(anchors[a])  # anchors
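
Both the Detect head and the loss now use tensor_split() instead of hand-written slices. For an 85-channel COCO prediction (2 xy + 2 wh + 1 obj + 80 cls), the split points (2, 4, 5) recover exactly the old ps[:, :2], ps[:, 2:4] and ps[:, 5:] views; a toy check:

import torch

pred = torch.arange(85.).view(1, 85)  # one prediction row
pxy, pwh, pobj, pcls = pred.tensor_split((2, 4, 5), dim=1)
print(pxy.shape, pwh.shape, pobj.shape, pcls.shape)  # [1, 2], [1, 2], [1, 1], [1, 80]
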
diff --git a/utils/plots.py b/utils/plots.py
index 6c3f5bcaef37..a30c0faf962a 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -7,6 +7,7 @@
 import os
 from copy import copy
 from pathlib import Path
+from urllib.error import URLError
 
 import cv2
 import matplotlib
@@ -55,11 +56,13 @@ def check_pil_font(font=FONT, size=10):
     try:
         return ImageFont.truetype(str(font) if font.exists() else font.name, size)
     except Exception:  # download if missing
-        check_font(font)
         try:
+            check_font(font)
             return ImageFont.truetype(str(font), size)
         except TypeError:
             check_requirements('Pillow>=8.4.0')  # known issue https://github.com/ultralytics/yolov5/issues/5374
+        except URLError:  # not online
+            return ImageFont.load_default()
 
 
 class Annotator:
@@ -455,7 +458,7 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
     plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
 
 
-def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BGR=False, save=True):
+def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
     # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
     xyxy = torch.tensor(xyxy).view(-1, 4)
     b = xyxy2xywh(xyxy)  # boxes
@@ -467,5 +470,7 @@ def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BG
     crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
     if save:
         file.parent.mkdir(parents=True, exist_ok=True)  # make directory
-        cv2.imwrite(str(increment_path(file).with_suffix('.jpg')), crop)
+        f = str(increment_path(file).with_suffix('.jpg'))
+        # cv2.imwrite(f, crop)  # https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue
+        Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)).save(f, quality=95, subsampling=0)
     return crop
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index c5257c6ebfeb..efcacc9ca735 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -3,7 +3,6 @@
 PyTorch utils
 """
 
-import datetime
 import math
 import os
 import platform
@@ -12,14 +11,13 @@
 import warnings
 from contextlib import contextmanager
 from copy import deepcopy
-from pathlib import Path
 
 import torch
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
 
-from utils.general import LOGGER
+from utils.general import LOGGER, file_update_date, git_describe
 
 try:
     import thop  # for FLOPs computation
@@ -32,9 +30,7 @@
 
 @contextmanager
 def torch_distributed_zero_first(local_rank: int):
-    """
-    Decorator to make all processes in distributed training wait for each local_master to do something.
-    """
+    # Decorator to make all processes in distributed training wait for each local_master to do something
     if local_rank not in [-1, 0]:
         dist.barrier(device_ids=[local_rank])
     yield
@@ -42,21 +38,6 @@ def torch_distributed_zero_first(local_rank: int):
         dist.barrier(device_ids=[0])
 
 
-def date_modified(path=__file__):
-    # return human-readable file modification date, i.e. '2021-3-26'
-    t = datetime.datetime.fromtimestamp(Path(path).stat().st_mtime)
-    return f'{t.year}-{t.month}-{t.day}'
-
-
-def git_describe(path=Path(__file__).parent):  # path must be a directory
-    # return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
-    s = f'git -C {path} describe --tags --long --always'
-    try:
-        return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1]
-    except subprocess.CalledProcessError:
-        return ''  # not a git repository
-
-
 def device_count():
     # Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Only works on Linux.
assert platform.system() == 'Linux', 'device_count() function only works on Linux' @@ -69,7 +50,7 @@ def device_count(): def select_device(device='', batch_size=0, newline=True): # device = 'cpu' or '0' or '0,1,2,3' - s = f'YOLOv5 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string + s = f'YOLOv5 🚀 {git_describe() or file_update_date()} torch {torch.__version__} ' # string device = str(device).strip().lower().replace('cuda:', '') # to string, 'cuda:0' to '0' cpu = device == 'cpu' if cpu: @@ -88,7 +69,7 @@ def select_device(device='', batch_size=0, newline=True): space = ' ' * (len(s) + 1) for i, d in enumerate(devices): p = torch.cuda.get_device_properties(i) - s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2:.0f}MiB)\n" # bytes to MB + s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB else: s += 'CPU\n' @@ -99,7 +80,7 @@ def select_device(device='', batch_size=0, newline=True): def time_sync(): - # pytorch-accurate time + # PyTorch-accurate time if torch.cuda.is_available(): torch.cuda.synchronize() return time.time() @@ -205,7 +186,7 @@ def prune(model, amount=0.3): def fuse_conv_and_bn(conv, bn): - # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + # Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ fusedconv = nn.Conv2d(conv.in_channels, conv.out_channels, kernel_size=conv.kernel_size, @@ -214,12 +195,12 @@ def fuse_conv_and_bn(conv, bn): groups=conv.groups, bias=True).requires_grad_(False).to(conv.weight.device) - # prepare filters + # Prepare filters w_conv = conv.weight.clone().view(conv.out_channels, -1) w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) - # prepare spatial bias + # Prepare spatial bias b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) @@ -252,7 +233,7 @@ def model_info(model, verbose=False, img_size=640): def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) - # scales img(bs,3,y,x) by ratio constrained to gs-multiple + # Scales img(bs,3,y,x) by ratio constrained to gs-multiple if ratio == 1.0: return img else: @@ -302,13 +283,13 @@ class ModelEMA: For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage """ - def __init__(self, model, decay=0.9999, updates=0): + def __init__(self, model, decay=0.9999, tau=2000, updates=0): # Create EMA self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA # if next(model.parameters()).device.type != 'cpu': # self.ema.half() # FP16 EMA self.updates = updates # number of EMA updates - self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) + self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs) for p in self.ema.parameters(): p.requires_grad_(False) diff --git a/val.py b/val.py index 78abbda8231a..2dd2aec679f9 100644 --- a/val.py +++ b/val.py @@ -87,7 +87,7 @@ def process_batch(detections, labels, iouv): matches = matches[np.unique(matches[:, 1], return_index=True)[1]] # matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 0], 
return_index=True)[1]] - matches = torch.Tensor(matches).to(iouv.device) + matches = torch.from_numpy(matches).to(iouv.device) correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv return correct @@ -125,7 +125,6 @@ def run(data, training = model is not None if training: # called by train.py device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model - half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() else: # called directly @@ -136,33 +135,32 @@ def run(data, (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model - model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data) - stride, pt, jit, onnx, engine = model.stride, model.pt, model.jit, model.onnx, model.engine + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size - half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA - if pt or jit: - model.model.half() if half else model.model.float() - elif engine: + half = model.fp16 # FP16 supported on limited backends with CUDA + if engine: batch_size = model.batch_size else: - half = False - batch_size = 1 # export.py models default to batch-size 1 - device = torch.device('cpu') - LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') + device = model.device + if not (pt or jit): + batch_size = 1 # export.py models default to batch-size 1 + LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') # Data data = check_dataset(data) # check # Configure model.eval() + cuda = device.type != 'cpu' is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes - iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 + iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() # Dataloader if not training: - model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz), half=half) # warmup + model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad = 0.0 if task in ('speed', 'benchmark') else 0.5 rect = False if task == 'benchmark' else pt # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images @@ -180,7 +178,7 @@ def run(data, pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar for batch_i, (im, targets, paths, shapes) in enumerate(pbar): t1 = time_sync() - if pt or jit or engine: + if cuda: im = im.to(device, non_blocking=True) targets = targets.to(device) im = im.half() if half else im.float() # uint8 to fp16/32 @@ -198,7 +196,7 @@ def run(data, loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls # NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels + targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t3 = time_sync() out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, 
agnostic=single_cls) @@ -297,7 +295,7 @@ def run(data, pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: - eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate + eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize()
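
A few implementation notes on the hunks above, each with a minimal standalone sketch (the sketches are illustrative, not part of the patch).

`utils/loss.py`: `tensor_split((2, 4, 5), dim=1)` cuts the matched-prediction rows at column indices 2, 4 and 5, producing the xy, wh, objectness and class slices in one call instead of three manual slices. A sketch under the COCO assumption of 80 classes (85 columns total); `ps` stands in for `pi[b, a, gj, gi]`:

```python
import torch

ps = torch.randn(100, 85)  # 100 matched targets x (2 xy + 2 wh + 1 obj + 80 cls)

# Old style: separate column slices
pxy_old, pwh_old, pcls_old = ps[:, :2], ps[:, 2:4], ps[:, 5:]

# New style: one split at column boundaries 2, 4 and 5 -> [0:2], [2:4], [4:5], [5:]
pxy, pwh, _, pcls = ps.tensor_split((2, 4, 5), dim=1)

assert torch.equal(pxy, pxy_old) and torch.equal(pwh, pwh_old) and torch.equal(pcls, pcls_old)
```

The `device=self.device` substitutions in the same file serve a single goal: `gain`, `ai`, the offset table and the loss accumulators are created directly on the model's device instead of inheriting `targets.device` implicitly.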
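`utils/plots.py`: `save_one_box` now routes the crop through PIL because `cv2.imwrite` JPEG output applies chroma subsampling by default, which blurs color edges in small crops (issue #7007 linked in the diff); `subsampling=0` forces full-resolution chroma. A sketch of the same pattern on a synthetic crop (file name illustrative):

```python
import cv2
import numpy as np
from PIL import Image

crop = (np.random.rand(64, 64, 3) * 255).astype(np.uint8)  # stand-in BGR crop

# cv2.imwrite('crop.jpg', crop)  # default JPEG encode chroma-subsamples the image
Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)).save('crop.jpg', quality=95, subsampling=0)
```

The `check_pil_font` hunk is a related robustness fix: moving `check_font(font)` inside the `try` and catching `URLError` means an offline machine falls back to `ImageFont.load_default()` instead of crashing on the font download.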
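`utils/torch_utils.py`: the `fuse_conv_and_bn` hunks only recase comments, but the function rewards a sanity check: in eval mode, folding BatchNorm into the preceding convolution must be output-identical, since BN is a per-channel affine map (scale γ/√(σ²+ε), shift β − γμ/√(σ²+ε)) that can be absorbed into the conv weight and bias. A self-contained verification sketch using the same algebra as the function above:

```python
import torch
import torch.nn as nn

def fuse(conv, bn):
    # Fold eval-mode BatchNorm2d into the preceding Conv2d (same algebra as fuse_conv_and_bn)
    fused = nn.Conv2d(conv.in_channels, conv.out_channels, conv.kernel_size, stride=conv.stride,
                      padding=conv.padding, groups=conv.groups, bias=True).requires_grad_(False)
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)          # per-channel gamma / sqrt(var + eps)
    w = conv.weight.clone().view(conv.out_channels, -1)
    fused.weight.copy_((torch.diag(scale) @ w).view(fused.weight.shape))
    b = torch.zeros(conv.out_channels) if conv.bias is None else conv.bias
    fused.bias.copy_(scale * b + bn.bias - scale * bn.running_mean)  # gamma*(b - mu)/sqrt + beta
    return fused

conv, bn = nn.Conv2d(3, 8, 3, bias=False), nn.BatchNorm2d(8).eval()
bn.running_mean.uniform_(-1, 1), bn.running_var.uniform_(0.5, 1.5)  # pretend-trained statistics
x = torch.randn(1, 3, 16, 16)
with torch.no_grad():
    assert torch.allclose(bn(conv(x)), fuse(conv, bn)(x), atol=1e-5)
```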
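Also in `utils/torch_utils.py`, exposing `tau` in `ModelEMA` makes the decay warm-up tunable. The effective decay is d(x) = decay · (1 − e^(−x/τ)): near zero for the first updates, so the EMA tracks the raw weights closely early on, and approaching the nominal `decay` as updates accumulate. A quick numeric check with the defaults:

```python
import math

decay, tau = 0.9999, 2000
d = lambda x: decay * (1 - math.exp(-x / tau))  # same ramp as ModelEMA.decay

for updates in (1, 100, 1000, 2000, 10000):
    print(f'{updates:>6} updates -> effective decay {d(updates):.4f}')
# ~0.0005 after 1 update, ~0.6321 after 2000, ~0.9932 after 10000
```

Raising `tau` lengthens the warm-up; lowering it reaches the asymptotic decay sooner.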
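`val.py`: the `torch.Tensor(matches)` → `torch.from_numpy(matches)` swap is not just style. `torch.Tensor(ndarray)` copies the data and coerces it to the default float32 dtype, whereas `torch.from_numpy` wraps the existing NumPy buffer with zero copy and preserves its dtype; the following `.to(iouv.device)` then performs the single transfer actually needed. A sketch:

```python
import numpy as np
import torch

matches = np.zeros((8, 3), dtype=np.float64)

a = torch.Tensor(matches)      # copies, dtype forced to torch.float32
b = torch.from_numpy(matches)  # zero-copy view, dtype preserved

print(a.dtype, b.dtype)                # torch.float32 torch.float64
matches[0, 0] = 1.0
print(a[0, 0].item(), b[0, 0].item())  # 0.0 1.0 -> b shares memory with matches
```

The `torch.tensor((width, height, width, height), device=device)` change in the NMS block follows the same principle: construct once on the target device rather than building on the CPU and moving with `.to(device)`.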
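Finally, `img_files` → `im_files` tracks the dataset attribute rename elsewhere in the repo; the surrounding lines restrict `COCOeval` to exactly the evaluated images by parsing the integer id out of each COCO filename (`000000000139.jpg` → 139). A hedged sketch of that flow (the annotation and prediction JSON names are illustrative, so the snippet assumes those files exist):

```python
from pathlib import Path

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

im_files = ['val2017/000000000139.jpg', 'val2017/000000000285.jpg']  # illustrative image list

anno = COCO('instances_val2017.json')    # ground-truth annotations
pred = anno.loadRes('predictions.json')  # detections in COCO results format
ev = COCOeval(anno, pred, 'bbox')
ev.params.imgIds = [int(Path(x).stem) for x in im_files]  # COCO stems are zero-padded integer ids
ev.evaluate()
ev.accumulate()
ev.summarize()
```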