-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathpostprocess.py
214 lines (168 loc) · 7.95 KB
/
postprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# -*- coding:utf-8 -*-
"""
YOLOv8 图像预处理
"""
import numpy as np
import cv2
def xywh2xyxy(bboxes):
"""
Convert nx4 boxes from [center x, center y, w, h, conf, class_id] to [x1, y1, x2, y2, conf, class_id]
"""
out = bboxes.copy()
out[:, 0] = bboxes[:, 0] - bboxes[:, 2] / 2 # top left x
out[:, 1] = bboxes[:, 1] - bboxes[:, 3] / 2 # top left y
out[:, 2] = bboxes[:, 0] + bboxes[:, 2] / 2 # bottom right x
out[:, 3] = bboxes[:, 1] + bboxes[:, 3] / 2 # bottom right y
return out
def nms(bboxes, scores, threshold=0.5):
x1 = bboxes[:, 0]
y1 = bboxes[:, 1]
x2 = bboxes[:, 2]
y2 = bboxes[:, 3]
areas = (x2 - x1) * (y2 - y1)
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, (xx2 - xx1))
h = np.maximum(0.0, (yy2 - yy1))
inter = w * h
iou = inter / (areas[i] + areas[order[1:]] - inter)
indexes = np.where(iou <= threshold)[0]
order = order[indexes + 1]
keep = np.array(keep).astype(int)
return keep
def clip_coords(boxes, img_shape):
# Clip bounding xyxy bounding boxes to image shape (height, width)
boxes[:, 0] = boxes[:, 0].clip(0, img_shape[1]) # x1
boxes[:, 1] = boxes[:, 1].clip(0, img_shape[0]) # y1
boxes[:, 2] = boxes[:, 2].clip(0, img_shape[1]) # x2
boxes[:, 3] = boxes[:, 3].clip(0, img_shape[0]) # y2
def scale_coords(img1_shape, coords, img0_shape):
# Rescale coords (xyxy) from img1_shape to img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
coords[:, [0, 2]] -= pad[0] # x padding
coords[:, [1, 3]] -= pad[1] # y padding
coords[:, :4] /= gain
clip_coords(coords, img0_shape)
return coords
def scale_masks(masks, img0_shape):
"""
Takes a mask, and resizes it to the original image size.
:param masks: (np.ndarray) resized and padded masks, [num, h, w].
:param img0_shape: (tuple) the original image shape
:return: The masks that are being returned.
"""
masks = masks.transpose((1, 2, 0)) # CHW to HWC, (n, 160, 160) to (160, 160, n)
img1_shape = masks.shape # (160, 160, n)
if img1_shape[:2] == img0_shape[:2]:
masks = masks.transpose((2, 0, 1)) # HWC to CHW
return masks
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
top, left = int(pad[1]), int(pad[0]) # y, x
bottom, right = int(img1_shape[0] - pad[1]), int(img1_shape[1] - pad[0])
masks = masks[top:bottom, left:right]
masks = cv2.resize(masks, (img0_shape[1], img0_shape[0]), interpolation=cv2.INTER_LINEAR)
if len(masks.shape) == 2:
masks = masks[:, :, None]
masks = masks.transpose((2, 0, 1)) # HWC to CHW
mask1 = masks > 0.5
mask2 = masks <= 0.5
masks[mask1] = 1
masks[mask2] = 0
return masks
def sigmoid(x):
s = 1 / (1 + np.exp(-x))
return s
def crop_mask(masks, boxes):
"""
It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.
:param masks: [n, h, w] ndarray of masks
:param boxes: [n, 4] ndarray of bbox coordinates in relative point form
:return: The masks are being cropped to the bounding box.
"""
_, h, w = masks.shape
x1 = boxes[:, 0][:, None, None]
y1 = boxes[:, 1][:, None, None]
x2 = boxes[:, 2][:, None, None]
y2 = boxes[:, 3][:, None, None]
c = np.arange(w, dtype=x1.dtype)[None, None, :] # cols shape(1,1,w)
r = np.arange(h, dtype=x1.dtype)[None, :, None] # rows shape(1,h,1)
return masks * ((c >= x1) * (c < x2) * (r >= y1) * (r < y2))
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
"""
Apply masks to bounding boxes using the output of the mask head.
:param protos: A ndarray of shape [mask_dim, mask_h, mask_w], default [32, 160, 160].
:param masks_in: A ndarray of shape [n, mask_dim], where n is the number of masks after NMS.
:param bboxes: A ndarray of shape [n, 4], where n is the number of masks after NMS.
:param shape: A tuple of integers representing the size of the input image in the format (h, w).
:param upsample: A flag to indicate whether to upsample the mask to the original image size. Default is False.
:return:
(ndarray): A binary mask ndarray of shape [n, h, w], where n is the number of masks after NMS, and h and w
are the height and width of the input image. The mask is applied to the bounding boxes.
"""
c, mh, mw = protos.shape # 32, 160, 160
ih, iw = shape # 640, 640
masks = sigmoid(masks_in @ protos.reshape((c, -1))).reshape((-1, mh, mw)) # CHW
width_ratio = mw / iw
height_ratio = mh / ih
downsampled_bboxes = bboxes.copy()
downsampled_bboxes[:, 0] *= width_ratio
downsampled_bboxes[:, 1] *= height_ratio
downsampled_bboxes[:, 2] *= width_ratio
downsampled_bboxes[:, 3] *= height_ratio
masks = crop_mask(masks, downsampled_bboxes) # CHW
if upsample:
masks = masks.transpose((1, 2, 0)) # CHW to HWC
masks = cv2.resize(masks, (iw, ih), interpolation=cv2.INTER_LINEAR) # bilinear upsample
if len(masks.shape) == 2:
masks = masks[:, :, None]
masks = masks.transpose((2, 0, 1)) # HWC to CHW
return masks
def postprocess(img0, preds, conf_thres, iou_thres, input_h, input_w, nc=0):
"""
img0: original image
preds: YOLOv8 segment output, tuple (out1, out2)
out1: shape (32, 160, 160) out2: shape (116, 8400)
"""
prediction = preds[1] # shape (116, 8400)
nc = nc or (prediction.shape[0] - 4) # number of classes
nm = prediction.shape[0] - nc - 4 # number of mask coefficient, default 32
mi = 4 + nc # mask start index
xc = prediction[4:mi].max(0) > conf_thres # [ True, False, True, False, True, True, ... ]
prediction = prediction.transpose((1, 0))
x = prediction[xc]
if not x.shape[0]:
return np.empty((0, 6), dtype=np.float32), np.empty((0, 640))
box = x[:, :4]
cls = x[:, 4:mi]
mask = x[:, mi:]
i, j = np.where(cls > conf_thres)
x = np.concatenate((box[i], x[i, 4 + j, None], j[:, None], mask[i]), 1)
# x shape : (num_bboxes, 38)
# 38 dims are : center_x, center_y, w, h, conf, class_id, 32 mask
bboxes = xywh2xyxy(x)
labels = set(bboxes[:, 5].astype(int))
detected_objects = [] # [[x1, y1, x2, y2, conf, class_id, 32 mask], [...], [...]]
for label in labels:
selected_bboxes = bboxes[np.where(bboxes[:, 5] == label)]
selected_bboxes_keep = selected_bboxes[nms(selected_bboxes[:, :4], selected_bboxes[:, 4], iou_thres)]
detected_objects += selected_bboxes_keep.tolist()
if detected_objects:
detected_objects = np.array(detected_objects)
else:
return np.empty((0, 6), dtype=np.float32), np.empty((0, 640))
proto = preds[0] # shape (32, 160, 160)
masks = process_mask(proto, detected_objects[:, 6:], detected_objects[:, :4], (input_h, input_w))
# masks: shape (num_bboxes, input_h / 4, input_w / 4), such as (8, 160, 160)
masks = scale_masks(masks, img0.shape)
# masks: shape (num_bboxes, origin_h, origin_w), such as (8, 476, 538)
detected_objects[:, :4] = scale_coords((input_h, input_w), detected_objects[:, :4], img0.shape[:2]).round()
return detected_objects[:, :6], masks