/
utils.py
301 lines (250 loc) · 10.8 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# Copyright 2016-2022 The Van Valen Lab at the California Institute of
# Technology (Caltech), with support from the Paul Allen Family Foundation,
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01.
# All rights reserved.
#
# Licensed under a modified Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.github.com/vanvalenlab/deepcell-label-utils/LICENSE
#
# The Work provided may be used for non-commercial academic purposes only.
# For any other use of the Work, including commercial use, please contact:
# vanvalenlab@gmail.com
#
# Neither the name of Caltech nor the names of its contributors may be used
# to endorse or promote products derived from this software without specific
# prior written permission.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Label utils"""
from collections import defaultdict
import io
import json
import zipfile
import cv2
import numpy as np
import pandas as pd
from shapely.geometry import MultiPolygon, Polygon, Point
from skimage.measure import regionprops
from tifffile import TiffFile
class InputError(Exception):
"""Raised when valid inputs are not provided."""
pass
def mask_to_polygons(mask, epsilon=1e-3, min_area=10., approx=True):
"""Convert a mask ndarray (binarized image) to Multipolygons"""
# first, find contours with cv2: it's much faster than shapely
contours, hierarchy = cv2.findContours(mask,
cv2.RETR_CCOMP,
cv2.CHAIN_APPROX_SIMPLE)
if contours and approx:
contours = [cv2.approxPolyDP(cnt,
epsilon * cv2.arcLength(cnt, True), True)
for cnt in contours]
if not contours:
return MultiPolygon()
# now messy stuff to associate parent and child contours
cnt_children = defaultdict(list)
child_contours = set()
assert hierarchy.shape[0] == 1
# http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
if parent_idx != -1:
child_contours.add(idx)
cnt_children[parent_idx].append(contours[idx])
# create actual polygons filtering by area (removes artifacts)
all_polygons = []
for idx, cnt in enumerate(contours):
if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
assert cnt.shape[1] == 1
poly = Polygon(
shell=cnt[:, 0, :],
holes=[c[:, 0, :] for c in cnt_children.get(idx, [])
if cv2.contourArea(c) >= min_area])
all_polygons.append(poly)
all_polygons = MultiPolygon(all_polygons)
return all_polygons
def polygons_to_mask(polygons, im_size):
"""Convert a polygon or multipolygon list back to
an image mask ndarray"""
img_mask = np.zeros(im_size, np.uint8)
if not polygons:
return img_mask
# function to round and convert to int
def int_coords(x):
return np.array(x).round().astype(np.int32)
exteriors = [int_coords(poly.exterior.coords) for poly in polygons.geoms]
interiors = [int_coords(pi.coords) for poly in polygons.geoms
for pi in poly.interiors]
cv2.fillPoly(img_mask, exteriors, 1)
cv2.fillPoly(img_mask, interiors, 0)
return img_mask
class DCLZipLoader:
"""Label converter class for DeepCell Label zip to usable formats."""
def __init__(self, zip_path=None, zip_obj=None,
in_shape='BCTYXZ', out_shape='BTZYXC'):
"""
Load in a file path for a zip exported from DeepCell Label and
automatically reshape to BTZYXC.
"""
if zip_path and zip_obj:
raise InputError("Only one of zip_path and zip_obj can be specified")
elif zip_path:
zf = zipfile.ZipFile(zip_path, 'r')
elif zip_obj:
zf = zip_obj
else:
raise InputError("Either zip_path or zip_obj must be specified")
data = zf.read('X.ome.tiff')
bytes_io = io.BytesIO(data)
X_init = TiffFile(bytes_io).asarray(squeeze=False)
self.X_ome = self.reshape_DCL(X_init,
in_shape, out_shape) # Reshape from _CTYX_ to BTZYXC
self.X = self.to_TYXC(self.X_ome) # Reshape to TYXC for SLC
data = zf.read('y.ome.tiff')
bytes_io = io.BytesIO(data)
y_init = TiffFile(bytes_io).asarray(squeeze=False)
self.y_ome = self.reshape_DCL(y_init,
in_shape, out_shape) # Reshape from _CTYX_ to BTZYXC
self.y = self.to_TYXC(self.y_ome) # Reshape to TYXC for SLC
data = zf.read('cells.json')
cells = json.loads(data.decode('utf-8'))
self.cells = cells
self.segments = pd.DataFrame(self.cells) # To df for SLC
data = zf.read('divisions.json')
divisions = json.loads(data.decode('utf-8'))
self.divisions = divisions
zf.close()
def reshape_DCL(self, arr, in_shape='BCTYXZ', out_shape='BTZYXC'):
"""
Rearrange the axes of the DCL output tiffs from an input
shape specified to return an ndarray with specified output
shape. By default, rearranges from the DCL output _CTYX_ to
the lab standard BTZYXC.
NOTE:
DCL currently puts the time axis in the Z dimension,
so the tiff shape is called CZYX but is actually CTYX.
"""
if len(in_shape) != 6 or len(out_shape) != 6:
raise InputError("Input and output shapes must have 6 dims.")
elif set(in_shape) != set(out_shape):
raise InputError("Input and output must share the same axes.")
# Get mapping of input shape
in_order = {}
counter = 0
for axis in in_shape:
in_order[axis] = counter
counter += 1
out_order = []
# Use input shape mapping to rearrange to output shape
for axis in out_shape:
out_order.append(in_order[axis])
rearranged = np.transpose(arr, axes=out_order)
return rearranged
def to_TYXC(self, arr):
"""
Convert a BTZYCX tiff array to TYXC. This allows for use
with the SpatialLabelConverter.
Raises:
ValueError: B and/or Z dimensions do not have length 1
"""
reshaped = np.squeeze(arr, axis=(0, 2))
if len(reshaped.shape) != 4:
raise ValueError("Could not squeeze to TYXC, B and Z axes must have length 1.")
return reshaped
class SpatialLabelConverter(object):
"""Label converter class for DeepCell Label label format. Converts
DCL labels into binary masks, centroids, bboxes, and polygons"""
def __init__(self, X=None, y=None, segments=None,
zip_path=None, zip_obj=None, test_no_poly=False):
try:
DCL = DCLZipLoader(zip_path=zip_path, zip_obj=zip_obj)
self.X = DCL.X
self.y = DCL.y
self.segments = DCL.segments
except InputError:
self.X = X
self.y = y
self.segments = segments
# Get list of segments
object_ids = self.get_object_ids()
labels = {}
# Iterate over all segments and convert to the new label format
for object_id in object_ids:
seg = self.segments_to_dict(object_id)
mask = self.dcl_to_binary_mask(object_id)
centroid = self.binary_mask_to_centroid(mask)
bbox = self.binary_mask_to_bbox(mask)
# test_no_poly for unit tests below min_area for polygons
if not test_no_poly:
polygon = self.binary_mask_to_polygon(mask)
# Save converted labels to dictionary
labels[object_id] = {'segment': seg,
'coordinate': centroid,
'bbox': bbox}
if not test_no_poly:
labels[object_id]['polygon'] = polygon
self.labels = labels
def get_object_ids(self):
return list(set(self.segments['cell']))
def segments_to_dict(self, object_id):
object_info = self.segments.loc[self.segments['cell'] == object_id]
list_of_dicts = []
for i in range(object_info.shape[0]):
value = object_info.iloc[i]['value']
time = object_info.iloc[i]['t']
channel = object_info.iloc[i]['c']
list_of_dicts.append({'value': value, 't': time, 'c': channel})
return list_of_dicts
def dcl_to_binary_mask(self, object_id):
object_info = self.segments.loc[self.segments['cell'] == object_id]
binary_mask = np.zeros(self.y.shape, dtype=self.y.dtype)
for i in range(object_info.shape[0]):
value = object_info.iloc[i]['value']
time = object_info.iloc[i]['t']
channel = object_info.iloc[i]['c']
binary_mask[time, ..., channel] = np.where(
self.y[time, ..., channel] == value,
1, binary_mask[time, ..., channel])
return binary_mask
def binary_mask_to_centroid(self, mask):
centroids = {}
for t in range(mask.shape[0]):
channels = {}
for c in range(mask.shape[3]):
mt = mask[t, ..., c]
if np.sum(mt.flatten()) > 0:
prop = regionprops(mt)[0]
centroid = Point(prop.centroid[0], prop.centroid[1])
channels[c] = centroid
centroids[t] = channels
return centroids
def binary_mask_to_bbox(self, mask):
bboxes = {}
for t in range(mask.shape[0]):
channels = {}
for c in range(mask.shape[3]):
mt = mask[t, ..., c]
if np.sum(mt.flatten()) > 0:
prop = regionprops(mt)[0]
bbox = list(prop.bbox)
channels[c] = bbox
bboxes[t] = channels
return bboxes
def binary_mask_to_polygon(self, mask):
polygons = {}
for t in range(mask.shape[0]):
channels = {}
for c in range(mask.shape[3]):
mt = mask[t, ..., c]
if np.sum(mt.flatten()) > 0:
poly = mask_to_polygons(mt.astype('uint8'))
channels[c] = poly
polygons[t] = channels
return polygons