Skip to content

Commit

Permalink
fix bug with resizing
Browse files Browse the repository at this point in the history
  • Loading branch information
ngreenwald committed Aug 24, 2020
1 parent 46b2acf commit 307ec00
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 10 deletions.
4 changes: 2 additions & 2 deletions caliban_toolbox/DatasetBuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def _reshape_dict(self, dict, resize=False, output_shape=(512, 512), resize_targ

# check for empty images
if median_cell_size is not None:
resize_ratio = median_cell_size / resize_target
resize_ratio = np.sqrt(resize_target / median_cell_size)
else:
resize_ratio = 1

Expand Down Expand Up @@ -347,7 +347,7 @@ def _reshape_dict(self, dict, resize=False, output_shape=(512, 512), resize_targ

# check for empty images
if median_cell_size is not None:
resize_ratio = median_cell_size / resize_target
resize_ratio = np.sqrt(resize_target / median_cell_size)
else:
resize_ratio = 1

Expand Down
44 changes: 36 additions & 8 deletions caliban_toolbox/DatasetBuilder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,16 @@ def mocked_compute_cell_size(data_dict, by_image):
X = data_dict['X']
constant_val = X[0, 0, 0, 0]

multiplier = 400 + (400 * constant_val)
# The default resize is 400. We want to create median cell sizes that divide evenly
# into that number when computing the desired resize ratio

return multiplier
# even constant_vals return a median cell size 1/4 the size of the target; odd ones return 4x the target
if constant_val % 2 == 0:
cell_size = 100
else:
cell_size = 1600

return cell_size


def test__init__(tmp_path):
Expand Down Expand Up @@ -294,6 +301,8 @@ def test__reshape_dict_no_resize(tmp_path):
X_reshaped, tissue_list_reshaped = reshaped_dict['X'], reshaped_dict['tissue_list']
assert X_reshaped.shape[1:3] == output_shape

assert X_reshaped.shape[0] == 4 * data_dict['X'].shape[0]

# make sure that for each tissue, the arrays with correct value have correct tissue label
for constant_val, tissue in enumerate(tissues):
tissue_idx = X_reshaped[:, 0, 0, 0] == constant_val
Expand Down Expand Up @@ -321,14 +330,24 @@ def test__reshape_dict_by_tissue(tmp_path, mocker):

# make sure that for each tissue, the arrays with correct value have correct tissue label
for constant_val, tissue in enumerate(tissues):
tissue_idx = X_reshaped[:, 0, 0, 0] == constant_val
# each image was tagged with a different value; compute that here
image_val = np.max(X_reshaped, axis=(1, 2, 3))

tissue_idx = image_val == constant_val
tissue_labels = np.array(tissue_list_reshaped)[tissue_idx]
assert np.all(tissue_labels == tissue)

# Each tissue type starts with length 5, and is resized according to its constant value
assert len(tissue_labels) == 5 * ((constant_val + 1) ** 2)
# There were originally 5 images of each tissue type. Tissue types with even values
# are resized to be 2x larger on each dimension, and should have 4x more images
if constant_val % 2 == 0:
assert len(tissue_labels) == 5 * 4
# tissue types with odd values are resized to be smaller, which leads to the same number
# of unique images due to padding
else:
assert len(tissue_labels) == 5


# TODO: Is there a way to check the resize value of each unique image?
def test__reshape_dict_by_image(tmp_path, mocker):
mocker.patch('caliban_toolbox.DatasetBuilder.compute_cell_size', mocked_compute_cell_size)
# workaround so that __init__ doesn't throw an error
Expand All @@ -349,12 +368,21 @@ def test__reshape_dict_by_image(tmp_path, mocker):

# make sure that for each tissue, the arrays with correct value have correct tissue label
for constant_val, tissue in enumerate(tissues):
tissue_idx = X_reshaped[:, 0, 0, 0] == constant_val
# each image was tagged with a different value; compute that here
image_val = np.max(X_reshaped, axis=(1, 2, 3))

tissue_idx = image_val == constant_val
tissue_labels = np.array(tissue_list_reshaped)[tissue_idx]
assert np.all(tissue_labels == tissue)

# Each tissue type starts with length 5, and is resized according to its constant value
assert len(tissue_labels) == 5 * ((constant_val + 1) ** 2)
# There were originally 5 images of each tissue type. Tissue types with even values
# are resized to be 2x larger on each dimension, and should have 4x more images
if constant_val % 2 == 0:
assert len(tissue_labels) == 5 * 4
# tissue types with odd values are resized to be smaller, which leads to the same number
# of unique images due to padding
else:
assert len(tissue_labels) == 5


def test__clean_labels(tmp_path):
Expand Down

0 comments on commit 307ec00

Please sign in to comment.