Skip to content

Commit

Permalink
Create NPZ file for training (#99)
Browse files Browse the repository at this point in the history
* npz build function

* added deepcell-toolbox

* padding options

* correct function name

* update for cropping generator

* Update requirements.txt

* Update caliban_toolbox/build.py

* Update caliban_toolbox/build.py
  • Loading branch information
ngreenwald committed Jul 20, 2020
1 parent a91e55b commit cd56fac
Show file tree
Hide file tree
Showing 3 changed files with 248 additions and 0 deletions.
112 changes: 112 additions & 0 deletions caliban_toolbox/build.py
@@ -0,0 +1,112 @@
# Copyright 2016-2020 The Van Valen Lab at the California Institute of
# Technology (Caltech), with support from the Paul Allen Family Foundation,
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01.
# All rights reserved.
#
# Licensed under a modified Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE
#
# The Work provided may be used for non-commercial academic purposes only.
# For any other use of the Work, including commercial use, please contact:
# vanvalenlab@gmail.com
#
# Neither the name of Caltech nor the names of its contributors may be used
# to endorse or promote products derived from this software without specific
# prior written permission.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import math

import numpy as np

from deepcell_toolbox.utils import resize, tile_image


def pad_image_stack(images, crop_size):
    """Pads an array of images so that it is divisible by the specified crop_size

    Zero padding is added only after the existing rows and columns, so the
    original pixels keep their coordinates in the padded stack.

    Args:
        images: 4D array of images to be cropped, with rows on axis 1 and
            columns on axis 2
        crop_size: tuple of (row_crop, col_crop) specifying crop size

    Returns:
        np.array: padded image stack with the same dtype as images. When no
            padding is required the input array itself is returned.
    """

    row_len, col_len = images.shape[1:3]
    row_crop, col_crop = crop_size

    # round each spatial dimension up to the next multiple of the crop size
    new_row_len = math.ceil(row_len / row_crop) * row_crop
    new_col_len = math.ceil(col_len / col_crop) * col_crop

    if new_row_len == row_len and new_col_len == col_len:
        # don't need to pad
        return images

    # allocate with the input dtype: np.zeros defaults to float64, which would
    # silently turn integer label masks into floats whenever padding occurs
    new_images = np.zeros((images.shape[0], new_row_len, new_col_len, images.shape[3]),
                          dtype=images.dtype)
    new_images[:, :row_len, :col_len, :] = images
    return new_images


def combine_npz_files(npz_list, resize_ratios, stride_ratio=1, final_size=(256, 256)):
    """Take a series of NPZ files and combine together into single training NPZ

    Args:
        npz_list: list of NPZ files to combine; each item must provide 'X' and 'y'
            entries. Currently only works on 2D static data
        resize_ratios: ratio used to resize each NPZ if data is of different
            resolutions; indexed in step with npz_list
        stride_ratio: amount of overlap between crops (1 is no overlap, 0.5 is half crop size)
        final_size: (rows, cols) size of the final crops to be produced

    Returns:
        tuple: (combined_x, combined_y) arrays containing the resized and cropped
            data from all input NPZs, concatenated along the first axis
    """

    # normalize to a tuple so that a list such as [256, 256] still compares
    # equal to an array's shape slice in the crop check below
    final_size = tuple(final_size)

    combined_x = []
    combined_y = []

    for idx, npz in enumerate(npz_list):
        current_x = npz['X']
        current_y = npz['y']

        # resize if needed
        # TODO: Add tolerance to control when resizing happens
        current_resize = resize_ratios[idx]
        if current_resize != 1:
            # fractional target sizes are truncated to whole pixels
            new_shape = (int(current_x.shape[1] * current_resize),
                         int(current_x.shape[2] * current_resize))

            current_x = resize(data=current_x, shape=new_shape)
            current_y = resize(data=current_y, shape=new_shape, labeled_image=True)

        # crop if needed
        if current_x.shape[1:3] != final_size:

            # pad image so that crops divide evenly
            current_x = pad_image_stack(images=current_x, crop_size=final_size)
            current_y = pad_image_stack(images=current_y, crop_size=final_size)

            # create x and y crops
            current_x, _ = tile_image(image=current_x, model_input_shape=final_size,
                                      stride_ratio=stride_ratio)
            current_y, _ = tile_image(image=current_y, model_input_shape=final_size,
                                      stride_ratio=stride_ratio)

        combined_x.append(current_x)
        combined_y.append(current_y)

    combined_x = np.concatenate(combined_x, axis=0)
    combined_y = np.concatenate(combined_y, axis=0)

    return combined_x, combined_y
135 changes: 135 additions & 0 deletions caliban_toolbox/build_test.py
@@ -0,0 +1,135 @@
# Copyright 2016-2020 The Van Valen Lab at the California Institute of
# Technology (Caltech), with support from the Paul Allen Family Foundation,
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01.
# All rights reserved.
#
# Licensed under a modified Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE
#
# The Work provided may be used for non-commercial academic purposes only.
# For any other use of the Work, including commercial use, please contact:
# vanvalenlab@gmail.com
#
# Neither the name of Caltech nor the names of its contributors may be used
# to endorse or promote products derived from this software without specific
# prior written permission.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np

from caliban_toolbox import build


def _make_npzs(size, num):
    """Build `num` dict stand-ins for NPZ files, each holding zero-filled arrays.

    Args:
        size: tuple giving the two spatial dimensions of each image
        num: number of stand-in NPZs to create

    Returns:
        list: dicts mapping 'X' to a (1,) + size + (4,) array and 'y' to a
            (1,) + size + (1,) array
    """
    return [
        {'X': np.zeros((1,) + size + (4,)), 'y': np.zeros((1,) + size + (1,))}
        for _ in range(num)
    ]


def test_pad_image_stack():
    """Check padded shapes and that existing pixel values stay in place."""
    tags = [1, 2]
    crop_size = (10, 10)

    # each case is (input shape, expected shape after padding)
    cases = [
        # rows and cols both need to be modified
        ((2, 55, 55, 2), (2, 60, 60, 2)),
        # only the cols need to be modified
        ((2, 50, 35, 2), (2, 50, 40, 2)),
        # neither needs to be modified
        ((2, 30, 50, 2), (2, 30, 50, 2)),
    ]

    for input_shape, expected_shape in cases:
        input_stack = np.zeros(input_shape)
        # mark the first pixel of each image so we can confirm it is not moved
        input_stack[:, 0, 0, 0] = tags

        padded_stack = build.pad_image_stack(images=input_stack, crop_size=crop_size)

        assert padded_stack.shape == expected_shape
        assert np.all(padded_stack[:, 0, 0, 0] == tags)


def test_combine_npz_files():
    """Check crop counts and crop sizes for unchanged, cropped and resized NPZs."""
    final_size = (256, 256)

    def _combine_and_check(npz_list, resize_ratios, expected_count):
        # run the combination, then verify the number and size of the crops
        combined_x, combined_y = build.combine_npz_files(npz_list=npz_list,
                                                         resize_ratios=resize_ratios,
                                                         final_size=final_size)

        # check that correct number of NPZs present
        assert combined_x.shape[0] == expected_count

        # check correct size of NPZs
        assert combined_x.shape[1:3] == final_size

    # NPZ files are appropriate size and resolution
    npz_list = _make_npzs((256, 256), 2)
    _combine_and_check(npz_list, [1] * 2, len(npz_list))

    # NPZ files need to be cropped: each 512x512 image gives four 256x256 crops
    npz_crop_list = _make_npzs((512, 512), 3)
    _combine_and_check(npz_crop_list, [1] * 3, len(npz_crop_list) * 4)

    # NPZ files need to be resized: the crop count grows with the ratio squared
    npz_resize_list = _make_npzs((256, 256), 5)
    resize_ratios = [3] * 5
    _combine_and_check(npz_resize_list, resize_ratios,
                       len(npz_resize_list) * (resize_ratios[0] ** 2))

    # some need to be cropped, some need to be resized
    resize_ratios = [1] * 3 + [3] * 5
    _combine_and_check(npz_crop_list + npz_resize_list, resize_ratios,
                       (len(npz_crop_list) * 4 +
                        len(npz_resize_list) * (resize_ratios[4] ** 2)))
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -11,3 +11,4 @@ boto3>=1.9.0
xarray==0.13.0
netCDF4==1.5.3
pathlib==1.0.1
deepcell-toolbox>=0.6.1

0 comments on commit cd56fac

Please sign in to comment.