-
Notifications
You must be signed in to change notification settings - Fork 1
/
pipeline.py
110 lines (82 loc) · 3.48 KB
/
pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Copyright 2016-2020 The Van Valen Lab at the California Institute of
# Technology (Caltech), with support from the Paul Allen Family Foundation,
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01.
# All rights reserved.
#
# Licensed under a modified Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE
#
# The Work provided may be used for non-commercial academic purposes only.
# For any other use of the Work, including commercial use, please contact:
# vanvalenlab@gmail.com
#
# Neither the name of Caltech nor the names of its contributors may be used
# to endorse or promote products derived from this software without specific
# prior written permission.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os
import numpy as np
import xarray as xr
def find_sparse_images(labeled_data, cutoff=100):
"""Gets coordinates of images that have very few cells
Args:
labeled_data: segmentation labels
cutoff: minimum number of cells per image
Returns:
numpy.array: index of images above the threshold
"""
unique_counts = []
for img in range(labeled_data.shape[0]):
unique_counts.append(len(np.unique(labeled_data[img])) - 1)
unique_counts = np.array(unique_counts)
idx = unique_counts > cutoff
return idx
def save_stitched_npzs(stitched_channels, stitched_labels, save_dir):
"""Takes corrected labels and channels and saves them into NPZ format
Args:
stitched_channels: xarray containing original channel data
stitched_labels: xarray containing stitched labels
"""
for i in range(stitched_channels.shape[0]):
X = stitched_channels[i:(i + 1), ...]
y = stitched_labels[i:(i + 1), ...]
save_path = os.path.join(save_dir, stitched_labels.fovs.values[i] + '.npz')
np.savez(save_path, X=X, y=y)
def concatenate_npz_files(npz_list):
"""Takes a list of NPZ files and combines the X and y keys of each together
Args:
npz_list: list of NPZ files
Returns:
tuple: concatenated X data and y data
"""
X_data = []
y_data = []
for npz in npz_list:
X_data.append(npz['X'])
y_data.append(npz['y'])
X_data = np.concatenate(X_data, axis=0)
y_data = np.concatenate(y_data, axis=0)
return X_data, y_data
def create_combined_npz(npz_dir, save_name):
"""Takes folder of corrected NPZs and combines together into single NPZ file
Args:
npz_dir: directory containing NPZ files
save_name: name for combined NPZ file
Raises: ValueError if invalid directory name
"""
if not os.path.isdir(npz_dir):
raise ValueError("Invalid directory name")
npz_filenames = os.listdir(npz_dir)
npz_filenames = [file for file in npz_filenames if '.npz' in file]
npz_list = [np.load(os.path.join(npz_dir, npz)) for npz in npz_filenames]
X_concat, y_concat = concatenate_npz_files(npz_list=npz_list)
np.savez(os.path.join(npz_dir, save_name), X=X_concat, y=y_concat)