diff --git a/.travis.yml b/.travis.yml index 0b06458..443069b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,4 @@ -sudo: false -dist: trusty +dist: xenial git: depth: false @@ -8,16 +7,15 @@ language: python python: - 3.6 - -cache: pip + - 3.7 install: - - pip install -r requirements.txt + - travis_retry pip install -r requirements.txt --progress-bar off # install testing requirements - - pip install pytest==5.2.0 pytest-cov==2.5.1 pytest-pep8 coveralls + - travis_retry pip install -r requirements-test.txt --progress-bar off script: - - python -m pytest --pep8 --cov=caliban_toolbox caliban_toolbox + - python -m pytest --cov=caliban_toolbox --pep8 after_success: - coveralls diff --git a/Dockerfile b/Dockerfile index 829a5fa..f48a835 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.6 +FROM python:3.7 # System maintenance RUN apt-get update && apt-get install -y \ diff --git a/caliban_toolbox/__init__.py b/caliban_toolbox/__init__.py index 98b08bb..8023fc6 100644 --- a/caliban_toolbox/__init__.py +++ b/caliban_toolbox/__init__.py @@ -23,4 +23,25 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Data Engineering Toolbox for DeepCell""" +"""Data Engineering Toolbox for Caliban""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from caliban_toolbox import pre_annotation +# from caliban_toolbox import tracking +from caliban_toolbox import utils +from caliban_toolbox import aws_functions +from caliban_toolbox import figure_eight_functions +from caliban_toolbox import log_file +from caliban_toolbox import relabel +from caliban_toolbox import reshape_data + +from caliban_toolbox.pre_annotation import * +# from caliban_toolbox.tracking import * +from caliban_toolbox.utils import * + +del absolute_import +del division +del print_function diff --git a/caliban_toolbox/aws_functions.py b/caliban_toolbox/aws_functions.py index 8375d43..eebb172 100755 --- a/caliban_toolbox/aws_functions.py +++ b/caliban_toolbox/aws_functions.py @@ -23,18 +23,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -import sys -import boto3 import os +import sys import threading import re -from urllib.parse import urlencode +import boto3 -import numpy as np +from urllib.parse import urlencode from getpass import getpass -from caliban_toolbox.utils.utils import get_img_names, list_npzs_folder +from caliban_toolbox.utils.misc_utils import list_npzs_folder # Taken from AWS Documentation diff --git a/caliban_toolbox/pre_annotation/__init__.py b/caliban_toolbox/pre_annotation/__init__.py new file mode 100644 index 0000000..b6a118b --- /dev/null +++ b/caliban_toolbox/pre_annotation/__init__.py @@ -0,0 +1,36 @@ +# Copyright 2016-2020 The Van Valen Lab at the California Institute of +# Technology (Caltech), with support from the Paul Allen Family Foundation, +# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01. +# All rights reserved. +# +# Licensed under a modified Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE +# +# The Work provided may be used for non-commercial academic purposes only. +# For any other use of the Work, including commercial use, please contact: +# vanvalenlab@gmail.com +# +# Neither the name of Caltech nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific +# prior written permission. +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Caliban Toolbox Pre-Annotation Module""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from caliban_toolbox.pre_annotation import data_loader + +del absolute_import +del division +del print_function diff --git a/caliban_toolbox/pre_annotation/data_loader.py b/caliban_toolbox/pre_annotation/data_loader.py new file mode 100644 index 0000000..00409d9 --- /dev/null +++ b/caliban_toolbox/pre_annotation/data_loader.py @@ -0,0 +1,449 @@ +# Copyright 2016-2020 David Van Valen at California Institute of Technology +# (Caltech), with support from the Paul Allen Family Foundation, Google, +# & National Institutes of Health (NIH) under Grant U24CA224309-01. +# All rights reserved. +# +# Licensed under a modified Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE +# +# The Work provided may be used for non-commercial academic purposes only. +# For any other use of the Work, including commercial use, please contact: +# vanvalenlab@gmail.com +# +# Neither the name of Caltech nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific +# prior written permission. +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Load raw data (with metadata) from CellNet-formatted ontology.""" + +from __future__ import absolute_import + +import os +import json +import fnmatch + +from pathlib import Path +import numpy as np + +from skimage.external import tifffile as tiff + +import pandas as pd + +from caliban_toolbox.utils.misc_utils import sorted_nicely + + +class UniversalDataLoader(object): + """Given a CellNet data type, load and store image set and metadata for + initial predictions and crowdsourcing annotation curation. + + The raw data and metadata file should be arranged according to the CellNet + data ontology. The root of this ontology should be mounted as /data within + the container. The image files are stored in a numpy array together with a + dictionary object for the metadata. 
+
+    Excluding data type and imaging type, arg options include 'all' and
+    'random' ('random' picks one file at random - best used for testing).
+
+    Args:
+        data_type (tuple): CellNet data type ('dynamic/static', '2d/3d')
+        imaging_types (list): imaging modalities of interest ('fluo', 'phase', etc)
+        specimen_types (list): specimens of interest (HEK293, HeLa, etc)
+        compartments (list): compartments of interest (nuclear, whole_cell)
+        markers (list): markers of interest
+        exp_ids (list): experiment IDs or DOIs of the datasets
+        sessions (list): which sessions to include
+        positions (list): which positions/FOVs to include
+
+        - lists should contain strings that match the CellNet ontology
+          (e.g. data_type = ('dynamic', '2d'))
+        - 'all' selects all data from a given category
+          (e.g. sessions=['all'])
+
+    Returns:
+        Numpy array with the shape [fovs, z_dim(time or space), y_dim, x_dim]
+        Python dictionary containing metadata
+    """
+
+    def __init__(self,
+                 data_type,
+                 imaging_types,
+                 specimen_types,
+                 compartments=None,
+                 markers=['all'],  # TODO: these should be sets to prevent duplicate 'all' entries, etc.
+                 exp_ids=['all'],
+                 sessions=['all'],
+                 positions=['all'],
+                 file_type='.tif'):
+
+        if compartments is None and imaging_types != ['phase']:
+            raise ValueError('compartments must be specified unless the imaging type is phase')
+
+        if compartments is None:
+            compartments = [None]  # phase data has no compartment; use the list form expected below
+
+        self.data_type = data_type
+        self.imaging_types = set(imaging_types)
+        self.specimen_types = set(specimen_types)
+        self.compartments = set(compartments)
+        self.markers = set(markers)
+        self.exp_ids = set(exp_ids)
+        self.sessions = set(sessions)
+        self.positions = set(positions)
+        self.file_type = file_type
+        self.onto_levels = np.full(7, False)
+
+        self._vocab_check()
+
+        self.base_path = '/data/raw_data'
+        for item in self.data_type:
+            self.base_path = os.path.join(self.base_path, item)
+
+        self._datasets_available()  # TODO: keep list of datasets for comparison
+        self._calc_upper_bound()
+
+    def _vocab_check(self):
+        """Check each user input for common mistakes and correct as necessary
+        """
+        # TODO: improve this for generality and scale
+
+        # Sets of common spellings
+        img_fluo_misspell = {'flourescent', 'fluorescence', 'fluorescent', 'fluo'}
+        comp_nuc_misspell = {'nuc', 'nuclear'}
+        comp_wc_misspell = {'wholecell', 'whole_cell', 'whole cell'}
+
+        # imaging_types - check for fluo misspellings
+        new_imaging_types = []
+        for item in self.imaging_types:
+            item = item.lower()
+            if item in img_fluo_misspell:
+                new_imaging_types.append('fluo')
+            elif item == 'phase':
+                new_imaging_types.append('phase')
+            else:
+                new_imaging_types.append(item)
+
+        self.imaging_types = new_imaging_types
+
+        # compartments - check for nuc or capitalization
+        # None is only allowed if it's the only entry (imaging type must be phase only)
+        if None not in self.compartments:
+            new_compartments = []
+            for item in self.compartments:
+                item = item.lower()
+                if item in comp_nuc_misspell:
+                    new_compartments.append('Nuclear')
+                elif item in comp_wc_misspell:
+                    new_compartments.append('WholeCell')
+                else:
+                    new_compartments.append(item)
+
+            self.compartments = new_compartments
+
+    def _calc_upper_bound(self):
+        """Determine how many 'all' wildcards we have and at which ontology levels
+        """
+        for level, spec in enumerate([self.imaging_types,
+                                      self.specimen_types,
+                                      self.compartments,
+                                      self.markers,
+                                      self.exp_ids,
+                                      self.sessions,
+                                      self.positions]):
+            spec = list(spec)
+
+            try:
+                if len(spec) == 1 and spec[0].lower() == 'all':
+                    self.onto_levels[level] = True
+            except AttributeError:
+                # spec value is None, so the level is left as False
+                continue
+
+        # TODO: Raise a warning that 'all' or 'None' values are in use
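The wildcard bookkeeping in `_calc_upper_bound` drives every branch of `_assemble_paths` below. A minimal standalone sketch of the same check, assuming the behavior above; the `flag_wildcards` name is illustrative and not part of the toolbox API:

```python
import numpy as np

def flag_wildcards(*specs):
    """Mark which spec lists are the ['all'] wildcard (cf. _calc_upper_bound)."""
    flags = np.full(len(specs), False)
    for level, spec in enumerate(specs):
        spec = list(spec)
        try:
            if len(spec) == 1 and spec[0].lower() == 'all':
                flags[level] = True
        except AttributeError:
            # spec holds None (e.g. compartments for phase data); level stays False
            continue
    return flags

# flag_wildcards(['fluo'], ['all'], [None]) -> array([False, True, False])
```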
+
+    def _path_builder(self, root_path, list_of_dirs):
+        """Append each directory in a list to a root path, creating several new
+        paths, and verify that these new paths exist.
+
+        Args:
+            root_path (path): base path to add to
+            list_of_dirs (list): directory names to add to the base path
+
+        Returns:
+            list: combined paths, one for each directory in list_of_dirs that exists
+        """
+        new_paths = []
+        for item in list_of_dirs:
+            candidate_path = os.path.join(root_path, item)
+            if Path.exists(Path(candidate_path)):
+                new_paths.append(candidate_path)
+            else:
+                # TODO: Switch this to a logger statement
+                print('Warning! Path:', candidate_path, 'Does Not Exist!')
+
+        return new_paths
+
+    def _assemble_paths(self):
+        """Go through permutations of parameters and assemble paths that lead to the
+        directories of interest (containing a metadata json file) as well as image stacks
+        """
+        # maybe a dictionary would be better here? need to map multiple tiff files to a data dir
+        # probably should be a class per dataset
+        # TODO: polish the logic
+
+        if self.onto_levels[0]:
+            presort = os.listdir(self.base_path)
+            self.imaging_types = sorted_nicely(presort)
+        imaging_paths = self._path_builder(self.base_path, self.imaging_types)
+
+        specimen_paths = []
+        for thing in imaging_paths:
+            if self.onto_levels[1]:
+                presort = os.listdir(thing)
+                self.specimen_types = sorted_nicely(presort)
+            specimen_paths.extend(self._path_builder(thing, self.specimen_types))
+
+        # The following conditional doesn't work for phase (phase has no compartments or markers)
+        # so we need a different branch to handle that here
+        compartments_marker_paths = []
+        if 'phase' in self.imaging_types:
+            for thing in specimen_paths:
+                thing_path = Path(thing)
+                thing_parts = os.path.split(thing_path.parent)
+                if thing_parts[1] == 'phase':
+                    compartments_marker_paths.append(thing)
+
+        # Until now each spec has been standalone
+        # Now we need to start combining specs
+        if self.onto_levels[2] and self.onto_levels[3]:
+            # All compartments and all markers
+            # We grab every directory
+            for thing in specimen_paths:
+                presort = os.listdir(thing)
+                thing_sorted = sorted_nicely(presort)
+                compartments_marker_paths.extend(self._path_builder(thing, thing_sorted))
+
+        elif self.onto_levels[2]:
+            # All compartments but not all markers
+            for thing in specimen_paths:
+                to_filter = sorted_nicely(os.listdir(thing))
+                base_pattern = '*_'
+                for item in self.markers:
+                    pattern = base_pattern + item
+                    dirs_to_keep = fnmatch.filter(to_filter, pattern)
+                    compartments_marker_paths.extend(self._path_builder(thing, dirs_to_keep))
+
+        elif self.onto_levels[3]:
+            # Not all compartments but all markers (all markers for a given compartment)
+            for thing in specimen_paths:
+                to_filter = sorted_nicely(os.listdir(thing))
+                base_pattern = '_*'
+                if self.compartments is not None:
+                    for item in self.compartments:
+                        pattern = item + base_pattern
+                        dirs_to_keep = fnmatch.filter(to_filter, pattern)
+                        compartments_marker_paths.extend(self._path_builder(thing, dirs_to_keep))
+
+        else:
+            # Specific compartments with specific markers
+            # This is a tricky one because we have to check on marker compatibility
+            for thing in specimen_paths:
+                to_filter = sorted_nicely(os.listdir(thing))
+                for item1 in self.compartments:
+                    for item2 in self.markers:
+                        pattern = item1 + '_' + item2
+                        dirs_to_keep = fnmatch.filter(to_filter, pattern)
+                        compartments_marker_paths.extend(self._path_builder(thing, dirs_to_keep))
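Each branch above narrows the candidate directories with `fnmatch` patterns built from compartment and marker names. A small sketch of that filtering; `Nuclear_H2B-mClover` matches the dataset shown in the notebook later in this diff, while `Nuclear_H2B-iRFP670` and `WholeCell_CAAX-mCherry` are hypothetical names following the same `<compartment>_<marker>` convention:

```python
import fnmatch

dirs = ['Nuclear_H2B-mClover', 'Nuclear_H2B-iRFP670', 'WholeCell_CAAX-mCherry']

# all markers for one compartment (pattern = compartment + '_*')
print(fnmatch.filter(dirs, 'Nuclear' + '_*'))
# ['Nuclear_H2B-mClover', 'Nuclear_H2B-iRFP670']

# one compartment with one marker (pattern = compartment + '_' + marker)
print(fnmatch.filter(dirs, 'Nuclear' + '_' + 'H2B-mClover'))
# ['Nuclear_H2B-mClover']
```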
+
+        # Exp_ids/DOI
+        # For each path in compartments_marker_paths we need to select the correct experiment id
+        exp_ids_paths = []
+        for thing in compartments_marker_paths:
+            if self.onto_levels[4]:
+                exp_ids = sorted_nicely(os.listdir(thing))
+            else:
+                exp_ids = self.exp_ids  # use the user-specified ids when not selecting 'all'
+            exp_ids_paths.extend(self._path_builder(thing, exp_ids))
+
+        # Each exp_ids_path is the directory that holds the images and the metadata file
+
+        # sessions and positions
+        # Again, we need to start combining specs
+
+        image_paths = []
+        if self.onto_levels[5] and self.onto_levels[6]:
+            # All sessions and all positions
+            # We grab every file of the matching type
+            for thing in exp_ids_paths:
+                images = []
+                thing_sorted = sorted_nicely(os.listdir(thing))
+                for file in thing_sorted:
+                    if file.endswith(self.file_type):
+                        images.append(file)
+                image_paths.append(self._path_builder(thing, images))
+
+        elif self.onto_levels[5]:
+            # All sessions but not all positions
+            for thing in exp_ids_paths:
+                to_filter = sorted_nicely(os.listdir(thing))
+                for item in self.positions:
+                    pattern = '*_s*_p' + item.zfill(2) + self.file_type
+                    dirs_to_keep = fnmatch.filter(to_filter, pattern)
+                    image_paths.append(self._path_builder(thing, dirs_to_keep))
+
+        elif self.onto_levels[6]:
+            # Not all sessions but all positions (all positions for a given session)
+            for thing in exp_ids_paths:
+                to_filter = sorted_nicely(os.listdir(thing))
+                for item in self.sessions:
+                    pattern = '*_s' + item.zfill(2) + '*' + self.file_type
+                    dirs_to_keep = fnmatch.filter(to_filter, pattern)
+                    image_paths.append(self._path_builder(thing, dirs_to_keep))
+
+        else:
+            # Specific sessions with specific positions
+            for thing in exp_ids_paths:
+                to_filter = sorted_nicely(os.listdir(thing))
+                for item1 in self.sessions:
+                    for item2 in self.positions:
+                        pattern = '*_s' + item1.zfill(2) + '_p' + item2.zfill(2) + self.file_type
+                        dirs_to_keep = fnmatch.filter(to_filter, pattern)
+                        image_paths.append(self._path_builder(thing, dirs_to_keep))
+
+        return (exp_ids_paths, image_paths)
+
+    def _datasets_available(self):
+        # This function should be part of a different system and constantly maintained
+        # This is a placeholder for a database that tells us what data is available
+        for (cur_dir, sub_dirs, files) in os.walk(self.base_path):
+            if not sub_dirs and not files:
+                print(cur_dir)
+                print('empty directory')
+                print('--------------------------------')
+            if not sub_dirs and len(files) == 2:
+                print(cur_dir)
+                print('only 1 file')
+                print('--------------------------------')
+
+    def _check_compatibility(self):
+        """Verify that the selected image data share the same resolution, size, etc.
+        """
+        compatible = True
+
+        # Check image dimensions
+        dims = pd.DataFrame(list(self.metadata_all['DIMENSIONS']))
+        unique_entries_x = dims['X'].unique()
+        unique_entries_y = dims['Y'].unique()
+        if len(unique_entries_x) != 1 or len(unique_entries_y) != 1:
+            print('Padding required')  # TODO: Switch this to a logging statement
+            compatible = False
+
+        # Check resolution (using pixel size)
+        res_mag = pd.DataFrame(list(self.metadata_all['IMAGING_PARAMETERS']))
+        unique_entries = res_mag['PIXEL_SIZE'].unique()
+        if len(unique_entries) != 1:
+            print('Pixel size mismatch')  # TODO: Switch this to a logging statement
+            compatible = False
+
+        # Check magnification
+        unique_entries = res_mag['MAGNIFICATION'].unique()
+        if len(unique_entries) != 1:
+            print('Magnification mismatch')  # TODO: Switch this to a logging statement
+            compatible = False
+
+        # TODO: Add field to metadata to check for number of frames
+
+        return compatible
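`load_metadata()` below expects a JSON file literally named `metadata` inside each experiment-id directory. A hypothetical example of writing one; the field names are the ones read by the loader and by `_check_compatibility` (and shown in the notebook output later in this diff), but the values, the pixel size in particular, are illustrative only:

```python
import json

metadata = {
    'TYPE': ['cell', 'HEK293'],
    'ONTOLOGY': ['dynamic', '2d', 'fluorescence', 'nuclear'],
    'CHANNEL_MARKER': {'0': 'H2B-mClover'},
    'DIMENSIONS': {'X': '1280', 'Y': '1080'},
    'IMAGING_PARAMETERS': {'PIXEL_SIZE': '0.65', 'MAGNIFICATION': '20'},  # illustrative values
}

# the loader looks for a file named exactly 'metadata' (no extension)
with open('metadata', 'w') as mdf:
    json.dump(metadata, mdf)
```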
+    def load_metadata(self):
+        """Build a database that includes all of the metadata information
+        as well as the paths to the individual image files
+        """
+        # TODO: Replace with query when DB is persistent
+
+        (metadata_dirs, image_paths) = self._assemble_paths()
+
+        # Check that paths are good by verifying metadata files
+        # If so, then load and organize metadata information
+        metadata_all = []
+        for (metadata_dir, image_path) in zip(metadata_dirs, image_paths):
+
+            mdf_path = os.path.join(metadata_dir, 'metadata')
+            if not os.path.isfile(mdf_path):
+                raise ValueError("Metadata file does not exist")
+
+            with open(mdf_path, 'r') as raw_mdf:
+                raw_data = json.load(raw_mdf)
+
+            # Reshape the raw information into a useful pandas dataframe
+            metadata_f = pd.DataFrame.from_dict(raw_data, orient='index').transpose()
+            metadata_f['TYPE'] = metadata_f['TYPE'].str.cat(sep=' ')
+            metadata_f['ONTOLOGY'] = metadata_f['ONTOLOGY'].str.cat(sep=' ')
+            metadata_f = metadata_f.dropna()
+            # Add a field to keep track of all the images associated with this metadata
+            metadata_f['PATHS'] = [image_path]
+            # Add this frame to the master list
+            metadata_all.append(metadata_f)
+
+        # Change the list into a dataframe
+        self.metadata_all = pd.concat(metadata_all)
+
+    def load_imagedata(self):
+        """Load the image data
+        """
+
+        # TODO: The metadata should include num_frames, but does not currently
+        # So, for now, we will load these images in a list
+
+        # The dimensions of these images will vary in size and meaning depending on where
+        # the files exist in the ontology (eg: [time, y, x] for 2d dynamic data but
+        # [z, y, x] for 3d static data)
+        # Channels are handled by the ontology (stored in other images) but will need to be
+        # correctly associated based on metadata
+
+        # Check that the metadata has been loaded
+        try:
+            self.metadata_all
+        except AttributeError:
+            # a missing instance attribute raises AttributeError, not NameError
+            raise ValueError('Metadata not found - run load_metadata() first')
+
+        compatibility = self._check_compatibility()
+
+        raw_images = None  # stays None if the datasets are incompatible
+        if compatibility:
+            # Instantiate something to hold all the images
+            raw_images = []  # should be np.zeros(shape)
+            dims = pd.DataFrame(list(self.metadata_all['DIMENSIONS']))
+            dims_x = int(dims['X'].unique()[0])
+            dims_y = int(dims['Y'].unique()[0])
+            max_frames = 0  # TODO: Remove when metadata corrected
+            for index, row in self.metadata_all.iterrows():
+                # Each row contains several paths that have the same metadata
+                # Perform some logic on the metadata to determine the size of the array
+                for path in row['PATHS']:
+                    # Read in the image
+                    img_set = tiff.imread(path)
+                    raw_images.append(img_set)
+                    if img_set.shape[0] > max_frames:
+                        max_frames = img_set.shape[0]
+
+            # TODO: the following won't be necessary once num_frames exists
+            # TODO: the len(raw_images) could also be replaced by a column in the dataframe
+            raw_image_array = np.zeros([len(raw_images),
+                                        max_frames,
+                                        dims_y, dims_x])
+            for index, item in enumerate(raw_images):
+                # zero-pad any stack shorter than max_frames
+                raw_image_array[index, :item.shape[0], :, :] = item
+
+            raw_images = raw_image_array
+
+        # Current pipeline expects an xarray of shape [fov/stack, tiffs, y, x]
+        return raw_images
+
+    # predict on data
+    # need to have a dictionary of models to run
+    # curate-seg-track job
diff --git a/caliban_toolbox/pre_annotation/data_loader_test.py b/caliban_toolbox/pre_annotation/data_loader_test.py
new file mode 100644
index 0000000..a96aba6
--- /dev/null
+++ b/caliban_toolbox/pre_annotation/data_loader_test.py
@@ -0,0 +1,80 @@
+# Copyright 2016-2020 David Van Valen at California Institute of Technology
+# (Caltech), with support from the Paul Allen Family Foundation, Google,
+# & National Institutes 
of Health (NIH) under Grant U24CA224309-01. +# All rights reserved. +# +# Licensed under a modified Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE +# +# The Work provided may be used for non-commercial academic purposes only. +# For any other use of the Work, including commercial use, please contact: +# vanvalenlab@gmail.com +# +# Neither the name of Caltech nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific +# prior written permission. +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for data_loader.py""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import random + +import pytest + +from caliban_toolbox.pre_annotation import data_loader + + +def _get_dummy_inputs(object): + possible_data_type = random.choice([['2d', 'static'], + ['2d', 'dynamic'], + ['3d', 'static'], + ['3d', 'dynamic']]) + possible_imaging_types = random.choice([['fluo'], ['phase'], ['fluo', 'phase'], ['all']]) + possible_specimen_types = random.choice([['HEK293'], ['HeLa'], ['HEK293', 'HeLa'], ['all']]) + possible_compartments = random.choice([[None], ['nuclear'], ['nuclear', 'wholecell'], ['all']]) + possible_markers = ['all'] + possible_exp_ids = ['all'] + possible_sessions = ['all'] + possible_positions = ['all'] + possible_file_type = '.tif' + + loader_inputs = [possible_data_type, + possible_imaging_types, + possible_specimen_types, + possible_compartments, + possible_markers, + possible_exp_ids, + possible_sessions, + possible_positions, + possible_file_type] + + return loader_inputs + + +class TestUniversalDataLoader(object): # pylint: disable=useless-object-inheritance + + def test_simple(self): + loader_inputs = _get_dummy_inputs(self) + + # test with standard inputs + _ = data_loader.UniversalDataLoader(data_type=loader_inputs[0], + imaging_types=loader_inputs[1], + specimen_types=loader_inputs[2], + compartments=loader_inputs[3], + markers=loader_inputs[4], + exp_ids=loader_inputs[5], + sessions=loader_inputs[6], + positions=loader_inputs[7], + file_type=loader_inputs[8]) diff --git a/caliban_toolbox/utils/__init__.py b/caliban_toolbox/utils/__init__.py index 3c0e89a..18ee062 100644 --- a/caliban_toolbox/utils/__init__.py +++ b/caliban_toolbox/utils/__init__.py @@ -23,4 +23,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Custom Utilities""" +"""Caliban Toolbox Utilities Module""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from caliban_toolbox.utils import crop_utils +from caliban_toolbox.utils import data_utils +from caliban_toolbox.utils import io_utils +from caliban_toolbox.utils import misc_utils +from caliban_toolbox.utils import plot_utils +from caliban_toolbox.utils import slice_utils +from caliban_toolbox.utils import widget_utils + +del absolute_import +del division +del print_function diff --git a/caliban_toolbox/utils/utils.py b/caliban_toolbox/utils/misc_utils.py old mode 100755 new mode 100644 similarity index 100% rename from caliban_toolbox/utils/utils.py rename to caliban_toolbox/utils/misc_utils.py diff --git a/caliban_toolbox/utils/utils_test.py b/caliban_toolbox/utils/misc_utils_test.py similarity index 89% rename from caliban_toolbox/utils/utils_test.py rename to caliban_toolbox/utils/misc_utils_test.py index 7cde6d1..a17811f 100644 --- a/caliban_toolbox/utils/utils_test.py +++ b/caliban_toolbox/utils/misc_utils_test.py @@ -29,15 +29,15 @@ import numpy as np -from caliban_toolbox.utils import utils +from caliban_toolbox.utils import misc_utils def test_sorted_nicely(): # test image file sorting expected = ['test_001_dapi', 'test_002_dapi', 'test_003_dapi'] unsorted = ['test_003_dapi', 'test_001_dapi', 'test_002_dapi'] - assert(np.array_equal(expected, utils.sorted_nicely(unsorted))) + assert(np.array_equal(expected, misc_utils.sorted_nicely(unsorted))) # test montage folder sorting expected = ['test_0_0', 'test_1_0', 'test_1_1'] unsorted = ['test_1_1', 'test_0_0', 'test_1_0'] - assert(np.array_equal(expected, utils.sorted_nicely(unsorted))) + assert(np.array_equal(expected, misc_utils.sorted_nicely(unsorted))) diff --git a/caliban_toolbox/utils/widget_utils.py b/caliban_toolbox/utils/widget_utils.py index dbfe05a..e8087b3 100644 --- a/caliban_toolbox/utils/widget_utils.py +++ b/caliban_toolbox/utils/widget_utils.py @@ -25,14 +25,16 @@ # ============================================================================== from __future__ import absolute_import -from skimage import filters, img_as_uint -import matplotlib as mpl -import matplotlib.pyplot as plt +import os import numpy as np import skimage as sk -import os +import matplotlib as mpl +import matplotlib.pyplot as plt + from imageio import imread -from caliban_toolbox.utils.utils import get_img_names +from skimage import filters + +from caliban_toolbox.utils.misc_utils import get_img_names def choose_img(name, dirpath): diff --git a/notebooks/Caliban_Figure8_Upload_Combined.ipynb b/notebooks/Caliban_Figure8_Upload_Combined.ipynb index 1f3dffa..eb11a54 100644 --- a/notebooks/Caliban_Figure8_Upload_Combined.ipynb +++ b/notebooks/Caliban_Figure8_Upload_Combined.ipynb @@ -11,7 +11,16 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/site-packages/xarray/core/merge.py:17: FutureWarning: The Panel class is removed from pandas. 
Accessing it from the top-level namespace will also be removed in the next version\n", + " PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)\n" + ] + } + ], "source": [ "# import statements\n", "from __future__ import absolute_import\n", @@ -27,11 +36,11 @@ "from imageio import imread, volread, imwrite, volwrite\n", "from ipywidgets import fixed, interactive\n", "\n", + "import caliban_toolbox.pre_annotation.data_loader\n", "from caliban_toolbox import reshape_data\n", "from caliban_toolbox.figure_eight_functions import create_figure_eight_job, download_figure_eight_output\n", "from caliban_toolbox.utils import widget_utils, plot_utils, data_utils, io_utils\n", "\n", - "from segmentation.utils.data_utils import load_imgs_from_dir\n", "import xarray as xr\n", "\n", "import matplotlib as mpl\n", @@ -46,8 +55,178 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Load data for model training\n", - "We'll specify which channels will be used to generate preliminary labels for the model\n" + "## Load data\n", + "Specify what data we would like annotated. Data is selected according to its location within the CellNet ontology." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Data types are either dynamic/static and 2d/3d\n", + "data_type=('dynamic', '2d')\n", + "\n", + "# Imaging types include fluo, phase, or mibi (you can also specify 'all' to include everything)\n", + "imaging_types=['fluo', 'phase']\n", + "\n", + "# Specimen types are the cell or tissue name (e.g. HEK293, HeLa, TNBC) - use 'all' to include everything available\n", + "specimen_types=['HEK293']\n", + "\n", + "# Compartment of interest (e.g. nuclear or whole cell). 'all' can be used to include everything. \n", + "# The default compartment is None, which can only be used if the imaging type is phase\n", + "compartments=['nuclear']" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/data/raw_data/dynamic/2d/imaging_type/specimen_type/cytoplasm\n", + "empty directory\n", + "--------------------------------\n", + "/data/raw_data/dynamic/2d/imaging_type/specimen_type/nuclear\n", + "empty directory\n", + "--------------------------------\n", + "/data/raw_data/dynamic/2d/imaging_type/specimen_type/augmented_microscopy\n", + "empty directory\n", + "--------------------------------\n" + ] + } + ], + "source": [ + "from caliban_toolbox.pre_annotation.data_loader import UniversalDataLoader\n", + "load_test = UniversalDataLoader(data_type=data_type, \n", + " imaging_types=imaging_types,\n", + " specimen_types=specimen_types,\n", + " compartments=compartments)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning! Path: /data/raw_data/dynamic/2d/phase/HEK293 Does Not Exist!\n" + ] + } + ], + "source": [ + "load_test.load_metadata()\n", + "raw_images = load_test.load_imagedata()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 71, 1080, 1280)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_images.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<div>load_test.metadata_all rendered as an HTML table; equivalent content appears in the text/plain output below</div>
" + ], + "text/plain": [ + " TYPE CHANNEL_MARKER EXP_ID \\\n", + "0 cell HEK293 {'0': 'H2B-mClover'} journal_pcbi_1005177 \n", + "\n", + " RAW_DATA_ORIGIN \\\n", + "0 {'FACILITY': 'stanford', 'COLLECTED_BY': 'Taka... \n", + "\n", + " IMAGING_PARAMETERS \\\n", + "0 {'MICROSCOPE': 'Nikon Ti-E', 'CAMERA': 'Andor ... \n", + "\n", + " DIMENSIONS ONTOLOGY \\\n", + "0 {'X': '1280', 'Y': '1080'} dynamic 2d fluorescence nuclear \n", + "\n", + " METHODS \\\n", + "0 {'SUBTYPE': '', 'CULTURE': '', 'LABELING': '',... \n", + "\n", + " PATHS \n", + "0 [/data/raw_data/dynamic/2d/fluo/HEK293/Nuclear... " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "load_test.metadata_all" ] }, { @@ -56,7 +235,7 @@ "metadata": {}, "outputs": [], "source": [ - "# TODO: Universal data loader" + "### End Universal data loader" ] }, { @@ -952,7 +1131,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.10" + "version": "3.7.7" } }, "nbformat": 4, diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..921efa2 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,5 @@ +# Running tests +pytest==4.6.5 +pytest-cov==2.5.1 +pytest-pep8>=1.0.6,<2.0.0 +coveralls>=1.8.2,<2.0.0 diff --git a/requirements.txt b/requirements.txt index 2197b51..186589d 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ pandas>=0.23.3,<1 -numpy>=1.14.5,<2 +numpy>=1.16.4,<2 scipy>=1.1.0,<2 -scikit-image>=0.13,<1 +scikit-image>=0.14.1,<=0.16.2 scikit-learn>=0.19.1,<1 jupyter>=1.0.0,<2 nbformat>=4.4.0,<5 @@ -10,4 +10,4 @@ requests>=2.21.0 boto3>=1.9.0 xarray==0.12.1 netCDF4==1.5.3 -segmentation-mibi>=0.2.3 +pathlib==1.0.1