From 3e69505eebe4544840227a070c1ab72c179e8f35 Mon Sep 17 00:00:00 2001 From: MekWarrior Date: Thu, 14 May 2020 15:04:26 -0700 Subject: [PATCH] Clean up notebook --- caliban_toolbox/pre_annotation/data_loader.py | 8 +- .../Caliban_Figure8_Upload_Combined.ipynb | 243 ++++++++---------- 2 files changed, 112 insertions(+), 139 deletions(-) diff --git a/caliban_toolbox/pre_annotation/data_loader.py b/caliban_toolbox/pre_annotation/data_loader.py index 1eb46ba..00409d9 100644 --- a/caliban_toolbox/pre_annotation/data_loader.py +++ b/caliban_toolbox/pre_annotation/data_loader.py @@ -54,7 +54,7 @@ class UniversalDataLoader(object): (random picks one file at random - best used for testing). Args: - data type (list): CellNet data type ('dynamic/static', '2d/3d') + data type (tuple): CellNet data type ('dynamic/static', '2d/3d') imaging types (list): imaging modality of interest ('fluo', 'phase', etc) specimen types (list): specimen of interest (HEK293, HeLa, etc) compartments (list): compartments of interest (nuclear, whole_cell) @@ -69,7 +69,7 @@ class UniversalDataLoader(object): (e.g. sessions=['all']) Returns: - Numpy array with the shape [fovs, tifs, y_dim, x_dim] + Numpy array with the shape [fovs, z_dim(time or space), y_dim, x_dim] Python dictionary containing metadata """ @@ -87,7 +87,7 @@ def __init__(self, if compartments is None and imaging_types != ['phase']: raise ValueError('Compartments is not specified') - self.data_type = set(data_type) + self.data_type = data_type self.imaging_types = set(imaging_types) self.specimen_types = set(specimen_types) self.compartments = set(compartments) @@ -115,7 +115,7 @@ def _vocab_check(self): # Dictionaries of common spellings img_fluo_misspell = {'flourescent', 'fluorescence', 'fluorescent', 'fluo'} comp_nuc_misspell = {'nuc', 'nuclear'} - comp_wc_misspell = {'wholecell', 'whole_cell', } + comp_wc_misspell = {'wholecell', 'whole_cell', 'whole cell'} # imaging_types - check for fluo misspellings new_imaging_types = [] diff --git a/notebooks/Caliban_Figure8_Upload_Combined.ipynb b/notebooks/Caliban_Figure8_Upload_Combined.ipynb index d50c901..eb11a54 100644 --- a/notebooks/Caliban_Figure8_Upload_Combined.ipynb +++ b/notebooks/Caliban_Figure8_Upload_Combined.ipynb @@ -9,9 +9,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/site-packages/xarray/core/merge.py:17: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n", + " PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)\n" + ] + } + ], "source": [ "# import statements\n", "from __future__ import absolute_import\n", @@ -27,11 +36,11 @@ "from imageio import imread, volread, imwrite, volwrite\n", "from ipywidgets import fixed, interactive\n", "\n", + "import caliban_toolbox.pre_annotation.data_loader\n", "from caliban_toolbox import reshape_data\n", "from caliban_toolbox.figure_eight_functions import create_figure_eight_job, download_figure_eight_output\n", "from caliban_toolbox.utils import widget_utils, plot_utils, data_utils, io_utils\n", "\n", - "from segmentation.utils.data_utils import load_imgs_from_dir\n", "import xarray as xr\n", "\n", "import matplotlib as mpl\n", @@ -46,17 +55,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Load data for model training\n", - "We'll specify which channels will be used to generate preliminary labels for the model\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: Universal data loader" + "## Load data\n", + "Specify what data we would like annotated. Data is selected according to its location within the CellNet ontology." ] }, { @@ -65,52 +65,24 @@ "metadata": {}, "outputs": [], "source": [ - "%load_ext autoreload" + "# Data types are either dynamic/static and 2d/3d\n", + "data_type=('dynamic', '2d')\n", + "\n", + "# Imaging types include fluo, phase, or mibi (you can also specify 'all' to include everything)\n", + "imaging_types=['fluo', 'phase']\n", + "\n", + "# Specimen types are the cell or tissue name (e.g. HEK293, HeLa, TNBC) - use 'all' to include everything available\n", + "specimen_types=['HEK293']\n", + "\n", + "# Compartment of interest (e.g. nuclear or whole cell). 'all' can be used to include everything. \n", + "# The default compartment is None, which can only be used if the imaging type is phase\n", + "compartments=['nuclear']" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], - "source": [ - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.7/site-packages/xarray/core/merge.py:17: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n", - " PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import importlib\n", - "import caliban_toolbox.pre_annotation.data_loader\n", - "importlib.reload(caliban_toolbox.pre_annotation.data_loader)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, "outputs": [ { "name": "stdout", @@ -130,15 +102,15 @@ ], "source": [ "from caliban_toolbox.pre_annotation.data_loader import UniversalDataLoader\n", - "load_test = UniversalDataLoader(data_type=['dynamic', '2d'], \n", - " imaging_types=['fluo', 'phase'],\n", - " specimen_types=['HEK293'],\n", - " compartments=['nuclear'])" + "load_test = UniversalDataLoader(data_type=data_type, \n", + " imaging_types=imaging_types,\n", + " specimen_types=specimen_types,\n", + " compartments=compartments)" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -156,104 +128,105 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 7, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:16: DeprecationWarning: remove is deprecated. Use delete_one or delete_many instead.\n", - " app.launch_new_instance()\n", - "/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:17: DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead.\n" - ] - }, { "data": { "text/plain": [ - "{'_id': ObjectId('5eb1f48f497849bb54ef5ca0'),\n", - " 'TYPE': ['cell', 'HEK293'],\n", - " 'CHANNEL_MARKER': [{'0': 'H2B-mClover'}],\n", - " 'EXP_ID': ['journal_pcbi_1005177'],\n", - " 'RAW_DATA_ORIGIN': [{'FACILITY': 'stanford',\n", - " 'COLLECTED_BY': 'Takamasa Kudo',\n", - " 'DATE_COLLECTED': '11_13_2017',\n", - " 'DOI': ' https://doi.org/10.1371/journal.pcbi.1005177'}],\n", - " 'IMAGING_PARAMETERS': [{'MICROSCOPE': 'Nikon Ti-E',\n", - " 'CAMERA': 'Andor Neo 5.5',\n", - " 'MAGNIFICATION': '20x',\n", - " 'NA': '',\n", - " 'BINNING': '2x2',\n", - " 'PIXEL_SIZE': '0.65um',\n", - " 'EXPOSURE_TIME': '',\n", - " 'TIME_STEP': ''}],\n", - " 'DIMENSIONS': [{'X': '1280', 'Y': '1080'}],\n", - " 'ONTOLOGY': ['dynamic', '2d', 'fluorescence', 'nuclear'],\n", - " 'METHODS': [{'SUBTYPE': '', 'CULTURE': '', 'LABELING': '', 'IMAGING': ''}]}" + "(3, 71, 1080, 1280)" ] }, - "execution_count": 37, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "import pymongo \n", - "from pymongo import MongoClient\n", - "\n", - "mongo_uri = 'mongodb://%s:%s@%s:%s' % ('root', 'password', 'mongo', '27017')\n", - "client = MongoClient(mongo_uri)\n", - "mng_db = client.testdb\n", - "\n", - "collection_name = 'HEK293' # Replace mongo db collection name\n", - "db_cm = mng_db[collection_name]\n", - " \n", - "# Get the data from JSON file\n", - "mdf_path = os.path.join(path, 'metadata')\n", - "with open(mdf_path, 'r') as raw_mdf:\n", - " raw_data = json.load(raw_mdf)\n", - "\n", - "# Insert Data\n", - "db_cm.remove()\n", - "db_cm.insert(raw_data)" + "raw_images.shape" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 8, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'CHANNEL_MARKER': [{'0': 'H2B-mClover'}],\n", - " 'DIMENSIONS': [{'X': '1280', 'Y': '1080'}],\n", - " 'EXP_ID': ['journal_pcbi_1005177'],\n", - " 'IMAGING_PARAMETERS': [{'BINNING': '2x2',\n", - " 'CAMERA': 'Andor Neo 5.5',\n", - " 'EXPOSURE_TIME': '',\n", - " 'MAGNIFICATION': '20x',\n", - " 'MICROSCOPE': 'Nikon Ti-E',\n", - " 'NA': '',\n", - " 'PIXEL_SIZE': '0.65um',\n", - " 'TIME_STEP': ''}],\n", - " 'METHODS': [{'CULTURE': '', 'IMAGING': '', 'LABELING': '', 'SUBTYPE': ''}],\n", - " 'ONTOLOGY': ['dynamic', '2d', 'fluorescence', 'nuclear'],\n", - " 'RAW_DATA_ORIGIN': [{'COLLECTED_BY': 'Takamasa Kudo',\n", - " 'DATE_COLLECTED': '11_13_2017',\n", - " 'DOI': ' https://doi.org/10.1371/journal.pcbi.1005177',\n", - " 'FACILITY': 'stanford'}],\n", - " 'TYPE': ['cell', 'HEK293'],\n", - " '_id': ObjectId('5eb1f48f497849bb54ef5ca0')}\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TYPECHANNEL_MARKEREXP_IDRAW_DATA_ORIGINIMAGING_PARAMETERSDIMENSIONSONTOLOGYMETHODSPATHS
0cell HEK293{'0': 'H2B-mClover'}journal_pcbi_1005177{'FACILITY': 'stanford', 'COLLECTED_BY': 'Taka...{'MICROSCOPE': 'Nikon Ti-E', 'CAMERA': 'Andor ...{'X': '1280', 'Y': '1080'}dynamic 2d fluorescence nuclear{'SUBTYPE': '', 'CULTURE': '', 'LABELING': '',...[/data/raw_data/dynamic/2d/fluo/HEK293/Nuclear...
\n", + "
" + ], + "text/plain": [ + " TYPE CHANNEL_MARKER EXP_ID \\\n", + "0 cell HEK293 {'0': 'H2B-mClover'} journal_pcbi_1005177 \n", + "\n", + " RAW_DATA_ORIGIN \\\n", + "0 {'FACILITY': 'stanford', 'COLLECTED_BY': 'Taka... \n", + "\n", + " IMAGING_PARAMETERS \\\n", + "0 {'MICROSCOPE': 'Nikon Ti-E', 'CAMERA': 'Andor ... \n", + "\n", + " DIMENSIONS ONTOLOGY \\\n", + "0 {'X': '1280', 'Y': '1080'} dynamic 2d fluorescence nuclear \n", + "\n", + " METHODS \\\n", + "0 {'SUBTYPE': '', 'CULTURE': '', 'LABELING': '',... \n", + "\n", + " PATHS \n", + "0 [/data/raw_data/dynamic/2d/fluo/HEK293/Nuclear... " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Query data\n", - "import pprint\n", - "for metadata_file in db_cm.find({'TYPE': ['cell', 'HEK293']}):\n", - " pprint.pprint(metadata_file)" + "load_test.metadata_all" ] }, {