Skip to content

Commit

Permalink
Clean up notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
MekWarrior committed May 14, 2020
1 parent b7a75c5 commit 3e69505
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 139 deletions.
8 changes: 4 additions & 4 deletions caliban_toolbox/pre_annotation/data_loader.py
Expand Up @@ -54,7 +54,7 @@ class UniversalDataLoader(object):
(random picks one file at random - best used for testing).
Args:
data type (list): CellNet data type ('dynamic/static', '2d/3d')
data_type (tuple): CellNet data type ('dynamic/static', '2d/3d')
imaging_types (list): imaging modality of interest ('fluo', 'phase', etc)
specimen_types (list): specimen of interest (HEK293, HeLa, etc)
compartments (list): compartments of interest (nuclear, whole_cell)
Expand All @@ -69,7 +69,7 @@ class UniversalDataLoader(object):
(e.g. sessions=['all'])
Returns:
Numpy array with the shape [fovs, tifs, y_dim, x_dim]
Numpy array with the shape [fovs, z_dim(time or space), y_dim, x_dim]
Python dictionary containing metadata
"""

Expand All @@ -87,7 +87,7 @@ def __init__(self,
if compartments is None and imaging_types != ['phase']:
raise ValueError('Compartments is not specified')

self.data_type = set(data_type)
self.data_type = data_type
self.imaging_types = set(imaging_types)
self.specimen_types = set(specimen_types)
self.compartments = set(compartments)
Expand Down Expand Up @@ -115,7 +115,7 @@ def _vocab_check(self):
# Sets of accepted spellings (including common misspellings)
img_fluo_misspell = {'flourescent', 'fluorescence', 'fluorescent', 'fluo'}
comp_nuc_misspell = {'nuc', 'nuclear'}
comp_wc_misspell = {'wholecell', 'whole_cell', }
comp_wc_misspell = {'wholecell', 'whole_cell', 'whole cell'}

# imaging_types - check for fluo misspellings
new_imaging_types = []
Expand Down
243 changes: 108 additions & 135 deletions notebooks/Caliban_Figure8_Upload_Combined.ipynb
Expand Up @@ -9,9 +9,18 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/site-packages/xarray/core/merge.py:17: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
" PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)\n"
]
}
],
"source": [
"# import statements\n",
"from __future__ import absolute_import\n",
Expand All @@ -27,11 +36,11 @@
"from imageio import imread, volread, imwrite, volwrite\n",
"from ipywidgets import fixed, interactive\n",
"\n",
"import caliban_toolbox.pre_annotation.data_loader\n",
"from caliban_toolbox import reshape_data\n",
"from caliban_toolbox.figure_eight_functions import create_figure_eight_job, download_figure_eight_output\n",
"from caliban_toolbox.utils import widget_utils, plot_utils, data_utils, io_utils\n",
"\n",
"from segmentation.utils.data_utils import load_imgs_from_dir\n",
"import xarray as xr\n",
"\n",
"import matplotlib as mpl\n",
Expand All @@ -46,17 +55,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load data for model training\n",
"We'll specify which channels will be used to generate preliminary labels for the model\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# TODO: Universal data loader"
"## Load data\n",
"Specify what data we would like annotated. Data is selected according to its location within the CellNet ontology."
]
},
{
Expand All @@ -65,52 +65,24 @@
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload"
"# Data types are either dynamic/static and 2d/3d\n",
"data_type=('dynamic', '2d')\n",
"\n",
"# Imaging types include fluo, phase, or mibi (you can also specify 'all' to include everything)\n",
"imaging_types=['fluo', 'phase']\n",
"\n",
"# Specimen types are the cell or tissue name (e.g. HEK293, HeLa, TNBC) - use 'all' to include everything available\n",
"specimen_types=['HEK293']\n",
"\n",
"# Compartment of interest (e.g. nuclear or whole cell). 'all' can be used to include everything. \n",
"# The default compartment is None, which can only be used if the imaging type is phase\n",
"compartments=['nuclear']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/site-packages/xarray/core/merge.py:17: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
" PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)\n"
]
},
{
"data": {
"text/plain": [
"<module 'caliban_toolbox.pre_annotation.data_loader' from '/usr/local/lib/python3.7/site-packages/caliban_toolbox/pre_annotation/data_loader.py'>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import importlib\n",
"import caliban_toolbox.pre_annotation.data_loader\n",
"importlib.reload(caliban_toolbox.pre_annotation.data_loader)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
Expand All @@ -130,15 +102,15 @@
],
"source": [
"from caliban_toolbox.pre_annotation.data_loader import UniversalDataLoader\n",
"load_test = UniversalDataLoader(data_type=['dynamic', '2d'], \n",
" imaging_types=['fluo', 'phase'],\n",
" specimen_types=['HEK293'],\n",
" compartments=['nuclear'])"
"load_test = UniversalDataLoader(data_type=data_type, \n",
" imaging_types=imaging_types,\n",
" specimen_types=specimen_types,\n",
" compartments=compartments)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -156,104 +128,105 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:16: DeprecationWarning: remove is deprecated. Use delete_one or delete_many instead.\n",
" app.launch_new_instance()\n",
"/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:17: DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead.\n"
]
},
{
"data": {
"text/plain": [
"{'_id': ObjectId('5eb1f48f497849bb54ef5ca0'),\n",
" 'TYPE': ['cell', 'HEK293'],\n",
" 'CHANNEL_MARKER': [{'0': 'H2B-mClover'}],\n",
" 'EXP_ID': ['journal_pcbi_1005177'],\n",
" 'RAW_DATA_ORIGIN': [{'FACILITY': 'stanford',\n",
" 'COLLECTED_BY': 'Takamasa Kudo',\n",
" 'DATE_COLLECTED': '11_13_2017',\n",
" 'DOI': ' https://doi.org/10.1371/journal.pcbi.1005177'}],\n",
" 'IMAGING_PARAMETERS': [{'MICROSCOPE': 'Nikon Ti-E',\n",
" 'CAMERA': 'Andor Neo 5.5',\n",
" 'MAGNIFICATION': '20x',\n",
" 'NA': '',\n",
" 'BINNING': '2x2',\n",
" 'PIXEL_SIZE': '0.65um',\n",
" 'EXPOSURE_TIME': '',\n",
" 'TIME_STEP': ''}],\n",
" 'DIMENSIONS': [{'X': '1280', 'Y': '1080'}],\n",
" 'ONTOLOGY': ['dynamic', '2d', 'fluorescence', 'nuclear'],\n",
" 'METHODS': [{'SUBTYPE': '', 'CULTURE': '', 'LABELING': '', 'IMAGING': ''}]}"
"(3, 71, 1080, 1280)"
]
},
"execution_count": 37,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pymongo \n",
"from pymongo import MongoClient\n",
"\n",
"mongo_uri = 'mongodb://%s:%s@%s:%s' % ('root', 'password', 'mongo', '27017')\n",
"client = MongoClient(mongo_uri)\n",
"mng_db = client.testdb\n",
"\n",
"collection_name = 'HEK293' # Replace mongo db collection name\n",
"db_cm = mng_db[collection_name]\n",
" \n",
"# Get the data from JSON file\n",
"mdf_path = os.path.join(path, 'metadata')\n",
"with open(mdf_path, 'r') as raw_mdf:\n",
" raw_data = json.load(raw_mdf)\n",
"\n",
"# Insert Data\n",
"db_cm.remove()\n",
"db_cm.insert(raw_data)"
"raw_images.shape"
]
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'CHANNEL_MARKER': [{'0': 'H2B-mClover'}],\n",
" 'DIMENSIONS': [{'X': '1280', 'Y': '1080'}],\n",
" 'EXP_ID': ['journal_pcbi_1005177'],\n",
" 'IMAGING_PARAMETERS': [{'BINNING': '2x2',\n",
" 'CAMERA': 'Andor Neo 5.5',\n",
" 'EXPOSURE_TIME': '',\n",
" 'MAGNIFICATION': '20x',\n",
" 'MICROSCOPE': 'Nikon Ti-E',\n",
" 'NA': '',\n",
" 'PIXEL_SIZE': '0.65um',\n",
" 'TIME_STEP': ''}],\n",
" 'METHODS': [{'CULTURE': '', 'IMAGING': '', 'LABELING': '', 'SUBTYPE': ''}],\n",
" 'ONTOLOGY': ['dynamic', '2d', 'fluorescence', 'nuclear'],\n",
" 'RAW_DATA_ORIGIN': [{'COLLECTED_BY': 'Takamasa Kudo',\n",
" 'DATE_COLLECTED': '11_13_2017',\n",
" 'DOI': ' https://doi.org/10.1371/journal.pcbi.1005177',\n",
" 'FACILITY': 'stanford'}],\n",
" 'TYPE': ['cell', 'HEK293'],\n",
" '_id': ObjectId('5eb1f48f497849bb54ef5ca0')}\n"
]
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>TYPE</th>\n",
" <th>CHANNEL_MARKER</th>\n",
" <th>EXP_ID</th>\n",
" <th>RAW_DATA_ORIGIN</th>\n",
" <th>IMAGING_PARAMETERS</th>\n",
" <th>DIMENSIONS</th>\n",
" <th>ONTOLOGY</th>\n",
" <th>METHODS</th>\n",
" <th>PATHS</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>cell HEK293</td>\n",
" <td>{'0': 'H2B-mClover'}</td>\n",
" <td>journal_pcbi_1005177</td>\n",
" <td>{'FACILITY': 'stanford', 'COLLECTED_BY': 'Taka...</td>\n",
" <td>{'MICROSCOPE': 'Nikon Ti-E', 'CAMERA': 'Andor ...</td>\n",
" <td>{'X': '1280', 'Y': '1080'}</td>\n",
" <td>dynamic 2d fluorescence nuclear</td>\n",
" <td>{'SUBTYPE': '', 'CULTURE': '', 'LABELING': '',...</td>\n",
" <td>[/data/raw_data/dynamic/2d/fluo/HEK293/Nuclear...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" TYPE CHANNEL_MARKER EXP_ID \\\n",
"0 cell HEK293 {'0': 'H2B-mClover'} journal_pcbi_1005177 \n",
"\n",
" RAW_DATA_ORIGIN \\\n",
"0 {'FACILITY': 'stanford', 'COLLECTED_BY': 'Taka... \n",
"\n",
" IMAGING_PARAMETERS \\\n",
"0 {'MICROSCOPE': 'Nikon Ti-E', 'CAMERA': 'Andor ... \n",
"\n",
" DIMENSIONS ONTOLOGY \\\n",
"0 {'X': '1280', 'Y': '1080'} dynamic 2d fluorescence nuclear \n",
"\n",
" METHODS \\\n",
"0 {'SUBTYPE': '', 'CULTURE': '', 'LABELING': '',... \n",
"\n",
" PATHS \n",
"0 [/data/raw_data/dynamic/2d/fluo/HEK293/Nuclear... "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Query data\n",
"import pprint\n",
"for metadata_file in db_cm.find({'TYPE': ['cell', 'HEK293']}):\n",
" pprint.pprint(metadata_file)"
"load_test.metadata_all"
]
},
{
Expand Down

0 comments on commit 3e69505

Please sign in to comment.