Clean up notebook

vanvalenlab · May 14, 2020 · 3e69505 · 3e69505
1 parent b7a75c5
commit 3e69505
Show file tree

Hide file tree

Showing 2 changed files with 112 additions and 139 deletions.
diff --git a/caliban_toolbox/pre_annotation/data_loader.py b/caliban_toolbox/pre_annotation/data_loader.py
@@ -54,7 +54,7 @@ class UniversalDataLoader(object):
     (random picks one file at random - best used for testing).
 
     Args:
-        data type (list): CellNet data type ('dynamic/static', '2d/3d')
+        data type (tuple): CellNet data type ('dynamic/static', '2d/3d')
         imaging types (list): imaging modality of interest ('fluo', 'phase', etc)
         specimen types (list): specimen of interest (HEK293, HeLa, etc)
         compartments (list): compartments of interest (nuclear, whole_cell)
@@ -69,7 +69,7 @@ class UniversalDataLoader(object):
               (e.g. sessions=['all'])
 
     Returns:
-        Numpy array with the shape [fovs, tifs, y_dim, x_dim]
+        Numpy array with the shape [fovs, z_dim (time or space), y_dim, x_dim]
         Python dictionary containing metadata
     """
 
@@ -87,7 +87,7 @@ def __init__(self,
         if compartments is None and imaging_types != ['phase']:
             raise ValueError('Compartments is not specified')
 
-        self.data_type = set(data_type)
+        self.data_type = data_type
         self.imaging_types = set(imaging_types)
         self.specimen_types = set(specimen_types)
         self.compartments = set(compartments)
@@ -115,7 +115,7 @@ def _vocab_check(self):
         # Dictionaries of common spellings
         img_fluo_misspell = {'flourescent', 'fluorescence', 'fluorescent', 'fluo'}
         comp_nuc_misspell = {'nuc', 'nuclear'}
-        comp_wc_misspell = {'wholecell', 'whole_cell', }
+        comp_wc_misspell = {'wholecell', 'whole_cell', 'whole cell'}
 
         # imaging_types - check for fluo misspellings
         new_imaging_types = []

diff --git a/notebooks/Caliban_Figure8_Upload_Combined.ipynb b/notebooks/Caliban_Figure8_Upload_Combined.ipynb
@@ -9,9 +9,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.7/site-packages/xarray/core/merge.py:17: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
+      "  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)\n"
+     ]
+    }
+   ],
    "source": [
     "# import statements\n",
     "from __future__ import absolute_import\n",
@@ -27,11 +36,11 @@
     "from imageio import imread, volread, imwrite, volwrite\n",
     "from ipywidgets import fixed, interactive\n",
     "\n",
+    "import caliban_toolbox.pre_annotation.data_loader\n",
     "from caliban_toolbox import reshape_data\n",
     "from caliban_toolbox.figure_eight_functions import create_figure_eight_job, download_figure_eight_output\n",
     "from caliban_toolbox.utils import widget_utils, plot_utils, data_utils, io_utils\n",
     "\n",
-    "from segmentation.utils.data_utils import load_imgs_from_dir\n",
     "import xarray as xr\n",
     "\n",
     "import matplotlib as mpl\n",
@@ -46,17 +55,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Load data for model training\n",
-    "We'll specify which channels will be used to generate preliminary labels for the model\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# TODO: Universal data loader"
+    "## Load data\n",
+    "Specify what data we would like annotated. Data is selected according to its location within the CellNet ontology."
    ]
   },
   {
@@ -65,52 +65,24 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%load_ext autoreload"
+    "# Data type is a pair: 'dynamic' or 'static', and '2d' or '3d'\n",
+    "data_type=('dynamic', '2d')\n",
+    "\n",
+    "# Imaging types include fluo, phase, or mibi (you can also specify 'all' to include everything)\n",
+    "imaging_types=['fluo', 'phase']\n",
+    "\n",
+    "# Specimen types are the cell or tissue name (e.g. HEK293, HeLa, TNBC) - use 'all' to include everything available\n",
+    "specimen_types=['HEK293']\n",
+    "\n",
+    "# Compartment of interest (e.g. nuclear or whole cell). 'all' can be used to include everything.\n",
+    "# The default compartment is None, which can only be used if the imaging type is phase\n",
+    "compartments=['nuclear']"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {},
-   "outputs": [],
-   "source": [
-    "%autoreload 2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.7/site-packages/xarray/core/merge.py:17: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
-      "  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<module 'caliban_toolbox.pre_annotation.data_loader' from '/usr/local/lib/python3.7/site-packages/caliban_toolbox/pre_annotation/data_loader.py'>"
-      ]
-     },
-     "execution_count": 1,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import importlib\n",
-    "import caliban_toolbox.pre_annotation.data_loader\n",
-    "importlib.reload(caliban_toolbox.pre_annotation.data_loader)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -130,15 +102,15 @@
    ],
    "source": [
     "from caliban_toolbox.pre_annotation.data_loader import UniversalDataLoader\n",
-    "load_test = UniversalDataLoader(data_type=['dynamic', '2d'], \n",
-    "                                imaging_types=['fluo', 'phase'],\n",
-    "                                specimen_types=['HEK293'],\n",
-    "                                compartments=['nuclear'])"
+    "load_test = UniversalDataLoader(data_type=data_type, \n",
+    "                                imaging_types=imaging_types,\n",
+    "                                specimen_types=specimen_types,\n",
+    "                                compartments=compartments)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -156,104 +128,105 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:16: DeprecationWarning: remove is deprecated. Use delete_one or delete_many instead.\n",
-      "  app.launch_new_instance()\n",
-      "/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:17: DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead.\n"
-     ]
-    },
     {
      "data": {
       "text/plain": [
-       "{'_id': ObjectId('5eb1f48f497849bb54ef5ca0'),\n",
-       " 'TYPE': ['cell', 'HEK293'],\n",
-       " 'CHANNEL_MARKER': [{'0': 'H2B-mClover'}],\n",
-       " 'EXP_ID': ['journal_pcbi_1005177'],\n",
-       " 'RAW_DATA_ORIGIN': [{'FACILITY': 'stanford',\n",
-       "   'COLLECTED_BY': 'Takamasa Kudo',\n",
-       "   'DATE_COLLECTED': '11_13_2017',\n",
-       "   'DOI': ' https://doi.org/10.1371/journal.pcbi.1005177'}],\n",
-       " 'IMAGING_PARAMETERS': [{'MICROSCOPE': 'Nikon Ti-E',\n",
-       "   'CAMERA': 'Andor Neo 5.5',\n",
-       "   'MAGNIFICATION': '20x',\n",
-       "   'NA': '',\n",
-       "   'BINNING': '2x2',\n",
-       "   'PIXEL_SIZE': '0.65um',\n",
-       "   'EXPOSURE_TIME': '',\n",
-       "   'TIME_STEP': ''}],\n",
-       " 'DIMENSIONS': [{'X': '1280', 'Y': '1080'}],\n",
-       " 'ONTOLOGY': ['dynamic', '2d', 'fluorescence', 'nuclear'],\n",
-       " 'METHODS': [{'SUBTYPE': '', 'CULTURE': '', 'LABELING': '', 'IMAGING': ''}]}"
+       "(3, 71, 1080, 1280)"
       ]
      },
-     "execution_count": 37,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import pymongo \n",
-    "from pymongo import MongoClient\n",
-    "\n",
-    "mongo_uri = 'mongodb://%s:%s@%s:%s' % ('root', 'password', 'mongo', '27017')\n",
-    "client = MongoClient(mongo_uri)\n",
-    "mng_db = client.testdb\n",
-    "\n",
-    "collection_name = 'HEK293' # Replace mongo db collection name\n",
-    "db_cm = mng_db[collection_name]\n",
-    " \n",
-    "# Get the data from JSON file\n",
-    "mdf_path = os.path.join(path, 'metadata')\n",
-    "with open(mdf_path, 'r') as raw_mdf:\n",
-    "    raw_data = json.load(raw_mdf)\n",
-    "\n",
-    "# Insert Data\n",
-    "db_cm.remove()\n",
-    "db_cm.insert(raw_data)"
+    "raw_images.shape"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'CHANNEL_MARKER': [{'0': 'H2B-mClover'}],\n",
-      " 'DIMENSIONS': [{'X': '1280', 'Y': '1080'}],\n",
-      " 'EXP_ID': ['journal_pcbi_1005177'],\n",
-      " 'IMAGING_PARAMETERS': [{'BINNING': '2x2',\n",
-      "                         'CAMERA': 'Andor Neo 5.5',\n",
-      "                         'EXPOSURE_TIME': '',\n",
-      "                         'MAGNIFICATION': '20x',\n",
-      "                         'MICROSCOPE': 'Nikon Ti-E',\n",
-      "                         'NA': '',\n",
-      "                         'PIXEL_SIZE': '0.65um',\n",
-      "                         'TIME_STEP': ''}],\n",
-      " 'METHODS': [{'CULTURE': '', 'IMAGING': '', 'LABELING': '', 'SUBTYPE': ''}],\n",
-      " 'ONTOLOGY': ['dynamic', '2d', 'fluorescence', 'nuclear'],\n",
-      " 'RAW_DATA_ORIGIN': [{'COLLECTED_BY': 'Takamasa Kudo',\n",
-      "                      'DATE_COLLECTED': '11_13_2017',\n",
-      "                      'DOI': ' https://doi.org/10.1371/journal.pcbi.1005177',\n",
-      "                      'FACILITY': 'stanford'}],\n",
-      " 'TYPE': ['cell', 'HEK293'],\n",
-      " '_id': ObjectId('5eb1f48f497849bb54ef5ca0')}\n"
-     ]
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>TYPE</th>\n",
+       "      <th>CHANNEL_MARKER</th>\n",
+       "      <th>EXP_ID</th>\n",
+       "      <th>RAW_DATA_ORIGIN</th>\n",
+       "      <th>IMAGING_PARAMETERS</th>\n",
+       "      <th>DIMENSIONS</th>\n",
+       "      <th>ONTOLOGY</th>\n",
+       "      <th>METHODS</th>\n",
+       "      <th>PATHS</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>cell HEK293</td>\n",
+       "      <td>{'0': 'H2B-mClover'}</td>\n",
+       "      <td>journal_pcbi_1005177</td>\n",
+       "      <td>{'FACILITY': 'stanford', 'COLLECTED_BY': 'Taka...</td>\n",
+       "      <td>{'MICROSCOPE': 'Nikon Ti-E', 'CAMERA': 'Andor ...</td>\n",
+       "      <td>{'X': '1280', 'Y': '1080'}</td>\n",
+       "      <td>dynamic 2d fluorescence nuclear</td>\n",
+       "      <td>{'SUBTYPE': '', 'CULTURE': '', 'LABELING': '',...</td>\n",
+       "      <td>[/data/raw_data/dynamic/2d/fluo/HEK293/Nuclear...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          TYPE        CHANNEL_MARKER                EXP_ID  \\\n",
+       "0  cell HEK293  {'0': 'H2B-mClover'}  journal_pcbi_1005177   \n",
+       "\n",
+       "                                     RAW_DATA_ORIGIN  \\\n",
+       "0  {'FACILITY': 'stanford', 'COLLECTED_BY': 'Taka...   \n",
+       "\n",
+       "                                  IMAGING_PARAMETERS  \\\n",
+       "0  {'MICROSCOPE': 'Nikon Ti-E', 'CAMERA': 'Andor ...   \n",
+       "\n",
+       "                   DIMENSIONS                         ONTOLOGY  \\\n",
+       "0  {'X': '1280', 'Y': '1080'}  dynamic 2d fluorescence nuclear   \n",
+       "\n",
+       "                                             METHODS  \\\n",
+       "0  {'SUBTYPE': '', 'CULTURE': '', 'LABELING': '',...   \n",
+       "\n",
+       "                                               PATHS  \n",
+       "0  [/data/raw_data/dynamic/2d/fluo/HEK293/Nuclear...  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "# Query data\n",
-    "import pprint\n",
-    "for metadata_file in db_cm.find({'TYPE': ['cell', 'HEK293']}):\n",
-    "    pprint.pprint(metadata_file)"
+    "load_test.metadata_all"
    ]
   },
   {