## Imports
`utilities.py` provides a number of helper functions, including loading credentials from a JSON file, retrieving files from S3, and defining the current list of brains associated with the Active Atlas.

In [1]:
import datajoint as dj
import numpy as np
from minio import Minio
import json
import yaml
import sys, os

sys.path.append('./lib')
from utilities import *

## Load Credentials, Connect to Database

In [2]:
%%writefile /data/Github/VaultBrain/credFiles_aws.yaml
aws_fp: /data/Github/VaultBrain/s3-creds.json
dj_fp: /data/Github/VaultBrain/dj_local_conf.json

Writing /data/Github/VaultBrain/credFiles_aws.yaml


In [6]:
%%writefile /Users/kuiqian/Github/VaultBrain/credFiles.yaml
aws_fp: /Users/kuiqian/Github/VaultBrain/s3-creds.json
dj_fp: /Users/kuiqian/Github/VaultBrain/dj_local_conf.json

Writing /Users/kuiqian/Github/VaultBrain/credFiles.yaml


In [3]:
# Path of the YAML file that points at the S3 and DataJoint credential files
# (the `credFiles_aws.yaml` written by the %%writefile cell above). The
# commented-out path is the alternative file written under /Users/kuiqian.
credFiles='/data/Github/VaultBrain/credFiles_aws.yaml'
#'/Users/kuiqian/Github/VaultBrain/credFiles.yaml'

In [4]:
# Load the DataJoint credentials a single time and copy the connection settings
# into dj.config. Calling get_dj_creds() once avoids re-reading and re-parsing
# the credential files (and repeating any warning it emits) for every key.
dj_creds = get_dj_creds(credFiles)
for key in ('database.host', 'database.user', 'database.port', 'database.password'):
    dj.config[key] = dj_creds[key]
#dj.config.save_local()

  credFiles = yaml.load(open( credential_file_pointers,'r'))


In [5]:
dj.conn()

Connecting kui@ucsd-demo-db.datajoint.io:3306


DataJoint connection (connected) kui@ucsd-demo-db.datajoint.io:3306

In [6]:
# Bind to the 'kui_diffusionmap' schema. spawn_missing_classes() is what makes
# tables that already exist in the database (e.g. Structure) usable in this
# notebook before their class is declared here -- see the DataJoint docs for details.
schema = dj.schema('kui_diffusionmap')
schema.spawn_missing_classes()

## Create tables

In [12]:
# DESTRUCTIVE: drops the existing `structure` table and all of its rows (asks for
# confirmation first) so that it can be re-declared and re-filled by the cells below.
Structure.drop()

`kui_diffusionmap`.`structure` (28 tuples)
Proceed? [yes, No]: yes
Tables dropped.  Restart kernel.


In [13]:
@schema
class Structure(dj.Manual):
    # Manually filled table with one row per brain structure. Besides the
    # structure name, each row stores four file paths: the positive/negative
    # property pickles and the positive/negative image pickles.
    # `id` is the primary key (everything above the dashed line).
    definition = """
    id  : int   # sequence
    --------
    structure       : char(10)    # name of mouse brain's structure
    positive_s3_fp  : varchar(200)  # (str) file path
    negative_s3_fp  : varchar(200)  # (str) file path
    positive_img_fp  : varchar(200)  # (str) file path
    negative_img_fp  : varchar(200)  # (str) file path
    """


In [14]:
# Structure names, kept in two groups and then concatenated (paired first).
paired_structures = [
    '5N', '6N', '7N', '7n', 'Amb', 'LC', 'LRt', 'Pn', 'Tz', 'VLL', 'RMC',
    'SNC', 'SNR', '3N', '4N', 'Sp5I', 'Sp5O', 'Sp5C', 'PBG', '10N', 'VCA', 'VCP', 'DC',
]
singular_structures = ['AP', '12N', 'RtTg', 'SC', 'IC']
all_structures = paired_structures + singular_structures

# Key prefix of this stack's cell features inside the bucket.
stack = 'MD589'
s3_fp = 'CSHL_cells_features/{}/'.format(stack)  #s3://mousebrainatlas-data/
print(s3_fp)

CSHL_cells_features/MD589/


In [15]:
# Insert one row per structure. The row id is the structure's position in
# all_structures (enumerate replaces the hand-maintained counter).
for n, struc in enumerate(all_structures):
    print("\nAdding "+struc+' to the database')
    # Both property files share one prefix and both image files share another;
    # only the positive/negative suffix differs.
    properties_prefix = s3_fp+'Properties/'+struc+'/'+stack+'_'+struc
    images_prefix = s3_fp+'Images/'+struc+'/'+stack+'_'+struc
    Structure.insert1(dict(id=n,
                           structure=struc,
                           positive_s3_fp=properties_prefix+'_positive.pkl',
                           negative_s3_fp=properties_prefix+'_negative.pkl',
                           positive_img_fp=images_prefix+'_positive_images.pkl',
                           negative_img_fp=images_prefix+'_negative_images.pkl'),
                      skip_duplicates=False)



Adding 5N to the database

Adding 6N to the database

Adding 7N to the database

Adding 7n to the database

Adding Amb to the database

Adding LC to the database

Adding LRt to the database

Adding Pn to the database

Adding Tz to the database

Adding VLL to the database

Adding RMC to the database

Adding SNC to the database

Adding SNR to the database

Adding 3N to the database

Adding 4N to the database

Adding Sp5I to the database

Adding Sp5O to the database

Adding Sp5C to the database

Adding PBG to the database

Adding 10N to the database

Adding VCA to the database

Adding VCP to the database

Adding DC to the database

Adding AP to the database

Adding 12N to the database

Adding RtTg to the database

Adding SC to the database

Adding IC to the database


In [16]:
import pandas as pd
# Fetch every row of the Structure table (as_dict=True asks for dict records)
# and wrap the result in a DataFrame; the bare name on the last line displays it.
structureTable = Structure.fetch(as_dict=True)
strucDF = pd.DataFrame(structureTable)
strucDF

Unnamed: 0,id,structure,positive_s3_fp,negative_s3_fp,positive_img_fp,negative_img_fp
0,0,5N,CSHL_cells_features/MD589/Properties/5N/MD589_...,CSHL_cells_features/MD589/Properties/5N/MD589_...,CSHL_cells_features/MD589/Images/5N/MD589_5N_p...,CSHL_cells_features/MD589/Images/5N/MD589_5N_n...
1,1,6N,CSHL_cells_features/MD589/Properties/6N/MD589_...,CSHL_cells_features/MD589/Properties/6N/MD589_...,CSHL_cells_features/MD589/Images/6N/MD589_6N_p...,CSHL_cells_features/MD589/Images/6N/MD589_6N_n...
2,2,7N,CSHL_cells_features/MD589/Properties/7N/MD589_...,CSHL_cells_features/MD589/Properties/7N/MD589_...,CSHL_cells_features/MD589/Images/7N/MD589_7N_p...,CSHL_cells_features/MD589/Images/7N/MD589_7N_n...
3,3,7n,CSHL_cells_features/MD589/Properties/7n/MD589_...,CSHL_cells_features/MD589/Properties/7n/MD589_...,CSHL_cells_features/MD589/Images/7n/MD589_7n_p...,CSHL_cells_features/MD589/Images/7n/MD589_7n_n...
4,4,Amb,CSHL_cells_features/MD589/Properties/Amb/MD589...,CSHL_cells_features/MD589/Properties/Amb/MD589...,CSHL_cells_features/MD589/Images/Amb/MD589_Amb...,CSHL_cells_features/MD589/Images/Amb/MD589_Amb...
5,5,LC,CSHL_cells_features/MD589/Properties/LC/MD589_...,CSHL_cells_features/MD589/Properties/LC/MD589_...,CSHL_cells_features/MD589/Images/LC/MD589_LC_p...,CSHL_cells_features/MD589/Images/LC/MD589_LC_n...
6,6,LRt,CSHL_cells_features/MD589/Properties/LRt/MD589...,CSHL_cells_features/MD589/Properties/LRt/MD589...,CSHL_cells_features/MD589/Images/LRt/MD589_LRt...,CSHL_cells_features/MD589/Images/LRt/MD589_LRt...
7,7,Pn,CSHL_cells_features/MD589/Properties/Pn/MD589_...,CSHL_cells_features/MD589/Properties/Pn/MD589_...,CSHL_cells_features/MD589/Images/Pn/MD589_Pn_p...,CSHL_cells_features/MD589/Images/Pn/MD589_Pn_n...
8,8,Tz,CSHL_cells_features/MD589/Properties/Tz/MD589_...,CSHL_cells_features/MD589/Properties/Tz/MD589_...,CSHL_cells_features/MD589/Images/Tz/MD589_Tz_p...,CSHL_cells_features/MD589/Images/Tz/MD589_Tz_n...
9,9,VLL,CSHL_cells_features/MD589/Properties/VLL/MD589...,CSHL_cells_features/MD589/Properties/VLL/MD589...,CSHL_cells_features/MD589/Images/VLL/MD589_VLL...,CSHL_cells_features/MD589/Images/VLL/MD589_VLL...


In [11]:
client = get_s3_client(credFiles)

In [15]:
# Ask S3 for the metadata of the positive-properties file listed in row 1 of
# strucDF, then display its size.
object_key = strucDF.loc[1, 'positive_s3_fp']
report = client.stat_object("mousebrainatlas-data", object_key)
report.size

14878904

In [53]:
sys.path.append('../lib')
from utils import run

In [18]:
# Launch Cell_generator.py from the repository directory for a single
# stack / structure / state combination.
stack = 'MD589'
yaml_file = 'shape_params.yaml'
scripts_dir = os.environ['REPO_DIR']
struc = '5N'
state = 'positive'
command = ' '.join(['python3', scripts_dir + '/Cell_generator.py', stack, struc, state, yaml_file])
run(command)

NameError: name 'run' is not defined

In [12]:
%%writefile test.py
print('this is me!')


Writing test.py


In [13]:
run('python test.py')

ERROR:root:File `'(python test.py).py'` not found.


In [21]:
!python test.py

this is me!


In [16]:
!cat output

this is me!


In [None]:
# %load /data/Github/shapeology_code/scripts/Cell_generator.py
import argparse

# Command-line interface of the script: which stack / structure / sample state
# to process and which YAML parameter file to use.
parser = argparse.ArgumentParser()
parser.add_argument("stack", type=str, help="The name of the stack")
# The value is used as a structure name in directory and file names.
parser.add_argument("structure", type=str, help="The name of the structure to process, e.g. 5N")
parser.add_argument("state", type=str, help="Positive or negative samples")
# The value is appended to the REPO_DIR environment variable further down.
parser.add_argument("yaml", type=str, help="Yaml file with parameters, relative to REPO_DIR")
args = parser.parse_args()
stack = args.stack
struc = args.structure
state = args.state
import cv2
#from cv2 import moments,HuMoments
import pickle
import numpy as np


import os
import sys
from time import time
from glob import glob
from extractPatches import patch_extractor
#from label_patch import diffusionMap
#from patch_normalizer import normalizer
from lib.utils import mark_contours, configuration, run

import ray
# Start Ray with explicit memory limits for its object store and for Redis.
# NOTE(review): the values are presumably bytes (~70 GB and ~30 GB) -- confirm
# against the installed Ray version and the memory available on the host.
ray.init(object_store_memory=70000000000,redis_max_memory=30000000000)

@ray.remote
def generator(structure, state, cell_dir, patch_dir, stack, params):
    """Extract the cells from every patch image of one structure and pickle them.

    Parameters
    ----------
    structure : str
        Structure name; selects the patch sub-directory and names the output.
    state : str
        'positive' reads the patches in ``patch_dir + structure``; any other
        value reads ``patch_dir + structure + '_surround_200um_noclass'``.
    cell_dir : str
        Output root. It is concatenated directly with ``structure``, so it
        must end with a path separator.
    patch_dir : str
        Root directory of the patch images; concatenated directly as well, so
        it must end with a path separator.
    stack : str
        Stack name, used as the prefix of the output file names.
    params : dict
        Parameters. ``params['preprocessing']['polarity']`` and
        ``params['preprocessing']['min_std']`` are read here and the whole
        object is handed to ``patch_extractor``.

    Returns
    -------
    None
        The list of extracted cells is written to
        ``<cell_dir><structure>/<stack>_<structure>_<state>.pkl``. If that file
        already exists nothing is computed. Once more than 100000 cells have
        been collected, a one-off snapshot is also written to a ``*_part.pkl``
        file next to it.
    """
    t1 = time()
    savepath = cell_dir + structure + '/'
    pkl_out_file = savepath + stack + '_' + structure + '_' + state + '.pkl'

    if os.path.exists(pkl_out_file):
        print(structure +'_'+state+ ' ALREADY EXIST')
        return
    # exist_ok avoids a failure when the directory appears between check and creation.
    os.makedirs(savepath, exist_ok=True)

    if state == 'positive':
        patches = glob(patch_dir + structure + '/*')
    else:
        patches = glob(patch_dir + structure + '_surround_200um_noclass/*')

    # Loop-invariant preprocessing settings.
    preprocessing = params['preprocessing']
    invert = preprocessing['polarity'] == -1
    min_std = preprocessing['min_std']

    cells = []
    partial_saved = False
    for i, patch in enumerate(patches):
        extractor = patch_extractor(patch, params)
        tile = cv2.imread(patch, 0)
        if tile is None:
            # cv2.imread signals an unreadable file by returning None, not by raising.
            print('image', patch, 'could not be read, skipping')
            continue
        if invert:
            tile = 255 - tile
        if np.std(tile) < min_std:
            continue  # image too blank, skipping

        Stats = extractor.segment_cells(tile)
        cells.extend(extractor.extract_blobs(Stats, tile))

        count = len(cells)
        # Progress report whenever the running total is just past a multiple of 20000.
        if count % 20000 <= 30:
            print(structure + '_'+state, count,i,'/',len(patches))
        # Write a single intermediate snapshot once the collection gets large.
        if count > 100000 and not partial_saved:
            print(structure, i,len(patches))
            partial_saved = True
            pkl_out = savepath + stack + '_' + structure + '_' + state + '_part.pkl'
            with open(pkl_out, 'wb') as part_file:
                pickle.dump(cells, part_file)

    # Use len(cells) here: the in-loop counter is never assigned when there are
    # no patches or every tile was skipped.
    print(structure + '_'+state,len(cells))
    with open(pkl_out_file, 'wb') as out_file:
        pickle.dump(cells, out_file)
    #s3_directory = 's3://mousebrainatlas-data/CSHL_cells_dm/'+stack+'/'+structure+'/'
    #run('aws s3 cp "{0}" {1}/'.format(pkl_out_file,s3_directory))
    print(structure + '_'+state+ ' finished in %5.1f seconds' % (time() - t1))

# Load the parameters from the YAML file named on the command line. The name is
# always taken relative to REPO_DIR; joining (instead of plain concatenation)
# works whether or not REPO_DIR ends with a path separator.
yamlfile = os.path.join(os.environ['REPO_DIR'], args.yaml.lstrip('/'))
params = configuration(yamlfile).getParams()

paired_structures = ['5N', '6N', '7N', '7n', 'Amb', 'LC', 'LRt', 'Pn', 'Tz', 'VLL', 'RMC',
                     'SNC', 'SNR', '3N', '4N', 'Sp5I', 'Sp5O', 'Sp5C', 'PBG', '10N', 'VCA', 'VCP', 'DC']
singular_structures = ['AP', '12N', 'RtTg', 'SC', 'IC']

all_structures = paired_structures + singular_structures

# Input and output directories for this stack. The trailing '' makes join()
# end the path with a separator, which generator() needs because it builds its
# paths by plain string concatenation.
root_dir = os.environ['ROOT_DIR']
patch_dir = os.path.join(root_dir, 'CSHL_patches', stack, '')
cell_dir = os.path.join(root_dir, 'CSHL_cells_dm', stack, '')
print(cell_dir)
# Creates CSHL_cells_dm/ and the per-stack directory in one step; no error if present.
os.makedirs(cell_dir, exist_ok=True)

#t0=time()

#assert structure

# Run the extraction for the requested structure/state as a Ray task and wait for it.
ray.get(generator.remote(struc, state, cell_dir, patch_dir, stack, params))


#print('Finished in %5.1f seconds'%(time()-t0))

