In [1]:
import datajoint as dj
import numpy as np
import json
from subprocess import call
import yaml
import sys, os
import pandas as pd
import ray

sys.path.append('./lib')
from utilities import *
sys.path.append('../lib')
from utils import run

In [2]:
def setup_download_from_s3( rel_fp, recursive=True ):
    s3_fp = 's3://mousebrainatlas-data/'+rel_fp
    local_fp = os.environ['ROOT_DIR']+rel_fp

    if os.path.exists(local_fp):
        print('ALREADY DOWNLOADED FILE')
        return 
    
    if recursive:
        ! aws s3 cp --recursive $s3_fp $local_fp
    else:
        ! aws s3 cp $s3_fp $local_fp

In [3]:
# Fetch the MD589 region data from S3 unless it already exists under ROOT_DIR.
setup_download_from_s3('CSHL_new_regions/MD589/')

ALREADY DOWNLOADED FILE


In [4]:
# Path to the YAML file handed to get_dj_creds / get_s3_client.
# NOTE(review): this is an absolute, machine-specific path; consider reading it
# from an environment variable so the notebook runs on other hosts.
credFiles= '/data/Github/VaultBrain/credFiles_aws.yaml'
#'/Users/kuiqian/Github/VaultBrain/credFiles.yaml'

# Read the credentials once instead of re-parsing the file for every setting,
# then copy the four connection settings into the DataJoint configuration.
dj_creds = get_dj_creds(credFiles)
for dj_setting in ('database.host', 'database.user', 'database.port', 'database.password'):
    dj.config[dj_setting] = dj_creds[dj_setting]

# Open the connection; as the last expression its repr is shown as the cell output.
dj.conn()

Connecting kui@ucsd-demo-db.datajoint.io:3306


  credFiles = yaml.load(open( credential_file_pointers,'r'))


DataJoint connection (connected) kui@ucsd-demo-db.datajoint.io:3306

In [5]:
# Bind to the 'kui_diffusionmap' schema on the connected DataJoint server.
schema = dj.schema('kui_diffusionmap')
# Create Python classes for tables that already exist in the schema but have
# not been declared in this session.
schema.spawn_missing_classes()

In [6]:
# Globals interpolated into the `Cell_generator.py` shell command run by Process2.make.
stack = 'MD589'  # brain stack identifier (first script argument)
yaml_file = 'shape_params-aws.yaml'  # parameter file passed as the last script argument
scripts_dir = os.environ['REPO_DIR']  # directory containing Cell_generator.py; KeyError if REPO_DIR is unset

In [5]:
# Manual DataJoint table with one row per brain structure.
# `id` is the primary key; the two *_s3_fp columns hold the paths of the
# structure's positive and negative feature files. The paths are stored without
# the 's3://<bucket>/' prefix (Process2.make prepends the bucket when uploading).
@schema
class Structure2(dj.Manual):
    definition = """
    id  : int   # sequence
    --------
    structure       : char(10)    # name of mouse brain's structure
    positive_s3_fp  : varchar(200)  # (str) file path
    negative_s3_fp  : varchar(200)  # (str) file path
    """

In [6]:
# Structure names. The "paired" / "singular" split is taken from the variable
# names; both groups are simply concatenated for the database inserts.
paired_structures = [
    '5N', '6N', '7N', '7n', 'Amb', 'LC', 'LRt', 'Pn', 'Tz', 'VLL', 'RMC',
    'SNC', 'SNR', '3N', '4N', 'Sp5I', 'Sp5O', 'Sp5C', 'PBG', '10N', 'VCA', 'VCP', 'DC',
]
singular_structures = ['AP', '12N', 'RtTg', 'SC', 'IC']
all_structures = paired_structures + singular_structures

stack = 'MD589'
# Feature directory for this stack, relative to s3://mousebrainatlas-data/
s3_fp = 'CSHL_new_regions_features/{}/'.format(stack)
print(s3_fp)

CSHL_new_regions_features/MD589/


In [12]:
# Register every structure together with the paths of its positive and
# negative feature files.
#  - `enumerate` supplies the sequential primary key `id`, replacing the
#    hand-maintained counter.
#  - skip_duplicates=True makes the cell safe to re-run: a row whose `id`
#    already exists is left untouched instead of aborting the loop with a
#    duplicate-key error. The "Adding ..." line is printed either way.
for n, struc in enumerate(all_structures):
    print("\nAdding "+struc+' to the database')
    # Common prefix: <s3_fp><struc>/<stack>_<struc>
    feature_prefix = s3_fp+struc+'/'+stack+'_'+struc
    Structure2.insert1(dict(id=n,
                            structure=struc,
                            positive_s3_fp=feature_prefix+'_positive.pkl',
                            negative_s3_fp=feature_prefix+'_negative.pkl'),
                       skip_duplicates=True)


Adding 5N to the database

Adding 6N to the database

Adding 7N to the database

Adding 7n to the database

Adding Amb to the database

Adding LC to the database

Adding LRt to the database

Adding Pn to the database

Adding Tz to the database

Adding VLL to the database

Adding RMC to the database

Adding SNC to the database

Adding SNR to the database

Adding 3N to the database

Adding 4N to the database

Adding Sp5I to the database

Adding Sp5O to the database

Adding Sp5C to the database

Adding PBG to the database

Adding 10N to the database

Adding VCA to the database

Adding VCP to the database

Adding DC to the database

Adding AP to the database

Adding 12N to the database

Adding RtTg to the database

Adding SC to the database

Adding IC to the database


In [7]:
# Pull every Structure2 row as a list of dicts and display it as a DataFrame.
structureTable = Structure2.fetch(as_dict=True)
strucDF = pd.DataFrame(structureTable)
strucDF

Unnamed: 0,id,structure,positive_s3_fp,negative_s3_fp
0,0,5N,CSHL_new_regions_features/MD589/5N/MD589_5N_po...,CSHL_new_regions_features/MD589/5N/MD589_5N_ne...
1,1,6N,CSHL_new_regions_features/MD589/6N/MD589_6N_po...,CSHL_new_regions_features/MD589/6N/MD589_6N_ne...
2,2,7N,CSHL_new_regions_features/MD589/7N/MD589_7N_po...,CSHL_new_regions_features/MD589/7N/MD589_7N_ne...
3,3,7n,CSHL_new_regions_features/MD589/7n/MD589_7n_po...,CSHL_new_regions_features/MD589/7n/MD589_7n_ne...
4,4,Amb,CSHL_new_regions_features/MD589/Amb/MD589_Amb_...,CSHL_new_regions_features/MD589/Amb/MD589_Amb_...
5,5,LC,CSHL_new_regions_features/MD589/LC/MD589_LC_po...,CSHL_new_regions_features/MD589/LC/MD589_LC_ne...
6,6,LRt,CSHL_new_regions_features/MD589/LRt/MD589_LRt_...,CSHL_new_regions_features/MD589/LRt/MD589_LRt_...
7,7,Pn,CSHL_new_regions_features/MD589/Pn/MD589_Pn_po...,CSHL_new_regions_features/MD589/Pn/MD589_Pn_ne...
8,8,Tz,CSHL_new_regions_features/MD589/Tz/MD589_Tz_po...,CSHL_new_regions_features/MD589/Tz/MD589_Tz_ne...
9,9,VLL,CSHL_new_regions_features/MD589/VLL/MD589_VLL_...,CSHL_new_regions_features/MD589/VLL/MD589_VLL_...


In [11]:
# WARNING: destructive. Drops the Process2 table after an interactive
# confirmation prompt; the recorded output asks for a kernel restart afterwards.
# Process2 is only declared in a later cell, so on a fresh kernel this name
# presumably comes from schema.spawn_missing_classes() - confirm before running.
Process2.drop()

`kui_diffusionmap`.`__process2` (28 tuples)
Proceed? [yes, No]: yes
Tables dropped.  Restart kernel.


In [12]:
@schema
class Process2(dj.Computed):
    definition="""
    -> Structure2
    -----
    size_positive_properties : int   #size of file
    size_negative_properties : int   #size of file
    """

    bucket = "mousebrainatlas-data"
    client = get_s3_client(credFiles)
    def make(self, key):
        struc = (Structure2 & key).fetch1('structure')
        print('populating for ', struc, end='\n')
        for state in ['positive', 'negative']:
            item_name = state+'_s3_fp'
            key_item = 'size_'+state+'_properties'
            s3_fp = (Structure2 & key).fetch1(item_name)
            try:
                report = self.client.stat_object(self.bucket, s3_fp)
                key[key_item] = int(report.size/1000)
            except:
                ! python3 $scripts_dir/Cell_generator.py $stack $struc $state $yaml_file
                upload_fp = 's3://'+self.bucket+'/'+s3_fp
                pkl_out_file = os.environ['ROOT_DIR']+s3_fp
                ! aws s3 cp $pkl_out_file $upload_fp
                print(pkl_out_file,upload_fp)
                report = self.client.stat_object(self.bucket, s3_fp)
                key[key_item] = int(report.size / 1000)
        try:
            self.insert1(key)
        except:
            print('could not insert key=', key)


  credFiles = yaml.load(open( credential_file_pointers,'r'))


In [13]:
# Run Process2.make for the Structure2 keys that do not yet have a Process2 row.
# This can take a long time: missing feature files are regenerated and uploaded.
Process2.populate()

populating for  5N
2019-06-21 15:24:29,097	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-06-21_15-24-29_097462_4203/logs.
2019-06-21 15:24:29,205	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:61115 to respond...
2019-06-21 15:24:29,313	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:58591 to respond...
2019-06-21 15:24:29,314	INFO services.py:806 -- Starting Redis shard with 10.0 GB max memory.
2019-06-21 15:24:29,324	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-06-21_15-24-29_097462_4203/logs.
2019-06-21 15:24:29,325	INFO services.py:1442 -- Starting the Plasma object store with 20.0 GB memory using /dev/shm.
  self.D=yaml.load(open(yamlFile,'r'))
/data/BstemAtlasDataBackup/ucsd_brain/CSHL_new_regions_features/MD589/
[2m[36m(pid=4315)[0m 5N_positive 1 0 / 44
[2m[36m(pid=4315)[0m 5N_positive 11 10 / 44
[2m[36m(pid=4315)[0m 5N_positive 21 20 / 44
[2m[36m(pi