In [1]:
import datajoint as dj
import numpy as np
import json
from subprocess import call
import yaml
import sys, os
import pandas as pd
import ray

sys.path.append('./lib')
from utilities import *
sys.path.append('../lib')
from utils import run

In [2]:
def setup_download_from_s3( rel_fp, recursive=True ):
    s3_fp = 's3://mousebrainatlas-data/'+rel_fp
    local_fp = os.environ['ROOT_DIR']+rel_fp

    if os.path.exists(local_fp):
        print('ALREADY DOWNLOADED FILE')
        return 
    
    if recursive:
        ! aws s3 cp --recursive $s3_fp $local_fp
    else:
        ! aws s3 cp $s3_fp $local_fp

In [3]:
setup_download_from_s3('CSHL_new_regions/MD589/')

ALREADY DOWNLOADED FILE


In [2]:
#credFiles= '/data/Github/VaultBrain/credFiles_aws.yaml'
credFiles= '/Users/kuiqian/Github/VaultBrain/credFiles.yaml'
dj.config['database.host'] = get_dj_creds(credFiles)['database.host']
dj.config['database.user'] = get_dj_creds(credFiles)['database.user']
dj.config['database.port'] = get_dj_creds(credFiles)['database.port']
dj.config['database.password'] = get_dj_creds(credFiles)['database.password']
dj.conn()

  credFiles = yaml.load(open( credential_file_pointers,'r'))


Connecting kui@ucsd-demo-db.datajoint.io:3306


DataJoint connection (connected) kui@ucsd-demo-db.datajoint.io:3306

In [3]:
schema = dj.schema('kui_diffusionmap')
schema.spawn_missing_classes()

In [4]:
stack = 'MD589'
#yaml_file = 'shape_params-aws.yaml'
yaml_file = 'shape_params.yaml'
scripts_dir = os.environ['REPO_DIR']

In [5]:
@schema
class Structure3(dj.Manual):
    definition = """
    id  : int   # sequence
    --------
    structure       : char(10)    # name of mouse brain's structure
    positive_s3_fp  : varchar(200)  # (str) file path
    negative_s3_fp  : varchar(200)  # (str) file path
    """

In [6]:
paired_structures = ['5N', '6N', '7n', 'Amb', 'LC', 'LRt', 'Pn', 'Tz', 'VLL', 'RMC', \
                     'SNC', 'SNR', '3N', '4N', 'Sp5I', 'Sp5O', 'Sp5C', 'PBG', '10N', 'VCA', 'VCP', 'DC']
singular_structures = ['AP', '12N', 'RtTg', 'SC', 'IC'] #, '7N'
all_structures = paired_structures + singular_structures
stack = 'MD589'
s3_fp = 'CSHL_patches_features/'+stack+'/' #s3://mousebrainatlas-data/
print(s3_fp)

CSHL_patches_features/MD589/


In [8]:
n=0
for struc in all_structures:
    print("\nAdding "+struc+' to the database')
    Structure3.insert1(dict(id=n,
                           structure=struc,
                           positive_s3_fp=s3_fp+struc+'/'+stack+'_'+struc+'_positive.pkl',
                           negative_s3_fp=s3_fp+struc+'/'+stack+'_'+struc+'_negative.pkl',)
                      ,skip_duplicates=False)
    n+=1


Adding 5N to the database

Adding 6N to the database

Adding 7n to the database

Adding Amb to the database

Adding LC to the database

Adding LRt to the database

Adding Pn to the database

Adding Tz to the database

Adding VLL to the database

Adding RMC to the database

Adding SNC to the database

Adding SNR to the database

Adding 3N to the database

Adding 4N to the database

Adding Sp5I to the database

Adding Sp5O to the database

Adding Sp5C to the database

Adding PBG to the database

Adding 10N to the database

Adding VCA to the database

Adding VCP to the database

Adding DC to the database

Adding AP to the database

Adding 12N to the database

Adding RtTg to the database

Adding SC to the database

Adding IC to the database


In [9]:
structureTable = Structure3.fetch(as_dict=True)
strucDF = pd.DataFrame(structureTable)
strucDF

Unnamed: 0,id,structure,positive_s3_fp,negative_s3_fp
0,0,5N,CSHL_patches_features/MD589/5N/MD589_5N_positi...,CSHL_patches_features/MD589/5N/MD589_5N_negati...
1,1,6N,CSHL_patches_features/MD589/6N/MD589_6N_positi...,CSHL_patches_features/MD589/6N/MD589_6N_negati...
2,2,7n,CSHL_patches_features/MD589/7n/MD589_7n_positi...,CSHL_patches_features/MD589/7n/MD589_7n_negati...
3,3,Amb,CSHL_patches_features/MD589/Amb/MD589_Amb_posi...,CSHL_patches_features/MD589/Amb/MD589_Amb_nega...
4,4,LC,CSHL_patches_features/MD589/LC/MD589_LC_positi...,CSHL_patches_features/MD589/LC/MD589_LC_negati...
5,5,LRt,CSHL_patches_features/MD589/LRt/MD589_LRt_posi...,CSHL_patches_features/MD589/LRt/MD589_LRt_nega...
6,6,Pn,CSHL_patches_features/MD589/Pn/MD589_Pn_positi...,CSHL_patches_features/MD589/Pn/MD589_Pn_negati...
7,7,Tz,CSHL_patches_features/MD589/Tz/MD589_Tz_positi...,CSHL_patches_features/MD589/Tz/MD589_Tz_negati...
8,8,VLL,CSHL_patches_features/MD589/VLL/MD589_VLL_posi...,CSHL_patches_features/MD589/VLL/MD589_VLL_nega...
9,9,RMC,CSHL_patches_features/MD589/RMC/MD589_RMC_posi...,CSHL_patches_features/MD589/RMC/MD589_RMC_nega...


In [13]:
Process3.drop()

`kui_diffusionmap`.`__process3` (3 tuples)
Proceed? [yes, No]: yes
Tables dropped.  Restart kernel.


In [14]:
@schema
class Process3(dj.Computed):
    definition="""
    -> Structure2
    -----
    size_positive_properties : int   #size of file
    size_negative_properties : int   #size of file
    """

    bucket = "mousebrainatlas-data"
    client = get_s3_client(credFiles)
    def make(self, key):
        struc = (Structure3 & key).fetch1('structure')
        print('populating for ', struc, end='\n')
        for state in ['positive', 'negative']:
            item_name = state+'_s3_fp'
            key_item = 'size_'+state+'_properties'
            s3_fp = (Structure3 & key).fetch1(item_name)
            try:
                report = self.client.stat_object(self.bucket, s3_fp)
                key[key_item] = int(report.size/1000)
            except:
                ! python3 $scripts_dir/Cell_generator.py $stack $struc $state $yaml_file
                upload_fp = 's3://'+self.bucket+'/'+s3_fp
                pkl_out_file = os.environ['ROOT_DIR']+s3_fp
                ! aws s3 cp $pkl_out_file $upload_fp
                print(pkl_out_file,upload_fp)
                report = self.client.stat_object(self.bucket, s3_fp)
                key[key_item] = int(report.size / 1000)
        try:
            self.insert1(key)
        except:
            print('could not insert key=', key)


  credFiles = yaml.load(open( credential_file_pointers,'r'))


In [15]:
Process3.populate()

populating for  5N
populating for  6N
populating for  7n
populating for  Amb
  import imp
2019-06-24 23:48:55,093	INFO node.py:469 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-06-24_23-48-55_88085/logs.
2019-06-24 23:48:55,200	INFO services.py:407 -- Waiting for redis server at 127.0.0.1:39738 to respond...
2019-06-24 23:48:55,333	INFO services.py:407 -- Waiting for redis server at 127.0.0.1:42302 to respond...
2019-06-24 23:48:55,337	INFO services.py:804 -- Starting Redis shard with 1.72 GB max memory.
2019-06-24 23:48:55,356	INFO node.py:483 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-06-24_23-48-55_88085/logs.
2019-06-24 23:48:55,357	INFO services.py:1427 -- Starting the Plasma object store with 2.58 GB memory using /tmp.
  self.D=yaml.load(open(yamlFile,'r'))
/Users/kuiqian/BstemAtlasDataBackup/ucsd_brain/CSHL_patches_features/MD589/
[2m[36m(pid=88099)[0m Amb_positive ALREADY EXIST
upload: ../../../../BstemAtlasDataBacku

NoSuchKey: NoSuchKey: message: The specified key does not exist.