In [83]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [84]:
import ipas.collection_from_db.database as database
import ipas.collection_from_db.iceagg_collection as collect
from ipas.collection_from_db.calculations import ClusterCalculations

import numpy as np
import dask
import pandas as pd
import random
from dask_jobqueue import SLURMCluster
from dask.distributed import Client
from dask import delayed
import glob

In [85]:
cluster = SLURMCluster(
    queue='kratos',
    walltime='04-23:00:00',
    cores=1,
    memory='1000MiB', #1 GiB = 1,024 MiB
    processes=1)

#cluster.adapt(minimum=3, maximum=20)
cluster.scale(1)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 44285 instead
  http_address["port"], self.http_server.port


In [86]:
client = Client(cluster)

In [94]:
client

0,1
Client  Scheduler: tcp://169.226.65.141:44927  Dashboard: http://169.226.65.141:44285/status,Cluster  Workers: 1  Cores: 1  Memory: 0.98 GiB


## read databases

In [88]:
orientation = 'rand'  # chose which orientation (rand or flat)
if orientation == 'rand':
    files = glob.glob("/network/rit/lab/sulialab/share/IPAS/ipas/instance_files/createdb_iceagg_rand*")
    # randomly orient the seed crystal and new crystal: uses first random orientation
    rand_orient = True  
else:
    files = glob.glob("/network/rit/lab/sulialab/share/IPAS/ipas/instance_files/createdb_iceagg_flat*")
    # randomly orient the seed crystal and new crystal: uses first random orientation
    rand_orient = False     

In [89]:
db = database.Database(files)
db.read_database()
db.append_shape()
db.truncate_agg_r(5000)
db.append_agg_phi()
df = db.df  # get the dataframe (db is an instance of database.py module)

reading:  /network/rit/lab/sulialab/share/IPAS/ipas/instance_files/createdb_iceagg_rand_r500_1000
reading:  /network/rit/lab/sulialab/share/IPAS/ipas/instance_files/createdb_iceagg_rand_r1_5
reading:  /network/rit/lab/sulialab/share/IPAS/ipas/instance_files/createdb_iceagg_rand_r6_10
reading:  /network/rit/lab/sulialab/share/IPAS/ipas/instance_files/createdb_iceagg_rand_r20_70
reading:  /network/rit/lab/sulialab/share/IPAS/ipas/instance_files/createdb_iceagg_rand_r80_400


# MAIN

In [90]:
# bounds for the monomer aspect ratio to be pulled from distribution
# factor of 10 = +/-10* aspect ratio of monomer in aggs
phi_factor=10
r_factor=2
phi_range = np.logspace(np.log(1.0/phi_factor)/np.log(10),
                                        np.log(1.0*phi_factor)/np.log(10), 20)
r_range = np.logspace(np.log(100/r_factor)/np.log(10),\
                                      np.log(100*r_factor)/np.log(10),20)

#number of bins to split database into
agg_phi_bins = 20
agg_r_bins = 20

# number of aggregates to create per bin
nclusters = 3

In [96]:
def main():
    output = np.empty((agg_phi_bins, agg_r_bins),dtype=object)
    hold_clusters  = np.empty((agg_phi_bins, agg_r_bins, nclusters), dtype=object)
    a  = np.empty((agg_phi_bins, agg_r_bins, nclusters), dtype=object)
    c = np.empty((agg_phi_bins, agg_r_bins, nclusters), dtype=object)

    res, phi_bins = pd.qcut(df.agg_phi, agg_phi_bins, retbins=True)

    for i in range(agg_phi_bins):
        #print('agg phi range: ', phi_bins[i], phi_bins[i+1])
        #return a df that only queries within an aspect ratio bin
        df_phi = df[(df.agg_phi > phi_bins[i]) & (df.agg_phi < phi_bins[i+1])]  
        #to ensure at least 2 crystals within agg since ncrystals=1 not in db
        #now break that aspect ratio bin into 20 equal r bins

        res, r_bins = pd.qcut(df_phi.agg_r, agg_r_bins, retbins=True)
        for r in range(agg_r_bins): #agg r
            #print('r = ', r_bins[r], r_bins[r+1])
            df_r = df_phi[(df_phi.agg_r > r_bins[r]) & (df_phi.agg_r < r_bins[r+1])]

            samples = df_r.sample(nclusters)
            for n, agg in enumerate(samples.itertuples()):
                #print('agg mono phi', agg.mono_phi)
                phi_range = np.logspace(np.log(agg.mono_phi/phi_factor)/np.log(10),
                                        np.log(agg.mono_phi*phi_factor)/np.log(10), 20)
                mono_phi = random.choice(phi_range)
                mono_phi = 0.01 if mono_phi < 0.01 else mono_phi
                mono_phi = 70 if mono_phi > 70 else mono_phi

                r_range = np.logspace(np.log(agg.mono_r/r_factor)/np.log(10),\
                                      np.log(agg.mono_r*r_factor)/np.log(10),20)
                mono_r = random.choice(r_range)
                mono_r = 1000 if mono_r>1000 else mono_r
                mono_r = 1 if mono_r<1 else mono_r

                a[i,r,n] = (mono_r ** 3 / mono_phi) ** (1. / 3.)
                c[i,r,n] = mono_phi * a[i,r,n]
#               print('phi range', agg.mono_phi, phi_range[0], phi_range[-1], phi_range)
#               print('r range: ', agg.mono_r, r_range[0], r_range[-1], r_range)

                hold_clusters[i,r,n] = ClusterCalculations(agg)

            # test without dask
            #collect.collect_clusters_iceagg(a[i,r,:], c[i,r,:], hold_clusters[i,r,:], rand_orient=rand_orient)
            # using dask in parallel
            output[i,r] = dask.delayed(collect.collect_clusters_iceagg)(a[i,r,:], c[i,r,:],
                                                                         hold_clusters[i,r,:],
                                                                         rand_orient=rand_orient)

    return output, hold_clusters

In [97]:
def compute():
    agg_as = np.empty((agg_phi_bins, agg_r_bins, nclusters))
    agg_bs = np.empty((agg_phi_bins, agg_r_bins, nclusters))
    rzs = np.empty((agg_phi_bins, agg_r_bins, nclusters))
    phi2Ds = np.empty((agg_phi_bins, agg_r_bins, nclusters))
    cplxs = np.empty((agg_phi_bins, agg_r_bins, nclusters))
    dds = np.empty((agg_phi_bins, agg_r_bins, nclusters))

    gather = client.compute([*output.tolist()])
    gather = client.gather(gather)
    gather = np.array(gather)

    agg_as = gather[:,:,0,:]
    agg_bs = gather[:,:,1,:]
    agg_cs = gather[:,:,2,:]
    phi2Ds = gather[:,:,3,:]
    cplxs = gather[:,:,4,:]
    dds = gather[:,:,5,:]

    print('DONE!')
    return agg_as, agg_bs, agg_cs, phi2Ds, cplxs, dds

In [98]:
if __name__ == '__main__':
    output, hold_clusters = main()
    agg_as, agg_bs, agg_cs, phi2Ds, cplxs, dds= compute()
    results = {'agg_as': agg_as, 'agg_bs':agg_bs, 'agg_cs':agg_cs, 'phi2Ds':phi2Ds, \
               'cplxs':cplxs, 'dds':dds}


IndexError: too many indices for array: array is 2-dimensional, but 4 were indexed

In [None]:
# pickle data to files
filename = '../instance_files/pulled_clusters_iceagg_rand'
filehandler = open(filename, 'wb')
to_file = np.append(hold_clusters)
pickle.dump(to_file, filehandler)
filehandler.close()
print('finished!')

filename = '../instance_files/instance_db_iceagg_rand'
filehandler = open(filename, 'wb')
pickle.dump(results, filehandler)
filehandler.close()
print('finished!')