In [None]:
# Reload imported modules automatically so that saved changes are picked up.
# If a new method or object is created, autoreload doesn't work and the
# kernel needs to be restarted.
%load_ext autoreload
%autoreload 2
%load_ext memory_profiler

In [None]:
import numpy as np
import pickle
import glob
import random
import pandas as pd
import matplotlib.pyplot as plt
from dask import dataframe as dd

import sys
sys.path.append('../../collection_from_db')
sys.path.append('../../scripts')
import ipas
import batch_statistics

# Load data

In [104]:
# Orientation tag; it is appended to the instance-file and parquet paths read below.
orientation = 'flat'

In [105]:
# Load the pre-computed aggregate-aggregate results for the selected orientation.
# The context managers guarantee the file handles are released even if
# unpickling fails part-way through.
# NOTE: pickle.load can execute arbitrary code -- only open files produced by
# this project.
with open('../../instance_files/instance_db_aggagg_'+orientation, 'rb') as f:
    results = pickle.load(f)

# Unpack the individual result arrays stored under their dictionary keys.
agg_as, agg_bs, agg_cs, phi2Ds, cplxs, dds = (
    results[key] for key in ('agg_as', 'agg_bs', 'agg_cs', 'phi2Ds', 'cplxs', 'dds')
)

# Companion file holding the pulled clusters; the write cells further down read
# the .a, .c and .r attributes of its elements.
with open('../../instance_files/pulled_clusters_aggagg_'+orientation, 'rb') as f:
    pulled_clus = pickle.load(f)

In [106]:
# Display the dimensions of agg_as; the next cell reads the three axes as
# (phi bins, r bins, clusters per bin).
np.shape(agg_as)

(20, 20, 300)

In [107]:
# Number of aspect-ratio bins, radius bins and clusters per bin, read from the
# first three axes of agg_as.
agg_phi_bins, agg_r_bins, nclusters = np.shape(agg_as)[:3]

In [108]:
%%time
#read db: collect every parquet file matching this orientation and
#materialise the dask dataframe with .compute()
parquet_glob = "/network/rit/lab/sulialab/share/IPAS_3radii/instance_files/parquet_files/*_"+orientation+"_*"
df = dd.read_parquet(parquet_glob, engine="pyarrow").compute()

CPU times: user 4.14 s, sys: 3.92 s, total: 8.06 s
Wall time: 11.3 s


In [109]:
def shape(a,b,c):
    """Classify an ellipsoid from its three axis lengths.

    Returns 'prolate' when the gap between b and c is no larger than the gap
    between a and b, and 'oblate' otherwise.
    """
    return 'prolate' if (b-c) <= (a-b) else 'oblate'

In [110]:
%%time 
# Radius derived from the axis lengths: (a^2 * c)^(1/3).
df['agg_r'] = np.power((np.power(df['a'], 2) * df['c']), (1./3.))
# Keep only aggregates below the 5000 size cut-off. The explicit copy makes the
# filtered frame independent, so the column assignments below do not trigger
# pandas' SettingWithCopyWarning.
df = df[df['agg_r'] < 5000].copy()
# Same rule as shape(a, b, c), evaluated column-wise in one vectorised call
# instead of np.vectorize, which loops over the rows in Python.
df['shape'] = np.where((df['b'] - df['c']) <= (df['a'] - df['b']), 'prolate', 'oblate')
# Aggregate aspect ratio.
df['agg_phi'] = df['c']/df['a']

CPU times: user 2.83 s, sys: 1.18 s, total: 4 s
Wall time: 8.66 s


In [None]:
# Old, slower way of reading in the database (kept for reference; the parquet cells above replace it):

In [None]:
# Pickled database files for the legacy loader below.
# NOTE(review): this pattern starts at ../ while the other loaders in this
# notebook read from ../../instance_files -- confirm the path is still valid.
files = glob.glob("../instance_files/createdb_iceagg_flat*")

In [None]:
%%time 
# Legacy loader: read every pickled database file and stack them into one frame.
frames = []
for file in files:
    print(file)
    # compression=None: the pickles are read as-is, with no decompression.
    frames.append(pd.DataFrame(pd.read_pickle(file, compression=None)))
df = pd.concat(frames, axis=0, ignore_index=True)

# Radius derived from the axis lengths: (a^2 * c)^(1/3).
df['agg_r'] = np.power((np.power(df['a'], 2) * df['c']), (1./3.))
# Drop aggregates at or above the 5000 cut-off as whole rows, matching the
# parquet-based cell. Previously only the agg_r value was blanked to NaN, so
# the oversized rows stayed in the frame and still fed the agg_phi binning.
df = df[df['agg_r'] < 5000].reset_index(drop=True)
# Column-wise prolate/oblate classification (same rule as shape(a, b, c)),
# replacing the slow row-by-row DataFrame.apply.
df['shape'] = np.where((df['b'] - df['c']) <= (df['a'] - df['b']), 'prolate', 'oblate')
df['agg_phi'] = df['c']/df['a']

In [None]:
# End of the legacy loader -- resume running cells from here.

In [111]:
# Split the aggregates into equal-population aspect-ratio (phi) bins, then split
# each phi bin into equal-population radius (r) bins.
# The bin counts come from the loaded statistics arrays (agg_phi_bins,
# agg_r_bins) because the write cells index phi_bins and all_r_bins with exactly
# those ranges; hard-coding the counts only worked while both were 20.
res, phi_bins = pd.qcut(df.agg_phi, agg_phi_bins, retbins=True)
#print(phi_bins)
phi_bin_labs = []
# One row of r-bin edges per phi bin; agg_r_bins bins have agg_r_bins+1 edges.
all_r_bins = np.empty((agg_phi_bins, agg_r_bins + 1))
for i in range(agg_phi_bins):
    phi_bin_labs.append('[%.3f-%.3f]' %(phi_bins[i],phi_bins[i+1]))
    #return a df that only queries within an aspect ratio bin
    # NOTE(review): both comparisons are strict, so rows lying exactly on a bin
    # edge are left out of every bin -- confirm this is intended.
    df_phi = df[(df.agg_phi > phi_bins[i]) & (df.agg_phi < phi_bins[i+1])]
    #now break that aspect ratio bin into equal-population r bins
    res, r_bins = pd.qcut(df_phi.agg_r, agg_r_bins, retbins=True)
    all_r_bins[i,:] = r_bins


In [112]:
# Per-bin summary statistics (all calculations live in the batch_statistics
# module in the scripts folder; takes a few minutes to run):
#   - characteristic value of the fitted gamma distribution for the axis lengths
#   - mode taken from the histogram bins for the density change
stat_shape = (agg_phi_bins, agg_r_bins)
agg_cs_ch = np.empty(stat_shape, dtype=np.float64)
agg_as_ch = np.empty(stat_shape, dtype=np.float64)
agg_as_mean = np.empty(stat_shape, dtype=np.float64)
dds_mode = np.empty(stat_shape, dtype=np.float64)

for i in range(agg_phi_bins):
    for r in range(agg_r_bins):
        # c axis: characteristic value of the gamma fit
        c_batch = batch_statistics.Batch(agg_cs[i,r,:])
        c_batch.fit_distribution()
        agg_cs_ch[i,r] = c_batch.gamma_ch

        # a axis: characteristic value of the gamma fit plus the batch mean
        a_batch = batch_statistics.Batch(agg_as[i,r,:])
        a_batch.fit_distribution()
        agg_as_ch[i,r] = a_batch.gamma_ch
        agg_as_mean[i,r] = a_batch.mean

        # density change: mode of the histogram
        dd_batch = batch_statistics.Batch(dds[i,r,:])
        dd_batch.mode_of_hist()
        dds_mode[i,r] = dd_batch.mode
       


In [114]:
#write to file for output as array:
#flat orientation
#bin edges for range
# The statistics written here come from whichever `orientation` was loaded at
# the top of the notebook, while the file names and headers below are fixed to
# "flat". Only write when the flat data is actually loaded, so the tables can
# never be filled with another orientation's values.
if orientation != 'flat':
    print("Skipping flat bin-edge tables: loaded orientation is %r" % orientation)
else:
    # A single with-statement closes all four files on exit, so no explicit
    # close() calls are needed afterwards.
    with open("../../lookup_tables/agg_agg/newformat_minorax_ellipsoid_flat_ch_binedges.dat","w") as file1, \
         open("../../lookup_tables/agg_agg/newformat_majorax_ellipsoid_flat_ch_binedges.dat","w") as file2, \
         open("../../lookup_tables/agg_agg/newformat_majorax_ellipsoid_flat_mean_binedges.dat","w") as file3, \
         open("../../lookup_tables/agg_agg/newformat_dd_flat_mode_binedges.dat","w") as file4:

        # Free-text headers describing each table.
        file1.write('Agg-Agg collection for the flat orientation. \n'
                    'Characteristic values taken from the peak of a fit \n'
                    'gamma distribution from 300 aggregates. \n'
                    'Minor axis taken as the smallest axis from the fit-ellipsoid \n'
                    'Ranges are taken from the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file2.write('Agg-Agg collection for the flat orientation. \n'
                    'Characteristic values taken from the peak of a fit \n'
                    'gamma distribution from 300 aggregates. \n'
                    'Ranges are taken from the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file3.write('Agg-Agg collection for the flat orientation. \n'
                    'Mean value taken from the average across 300 aggregates. \n'
                    'Major axis taken as the largest axis from the fit-ellipsoid \n'
                    'Ranges are taken from the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file4.write('Agg-Agg collection for the flat orientation. \n'
                    'Avg volume ratio of aggs subtracted from volume ratio of new agg (Vagg/Vellipse)\n'
                    'Mode from 300 aggregates. \n'
                    'Ranges are taken from the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        # Each output file paired with the statistic it tabulates.
        tables = ((file1, agg_cs_ch), (file2, agg_as_ch), (file3, agg_as_mean), (file4, dds_mode))

        # One row per (phi bin, r bin): the bin edges followed by the value.
        for i in range(agg_phi_bins):
            for r in range(agg_r_bins):
                edges = (phi_bins[i], phi_bins[i+1], all_r_bins[i,r], all_r_bins[i,r+1])
                for out, values in tables:
                    out.write('%.3f %.3f %.3f %.3f %.3f \n' % (edges + (values[i,r],)))

In [113]:
#write to file for output as array
#flat orientation
#sampled aggs for range
# The statistics written here come from whichever `orientation` was loaded at
# the top of the notebook, while the file names and headers below are fixed to
# "flat". Only write when the flat data is actually loaded, so the tables can
# never be filled with another orientation's values.
if orientation != 'flat':
    print("Skipping flat sampled-range tables: loaded orientation is %r" % orientation)
else:
    # A single with-statement closes all four files on exit, so no explicit
    # close() calls are needed afterwards.
    with open("../../lookup_tables/agg_agg/newformat_minorax_ellipsoid_flat_ch.dat","w") as file1, \
         open("../../lookup_tables/agg_agg/newformat_majorax_ellipsoid_flat_ch.dat","w") as file2, \
         open("../../lookup_tables/agg_agg/newformat_majorax_ellipsoid_flat_mean.dat","w") as file3, \
         open("../../lookup_tables/agg_agg/newformat_dd_flat_mode.dat","w") as file4:

        # Free-text headers describing each table.
        file1.write('Agg-Agg collection for the flat orientation. \n'
                    'Characteristic values taken from the peak of a fit \n'
                    'gamma distribution from 300 aggregates. \n'
                    'Minor axis taken as the smallest axis from the fit-ellipsoid \n'
                    'Ranges are taken from the aggregates being pulled from the db, \n'
                    'not the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file2.write('Agg-Agg collection for the flat orientation. \n'
                    'Characteristic values taken from the peak of a fit \n'
                    'gamma distribution from 300 aggregates. \n'
                    'Ranges are taken from the aggregates being pulled from the db, \n'
                    'not the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file3.write('Agg-Agg collection for the flat orientation. \n'
                    'Mean value taken from the average across 300 aggregates. \n'
                    'Major axis taken as the largest axis from the fit-ellipsoid \n'
                    'Ranges are taken from the aggregates being pulled from the db, \n'
                    'not the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file4.write('Agg-Agg collection for the flat orientation. \n'
                    'Volume ratio of agg subtracted from volume ratio of new agg (Vagg/Vellipse)\n'
                    'Mode from 300 aggregates. \n'
                    'Ranges are taken from the aggregates being pulled from the db, \n'
                    'not the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        # Each output file paired with the statistic it tabulates.
        tables = ((file1, agg_cs_ch), (file2, agg_as_ch), (file3, agg_as_mean), (file4, dds_mode))

        # One row per (phi bin, r bin): the observed min/max aspect ratio and
        # radius of the clusters pulled for that bin, followed by the value.
        for i in range(agg_phi_bins):
            for r in range(agg_r_bins):
                listaggphi = [n.c/n.a for n in pulled_clus[i,r,:]]
                listaggr = [n.r for n in pulled_clus[i,r,:]]
                ranges = (min(listaggphi), max(listaggphi), min(listaggr), max(listaggr))
                for out, values in tables:
                    out.write('%.3f %.3f %.3f %.3f %.3f \n' % (ranges + (values[i,r],)))

In [None]:
#write to file for output as array:
#random orientation
#bin edges for range
# The statistics written here come from whichever `orientation` was loaded at
# the top of the notebook, while the file names and headers below are fixed to
# the random orientation. Refuse to write when the flat data is loaded, so the
# random tables can never be filled with flat values.
# NOTE(review): the exact tag used for the random orientation is not visible in
# this notebook, so only the known 'flat' case is excluded -- tighten if known.
if orientation == 'flat':
    print("Skipping random-orientation bin-edge tables: loaded orientation is %r" % orientation)
else:
    # A single with-statement closes all four files on exit, so no explicit
    # close() calls are needed afterwards.
    with open("../../lookup_tables/agg_agg/newformat_minorax_ellipsoid_rand_ch_binedges.dat","w") as file1, \
         open("../../lookup_tables/agg_agg/newformat_majorax_ellipsoid_rand_ch_binedges.dat","w") as file2, \
         open("../../lookup_tables/agg_agg/newformat_majorax_ellipsoid_rand_mean_binedges.dat","w") as file3, \
         open("../../lookup_tables/agg_agg/newformat_dd_rand_mode_binedges.dat","w") as file4:

        # Free-text headers describing each table.
        file1.write('Agg-Agg collection for the random orientation. \n'
                    'Characteristic values taken from the peak of a fit \n'
                    'gamma distribution from 300 aggregates. \n'
                    'Minor axis taken as the smallest axis from the fit-ellipsoid \n'
                    'Ranges are taken from the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file2.write('Agg-Agg collection for the random orientation. \n'
                    'Characteristic values taken from the peak of a fit \n'
                    'gamma distribution from 300 aggregates. \n'
                    'Ranges are taken from the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file3.write('Agg-Agg collection for the random orientation. \n'
                    'Mean value taken from the average across 300 aggregates. \n'
                    'Major axis taken as the largest axis from the fit-ellipsoid \n'
                    'Ranges are taken from the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file4.write('Agg-Agg collection for the random orientation. \n'
                    'Avg volume ratio of aggs subtracted from volume ratio of new agg (Vagg/Vellipse)\n'
                    'Mode from 300 aggregates. \n'
                    'Ranges are taken from the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        # Each output file paired with the statistic it tabulates.
        tables = ((file1, agg_cs_ch), (file2, agg_as_ch), (file3, agg_as_mean), (file4, dds_mode))

        # One row per (phi bin, r bin): the bin edges followed by the value.
        for i in range(agg_phi_bins):
            for r in range(agg_r_bins):
                edges = (phi_bins[i], phi_bins[i+1], all_r_bins[i,r], all_r_bins[i,r+1])
                for out, values in tables:
                    out.write('%.3f %.3f %.3f %.3f %.3f \n' % (edges + (values[i,r],)))

In [None]:
#write to file for output as array:
#random orientation
#sampled aggs for range
# The statistics written here come from whichever `orientation` was loaded at
# the top of the notebook, while the file names and headers below are fixed to
# the random orientation. Refuse to write when the flat data is loaded, so the
# random tables can never be filled with flat values.
# NOTE(review): the exact tag used for the random orientation is not visible in
# this notebook, so only the known 'flat' case is excluded -- tighten if known.
if orientation == 'flat':
    print("Skipping random-orientation sampled-range tables: loaded orientation is %r" % orientation)
else:
    # A single with-statement closes all four files on exit, so no explicit
    # close() calls are needed afterwards.
    with open("../../lookup_tables/agg_agg/newformat_minorax_ellipsoid_rand_ch.dat","w") as file1, \
         open("../../lookup_tables/agg_agg/newformat_majorax_ellipsoid_rand_ch.dat","w") as file2, \
         open("../../lookup_tables/agg_agg/newformat_majorax_ellipsoid_rand_mean.dat","w") as file3, \
         open("../../lookup_tables/agg_agg/newformat_dd_rand_mode.dat","w") as file4:

        # Free-text headers describing each table.
        file1.write('Agg-Agg collection for the random orientation. \n'
                    'Characteristic values taken from the peak of a fit \n'
                    'gamma distribution from 300 aggregates. \n'
                    'Minor axis taken as the smallest axis from the fit-ellipsoid \n'
                    'Ranges are taken from the aggregates being pulled from the db, \n'
                    'not the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file2.write('Agg-Agg collection for the random orientation. \n'
                    'Characteristic values taken from the peak of a fit \n'
                    'gamma distribution from 300 aggregates. \n'
                    'Ranges are taken from the aggregates being pulled from the db, \n'
                    'not the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file3.write('Agg-Agg collection for the random orientation. \n'
                    'Mean value taken from the average across 300 aggregates. \n'
                    'Major axis taken as the largest axis from the fit-ellipsoid \n'
                    'Ranges are taken from the aggregates being pulled from the db, \n'
                    'not the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        file4.write('Agg-Agg collection for the random orientation. \n'
                    'Volume ratio of agg subtracted from volume ratio of new agg (Vagg/Vellipse)\n'
                    'Mode from 300 aggregates. \n'
                    'Ranges are taken from the aggregates being pulled from the db, \n'
                    'not the bin edges (maximum range) \n'
                    'Order: agg phi min, agg phi max, agg r min, agg r max, value \n')

        # Each output file paired with the statistic it tabulates.
        tables = ((file1, agg_cs_ch), (file2, agg_as_ch), (file3, agg_as_mean), (file4, dds_mode))

        # One row per (phi bin, r bin): the observed min/max aspect ratio and
        # radius of the clusters pulled for that bin, followed by the value.
        for i in range(agg_phi_bins):
            for r in range(agg_r_bins):
                listaggphi = [n.c/n.a for n in pulled_clus[i,r,:]]
                listaggr = [n.r for n in pulled_clus[i,r,:]]
                ranges = (min(listaggphi), max(listaggphi), min(listaggr), max(listaggr))
                for out, values in tables:
                    out.write('%.3f %.3f %.3f %.3f %.3f \n' % (ranges + (values[i,r],)))