In [None]:
import numpy as np
import pandas as pd
import glob
import seaborn as sns
import pickle
import matplotlib.pyplot as plt
import agg_properties
import sys
sys.path.append("../collection_from_db")
import ipas.cluster_calculations as cc
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=False)
import shapely.geometry as geom
import shapely.ops as shops
from shapely.geometry import Point
from multiprocessing import Pool
import tables
from dask import dataframe as dd

%load_ext memory_profiler
%load_ext autoreload
%autoreload 2

In [None]:
%%time 
# Read in the database of aggregates (all built from the same monomers).
# glob returns matches in arbitrary, OS-dependent order; sorting the paths
# makes the row order of `df` the same on every run.
files = sorted(glob.glob("../instance_files/createdb_iceagg_rand*"))
dfs = []
for file in files:
    print(file)
    # NOTE: unpickling can execute arbitrary code -- only load files that
    # you created or otherwise trust.
    # compression=None reads each file as a plain, uncompressed pickle.
    dfs.append(pd.DataFrame(pd.read_pickle(file, compression=None)))
# Stack every file into a single frame with a fresh 0..n-1 index.
df = pd.concat(dfs, axis=0, ignore_index=True)

In [None]:
def shape(a,b,c):
    '''Label an (a, b, c) triple as 'prolate' or 'oblate'.

    Returns 'prolate' when the difference b - c is no larger than the
    difference a - b, and 'oblate' otherwise.
    (presumably a >= b >= c are axis lengths -- confirm against the caller)
    '''
    return 'prolate' if (a - b) >= (b - c) else 'oblate'

In [None]:
# Characteristic aggregate radius computed from the axes: (a^2 * c)^(1/3).
df['agg_r'] = np.power((np.power(df['a'], 2) * df['c']), (1./3.))
# Keep only rows whose agg_r is below 5000.
df = df[df.agg_r < 5000]
# np.vectorize is a Python-level loop (no speed-up) and raises on an empty
# frame unless otypes is given, so the label is computed with array
# operations instead. The condition is the same one shape() tests.
# vfunc stays defined in case other cells reference it.
vfunc = np.vectorize(shape)
df['shape'] = np.where((df['b'] - df['c']) <= (df['a'] - df['b']), 'prolate', 'oblate')
# Ratio of the c axis to the a axis.
df['agg_phi'] = df.c/df.a

# df['agg_r'] = np.power((np.power(df['a'], 2) * df['c']), (1./3.))
# df['agg_r'] = df['agg_r'][df.agg_r < 5000]
# vfunc = np.vectorize(shape)
# df['shape'] = vfunc(df['a'], df['b'], df['c'])
# df['agg_phi'] = df.c/df.a

In [None]:
def filled_circular_area_ratio(row, dims=('x', 'z'), verbose=True):
    '''Return an aggregate's projected area divided by its enclosing-circle area.

    The points of every monomer are restricted to the coordinates named in
    ``dims`` and replaced by their convex hull. The hulls are unioned, so
    overlapping regions are counted once, which gives the aggregate area.
    A circle is then fitted with ``Cluster_Calculations.make_circle`` to the
    vertices of the convex hull of that union, and the ratio of the two
    areas is returned.

    Parameters
    ----------
    row
        Record exposing ``points`` and ``ncrystals`` (for example one row of
        the aggregate DataFrame); ``row.points[n][dims]`` must yield the
        coordinates of monomer ``n``. ``mono_phi`` is read only when
        ``verbose`` is true.
    dims : sequence of str, optional
        The two coordinate fields to keep. Defaults to ``('x', 'z')``.
    verbose : bool, optional
        When true (the default, matching the previous behaviour), print the
        ratio, both areas and the monomer count for rows with
        ``mono_phi < 1.0`` and more than 10 monomers.

    Returns
    -------
    float
        Union area divided by the area of the buffered circle polygon.
    '''
    # Index with a list, as the previous list default did; the tuple default
    # only avoids a shared mutable default argument.
    fields = list(dims)
    hulls = [geom.MultiPoint(row.points[n][fields]).convex_hull
             for n in range(row.ncrystals)]
    # unary_union is the replacement for the deprecated cascaded_union.
    agg = shops.unary_union(hulls)
    area = agg.area

    # The union is already a single geometry, so its hull is taken directly
    # instead of running a second union over it.
    outline = agg.convex_hull
    xs, ys = outline.exterior.xy
    c = cc.Cluster_Calculations(row)
    # The result is read below as (centre x, centre y, radius).
    circ = c.make_circle([[px, py] for px, py in zip(xs, ys)])
    circle = Point(circ[0], circ[1]).buffer(circ[2])
    # Area of the buffered polygon standing in for the circle.
    Ac = circle.area

    if verbose and row.mono_phi < 1.0 and row.ncrystals > 10:
        print(area/Ac, area, Ac, row.ncrystals)

    return area/Ac

In [None]:
# Walk the frame row by row; the return value is discarded, so this cell is
# only useful for whatever filled_circular_area_ratio prints.
for _, agg_row in df.iterrows():
    filled_circular_area_ratio(agg_row)

In [None]:
%%time
# One area ratio per row of df (axis=1 passes each row to the function).
df_att = df.apply(filled_circular_area_ratio, axis=1)

In [None]:
# Load the stored area ratios; this rebinds df_att, replacing any value it
# held before, and renumbers the rows 0..n-1.
df_att = pd.read_hdf('df_rand_area_ratio.h5')
df_att = df_att.reset_index(drop=True)

In [None]:
# Present the area ratios as a one-column frame named 'area ratio'.
# pd.DataFrame(obj, columns=[...]) selects by label, so a named Series or a
# one-column frame whose label differs would silently become all NaN.
# Those two cases are relabelled explicitly instead.
if isinstance(df_att, pd.Series):
    df_att = df_att.to_frame(name='area ratio')
elif (isinstance(df_att, pd.DataFrame) and df_att.shape[1] == 1
        and 'area ratio' not in df_att.columns):
    df_att = df_att.rename(columns={df_att.columns[0]: 'area ratio'})
else:
    df_att = pd.DataFrame(df_att, columns=['area ratio'])
df_att

In [None]:
# Open every parquet file matching the pattern through dask, then
# materialise the result in memory with .compute().
ddf = dd.read_parquet("../instance_files/parquet_files/*rand*", engine="pyarrow")
ddf = ddf.compute()
ddf

In [None]:
# c/a ratio and characteristic radius, (a^2 * c)^(1/3), for every row.
ddf['agg_phi'] = ddf.c / ddf.a
ddf['agg_r'] = np.power(np.power(ddf.a, 2) * ddf.c, 1. / 3.)
# Drop whole rows with agg_r of 5000 or more, then renumber the index
# 0..n-1.
ddf = ddf.loc[ddf['agg_r'] < 5000].reset_index(drop=True)

In [None]:
# Display the current contents of ddf.
ddf

In [None]:
# Column-wise join: rows are matched on index label, so both frames need
# the same index for the values to line up. Labels present in only one
# frame produce NaN in the other frame's columns.
dfc = pd.concat([df_att, ddf], axis='columns')
dfc

In [None]:
# Save the frame of IPAS attributes; mode='w' replaces any existing file.
dfc.to_hdf('df_area_ratio_no_points.h5', mode='w', key='df_att')

## read in IPAS attribute dataframe

In [None]:
# Load the stored IPAS attribute table; this rebinds df_att.
df_att = pd.read_hdf(path_or_buf='df_rand_attributes.h5')

In [None]:
# List the column labels of df_att.
df_att.columns

In [None]:
# Call info() -- without the parentheses the cell only shows the repr of the
# bound method rather than the column/dtype/memory summary.
df.info()

In [None]:
# Show df without its 'points' column. drop() returns a new frame and the
# result is not assigned, so df itself still contains 'points'.
# NOTE(review): if the intent was to remove the column for later cells, the
# result needs to be assigned -- confirm.
df.drop(columns=['points'])

In [None]:
# Row-wise stack of df and df_att; columns missing from one frame are filled
# with NaN for that frame's rows.
cdf = pd.concat([df, df_att], axis=0)
cdf

In [None]:
# Read in the CPI data (all campaigns are in one file).
df_CPI = pd.read_csv('all_campaigns.csv')
# Keep only the rows classified as aggregates.
df_CPI = df_CPI.loc[df_CPI['classification'].eq('agg')]

In [None]:
# Stack the CPI and IPAS rows; `keys` adds an outer index level named
# "Source" holding 'CPI' or 'IPAS' for each row.
cdf = pd.concat([df_CPI, df_att], keys=['CPI', 'IPAS'], names=["Source"])
# Turn both index levels into columns, then discard the inner one (the
# original row labels, which reset_index names 'level_1').
cdf = cdf.reset_index().drop(columns='level_1')