In [1]:
import geopandas
import pygeos
import pandas as pd
from time import time

from momepy_utils import get_edge_ratios, get_nodes

In [None]:
def link(chunk_id) -> str:
    """Attach street-network characters to the cells of one chunk.

    Reads the cells, edges and nodes parquet files belonging to ``chunk_id``,
    adds edge-based and node-based columns to the cells and writes the result
    back to the same cells file it was read from.

    Parameters
    ----------
    chunk_id
        Value interpolated into the ``cells_{chunk_id}.pq``,
        ``edges_{chunk_id}.pq`` and ``nodes_{chunk_id}.pq`` file names.

    Returns
    -------
    str
        Status message containing the chunk id and the elapsed seconds.

    Notes
    -----
    The cells file is overwritten in place.
    NOTE(review): re-running on an already processed chunk repeats the merge
    with ``nodes`` on a file that already holds the linked columns - confirm
    this is safe before re-running.
    """
    start = time()
    cells_path = f"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk_id}.pq"
    cells = geopandas.read_parquet(cells_path)
    edges = geopandas.read_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/edges/edges_{chunk_id}.pq")
    nodes = geopandas.read_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/nodes/nodes_{chunk_id}.pq")

    # Both helpers come from momepy_utils. Each `edgeID` entry is used below
    # as a mapping whose keys are positional row numbers of `edges` and whose
    # values are the multipliers applied to those rows.
    cells['edgeID'] = get_edge_ratios(cells, edges)
    cells['nodeID'] = get_nodes(cells, nodes, edges, 'nodeID', 'edgeID', 'node_start', 'node_end')

    characters = ['sdsSPW', 'sdsSWD', 'sdsSPO', 'sdsLen', 'sssLin', 'ldsMSL']
    # Select the character columns once. Doing it inside the loop slices the
    # full edges frame (geometry included) again for every single cell.
    edge_characters = edges[characters]
    # Per cell: pick the linked edge rows, scale each row by its multiplier
    # and sum the rows into one value per character.
    weighted_characters = [
        edge_characters.iloc[list(ratios.keys())]
        .multiply(list(ratios.values()), axis='rows')
        .sum(axis=0)
        for ratios in cells.edgeID
    ]
    cells[characters] = pd.DataFrame(weighted_characters, index=cells.index)

    # Bring in the attributes of the node assigned to each cell and rename
    # them to the short character codes.
    cells = cells.merge(nodes.drop(columns=['geometry']), on='nodeID', how='left')
    cells = cells.rename({'degree': 'mtdDeg', 'meshedness': 'lcdMes', 'proportion_3': 'linP3W', 'proportion_4': 'linP4W',
                     'proportion_0': 'linPDE', 'local_closeness': 'lcnClo'}, axis='columns')

    # The mapping column is split into two list columns and then dropped
    # before the table is written out.
    cells['edgeID_keys'] = cells.edgeID.apply(lambda d: list(d.keys()))
    cells['edgeID_values'] = cells.edgeID.apply(lambda d: list(d.values()))

    cells.drop(columns='edgeID').to_parquet(cells_path)

    return f"Chunk {chunk_id} processed sucessfully in {time() - start} seconds."

In [2]:
from dask.distributed import Client, LocalCluster, as_completed

In [3]:
# Start a local Dask cluster with 14 single-threaded workers and connect a
# client to it. `workers` is reused further down to size the first batch of
# submitted chunks.
workers = 14
client = Client(LocalCluster(n_workers=workers, threads_per_worker=1))
client  # last expression of the cell: shows the client/cluster summary

0,1
Client  Scheduler: tcp://127.0.0.1:45911  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 14  Cores: 14  Memory: 84.28 GB


In [5]:
%%time
# Run `link` over all 103 chunks while keeping at most `workers` tasks in
# flight: start one task per worker, then submit one more chunk each time a
# task finishes.
inputs = iter(range(103))
# zip() stops at the shorter iterable, so the initial batch is also valid when
# there are fewer chunks than workers (calling next(inputs) once per worker
# would raise StopIteration in that case). range(workers) comes first so no
# chunk id is consumed and lost when the batch is full.
futures = [client.submit(link, chunk_id) for _, chunk_id in zip(range(workers), inputs)]
ac = as_completed(futures)
for finished_future in ac:
    # A task has finished: queue the next pending chunk, if any remain.
    # The sentinel is compared with `is not None` because a chunk id may be 0.
    next_chunk = next(inputs, None)
    if next_chunk is not None:
        new_future = client.submit(link, next_chunk)
        ac.add(new_future)
    # result() returns the status message of the finished chunk; if the task
    # failed, it re-raises the task's exception here and the loop stops.
    print(finished_future.result())

Chunk 11 processed sucessfully in 566.2533161640167 seconds.
Chunk 3 processed sucessfully in 574.3479053974152 seconds.
Chunk 10 processed sucessfully in 607.8959333896637 seconds.
Chunk 0 processed sucessfully in 614.784334897995 seconds.
Chunk 2 processed sucessfully in 669.9810400009155 seconds.
Chunk 9 processed sucessfully in 680.3808822631836 seconds.
Chunk 1 processed sucessfully in 710.4152896404266 seconds.
Chunk 7 processed sucessfully in 773.7010431289673 seconds.
Chunk 13 processed sucessfully in 777.4415595531464 seconds.
Chunk 8 processed sucessfully in 818.0839283466339 seconds.
Chunk 4 processed sucessfully in 859.4516260623932 seconds.
Chunk 12 processed sucessfully in 1117.8577580451965 seconds.
Chunk 15 processed sucessfully in 569.5318143367767 seconds.
Chunk 17 processed sucessfully in 604.9439563751221 seconds.
Chunk 18 processed sucessfully in 555.3542995452881 seconds.
Chunk 5 processed sucessfully in 1231.6413190364838 seconds.
Chunk 16 processed sucessfully i

In [14]:
# Disconnect the Dask client now that the parallel linkage run is over.
client.close()

In [5]:
# Enclosure-level characters; joined onto the cells through `enclosureID` in
# the cells that follow. The path has no placeholders, so it is a plain string
# rather than an f-string.
enclosures = pd.read_parquet("../../urbangrammar_samba/spatial_signatures/morphometrics/enclosures.pq")

In [6]:
# Inspect the loaded enclosure table; the rich display shows only its first
# and last rows.
enclosures

Unnamed: 0,enclosureID,ldeAre,ldePer,lseCCo,lseERI,lseCWA,lteOri,neighbors,lteWNB,lieWCe
127225,109777,12611.387500,451.087222,0.626215,0.999515,45.089900,7.787018,15,0.033253,0.000372
127218,109770,3200.063800,226.380464,0.627116,0.999570,22.098473,8.047105,52,0.229702,0.000769
127217,109769,10837.702700,433.243236,0.554946,0.965459,55.097141,4.096679,6,0.013849,0.000379
127237,109789,19370.910000,657.491255,0.348630,0.999121,147.961000,2.281494,13,0.019772,0.000117
127238,109790,17780.420000,639.617669,0.338138,1.002238,149.521099,2.281494,11,0.017198,0.000760
...,...,...,...,...,...,...,...,...,...,...
82486,634854,19878.400766,1304.939638,0.069903,0.610820,653.750258,31.394525,5,0.003832,0.001436
82485,634853,26141.296973,1320.585561,0.086300,0.763426,641.777215,10.525297,11,0.008330,0.000298
82479,634847,14608.806297,706.295760,0.195696,0.862550,248.082465,3.028991,12,0.016990,0.001065
82482,634850,206576.769136,4305.068440,0.066546,0.690594,2176.761462,2.564588,17,0.003949,0.001236


In [12]:
def link(chunk_id):
    """Join the enclosure characters onto the cells of one chunk.

    The cells parquet file of ``chunk_id`` is read, left-merged on
    ``enclosureID`` with the module-level ``enclosures`` table (without its
    ``neighbors`` column) and written back to the same path.

    Returns a status message with the chunk id and the elapsed seconds.
    """
    started = time()
    cells_path = f"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk_id}.pq"

    chunk_cells = geopandas.read_parquet(cells_path)
    # `neighbors` is left out of the join, so it never reaches the cells.
    enclosure_characters = enclosures.drop(columns=['neighbors'])
    joined = chunk_cells.merge(enclosure_characters, on='enclosureID', how='left')

    # Overwrites the file the cells were read from.
    joined.to_parquet(cells_path)

    elapsed = time() - started
    return f"Chunk {chunk_id} processed sucessfully in {elapsed} seconds."

In [15]:
# Join the enclosure characters onto every chunk of cells, one chunk after
# another in this process. The right-hand table is identical for all chunks,
# so it is prepared once instead of being rebuilt on each iteration.
enclosure_characters = enclosures.drop(columns=['neighbors'])

for chunk_id in range(103):
    s = time()
    cells_path = f"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk_id}.pq"
    cells = geopandas.read_parquet(cells_path)

    # A left merge keeps every cell, including those with no matching enclosure.
    cells = cells.merge(enclosure_characters, on='enclosureID', how='left')

    # Overwrites the file the chunk was read from.
    cells.to_parquet(cells_path)

    print(f"Chunk {chunk_id} processed sucessfully in {time() - s} seconds.")


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  import sys


Chunk 0 processed sucessfully in 6.70455002784729 seconds.
Chunk 1 processed sucessfully in 7.067269325256348 seconds.
Chunk 2 processed sucessfully in 6.917411804199219 seconds.
Chunk 3 processed sucessfully in 5.815715551376343 seconds.
Chunk 4 processed sucessfully in 9.109825611114502 seconds.
Chunk 5 processed sucessfully in 12.65186095237732 seconds.
Chunk 6 processed sucessfully in 13.935195446014404 seconds.
Chunk 7 processed sucessfully in 8.17983889579773 seconds.
Chunk 8 processed sucessfully in 8.334505558013916 seconds.
Chunk 9 processed sucessfully in 6.80608057975769 seconds.
Chunk 10 processed sucessfully in 6.317359209060669 seconds.
Chunk 11 processed sucessfully in 5.656877040863037 seconds.
Chunk 12 processed sucessfully in 10.782751321792603 seconds.
Chunk 13 processed sucessfully in 8.044702768325806 seconds.
Chunk 14 processed sucessfully in 8.065609216690063 seconds.
Chunk 15 processed sucessfully in 6.6622583866119385 seconds.
Chunk 16 processed sucessfully in 