In [1]:
import geopandas
import libpysal
import scipy
import pandas as pd
import numpy as np
from time import time
import os.path

from dask.distributed import Client, LocalCluster, as_completed

In [3]:
# Lookup table of cross-chunk neighbours. `convolute` reads it as
# `cross_chunk.loc[chunk_id].indices`, iterating over pairs of
# (other chunk id, positional row indices into that chunk's cells file).
cross_chunk = pd.read_parquet(
    '../../urbangrammar_samba/spatial_signatures/cross-chunk_indices.pq'
)

In [4]:
def convolute(chunk_id):
    """Add neighbourhood quartiles of two morphometric characters to a chunk.

    For every cell of chunk ``chunk_id`` the 25th, 50th and 75th percentiles
    (NaN-ignoring, 'midpoint' interpolation) of ``ltbIBD`` and ``stbCeA`` are
    computed over the cell itself plus its neighbours in the ``w3`` spatial
    weights matrix. Neighbours living in other chunks are pulled in through
    the module-level ``cross_chunk`` table. The resulting ``<character>_q1``,
    ``<character>_q2`` and ``<character>_q3`` columns are written into the
    existing ``conv_{chunk_id}.pq`` file, which is overwritten in place.

    Parameters
    ----------
    chunk_id : int
        Identifier of the chunk; used to build the names of the cells,
        weights and convolutions files and to index ``cross_chunk``.

    Returns
    -------
    str
        Human-readable status message including the elapsed wall time.
    """
    s = time()
    base = "../../urbangrammar_samba/spatial_signatures"
    cells_dir = f"{base}/morphometrics/cells"
    conv_path = f"{base}/morphometrics/convolutions/conv_{chunk_id}.pq"

    cells = geopandas.read_parquet(f"{cells_dir}/cells_{chunk_id}.pq")
    cells['keep'] = True
    n_keep = len(cells)

    # Add neighbouring cells from other chunks. They are only needed as
    # neighbours, so they are flagged keep=False. `.assign` returns a new
    # frame instead of writing into the `.iloc` slice (no SettingWithCopy).
    frames = [cells]
    for chunk, inds in cross_chunk.loc[chunk_id].indices.items():
        add_cells = geopandas.read_parquet(f"{cells_dir}/cells_{chunk}.pq").iloc[inds]
        frames.append(add_cells.assign(keep=False))

    # One concat keeps the chunk's own cells first (positions 0..n_keep-1),
    # followed by the cross-chunk cells in iteration order. It also works
    # when there are no cross-chunk cells at all.
    df = pd.concat(frames, ignore_index=True)

    # read W
    w = libpysal.weights.WSP(scipy.sparse.load_npz(f"{base}/weights/w3_{chunk_id}.npz")).to_W()

    characters = ['ltbIBD', 'stbCeA']

    # Pull each character out as a NumPy array once; positional indexing into
    # it is equivalent to `df.iloc[neighbours][c]` but avoids building a
    # DataFrame slice for every single cell.
    values = {c: df[c].to_numpy() for c in characters}
    quartiles = {c: [] for c in characters}

    # Only the chunk's own cells are written back, and they occupy the first
    # n_keep positions of `df`, so cross-chunk rows are never used as focal
    # cells (they still take part as neighbours).
    for i in range(n_keep):
        neighbours = [i]
        neighbours += w.neighbors[i]

        for c in characters:
            # NOTE(review): NumPy >= 1.22 renamed `interpolation` to `method`;
            # the original keyword is kept so older environments keep working.
            quartiles[c].append(
                np.nanpercentile(values[c][neighbours], [25, 50, 75], interpolation='midpoint')
            )

    exploded = pd.concat(
        [
            pd.DataFrame(quartiles[c], columns=[c + '_q1', c + '_q2', c + '_q3'])
            for c in characters
        ],
        axis=1,
    )

    existing = pd.read_parquet(conv_path)

    # Assignment aligns on index labels: `exploded` carries labels
    # 0..n_keep-1. Assumes `existing` uses the same labels in the same row
    # order as the cells file - TODO confirm; unmatched labels become NaN.
    existing[exploded.columns] = exploded
    existing.to_parquet(conv_path)

    return f"Chunk {chunk_id} processed sucessfully in {time() - s} seconds."

In [5]:
# Number of worker processes in the local Dask cluster; each one runs a
# single thread, so at most `workers` tasks execute at the same time.
workers = 8
client = Client(
    LocalCluster(
        n_workers=workers,
        threads_per_worker=1,
    )
)
# Bare expression so the notebook renders the client summary.
client

0,1
Client  Scheduler: tcp://127.0.0.1:45725  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 8  Cores: 8  Memory: 84.28 GB


In [6]:
%%time
# Total number of chunks to process; chunk ids run from 0 to n_chunks - 1.
n_chunks = 103
inputs = iter(range(n_chunks))

# Prime the cluster with at most one task per worker. `range(workers)` comes
# first in `zip` so that it is the iterator that ends the pairing: no chunk id
# is pulled from `inputs` and then dropped, and having more workers than
# chunks simply submits every chunk instead of raising StopIteration.
futures = [
    client.submit(convolute, chunk_id)
    for _, chunk_id in zip(range(workers), inputs)
]
ac = as_completed(futures)
for finished_future in ac:
    # Each time a task finishes, hand the next pending chunk to the cluster
    # so the number of in-flight tasks stays constant until inputs run out.
    # Chunk ids are ints, so None is a safe "exhausted" sentinel.
    next_chunk = next(inputs, None)
    if next_chunk is not None:
        ac.add(client.submit(convolute, next_chunk))
    # `.result()` re-raises any exception raised inside the task.
    print(finished_future.result())

Chunk 0 processed sucessfully in 208.44732093811035 seconds.
Chunk 3 processed sucessfully in 248.29846453666687 seconds.
Chunk 2 processed sucessfully in 276.4308907985687 seconds.
Chunk 1 processed sucessfully in 289.92260479927063 seconds.
Chunk 7 processed sucessfully in 324.9429392814636 seconds.
Chunk 4 processed sucessfully in 358.24270963668823 seconds.
Chunk 5 processed sucessfully in 431.5343999862671 seconds.
Chunk 9 processed sucessfully in 259.7171881198883 seconds.
Chunk 8 processed sucessfully in 302.14715337753296 seconds.
Chunk 11 processed sucessfully in 226.80128169059753 seconds.
Chunk 10 processed sucessfully in 248.843092918396 seconds.
Chunk 6 processed sucessfully in 555.3480136394501 seconds.
Chunk 13 processed sucessfully in 289.82896614074707 seconds.
Chunk 12 processed sucessfully in 381.17248272895813 seconds.
Chunk 14 processed sucessfully in 297.7357301712036 seconds.
Chunk 15 processed sucessfully in 234.40446066856384 seconds.
Chunk 18 processed sucessf

In [7]:
# Disconnect the client now that all chunks have been processed.
# NOTE(review): the LocalCluster was constructed separately and passed to
# Client; confirm whether it also needs to be shut down explicitly.
client.close()

In [7]:
# Load the convolution table of chunk 0 so it can be inspected for NaNs.
cells = pd.read_parquet(
    "../../urbangrammar_samba/spatial_signatures/morphometrics/convolutions/conv_0.pq"
)

In [8]:
# Number of rows in `cells` that contain at least one missing value.
cells.isna().any(axis="columns").sum()

0

In [9]:
# Report, for every chunk, how many rows of its convolution table contain
# at least one missing value.
for chunk_id in range(103):
    cells = pd.read_parquet(
        f"../../urbangrammar_samba/spatial_signatures/morphometrics/convolutions/conv_{chunk_id}.pq"
    )
    rows_with_nan = cells.isna().any(axis="columns").sum()
    print(f"Chunk {chunk_id} has {rows_with_nan} rows with at least one NaN.")

Chunk 0 has 0 rows with at least one NaN.
Chunk 1 has 0 rows with at least one NaN.
Chunk 2 has 0 rows with at least one NaN.
Chunk 3 has 0 rows with at least one NaN.
Chunk 4 has 0 rows with at least one NaN.
Chunk 5 has 167 rows with at least one NaN.
Chunk 6 has 0 rows with at least one NaN.
Chunk 7 has 0 rows with at least one NaN.
Chunk 8 has 0 rows with at least one NaN.
Chunk 9 has 0 rows with at least one NaN.
Chunk 10 has 3 rows with at least one NaN.
Chunk 11 has 0 rows with at least one NaN.
Chunk 12 has 0 rows with at least one NaN.
Chunk 13 has 2 rows with at least one NaN.
Chunk 14 has 3 rows with at least one NaN.
Chunk 15 has 0 rows with at least one NaN.
Chunk 16 has 0 rows with at least one NaN.
Chunk 17 has 0 rows with at least one NaN.
Chunk 18 has 0 rows with at least one NaN.
Chunk 19 has 0 rows with at least one NaN.
Chunk 20 has 3 rows with at least one NaN.
Chunk 21 has 0 rows with at least one NaN.
Chunk 22 has 0 rows with at least one NaN.
Chunk 23 has 0 rows