# Functional data

This notebook links various functional data layers to enclosed tessellation (ET) cells across Great Britain (GB).

## Population estimates

In [1]:
import warnings

import geopandas as gpd
import pandas as pd
import numpy as np
import tobler
from time import time
import xarray
import rioxarray
import rasterstats

from dask.distributed import Client, LocalCluster, as_completed
import dask.dataframe as dd

In [2]:
# Silence the warning whose message matches "initial implementation of Parquet"
# so it does not clutter the output of every parquet read/write below.
warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')

In [2]:
# Population estimate polygons; used below as the source layer of the
# area-weighted interpolation.
population_est = gpd.read_parquet("../../urbangrammar_samba/functional_data/population_estimates/gb_population_estimates.pq")

In [3]:
# Display the table for a quick visual check of its columns.
population_est

Unnamed: 0,code,population,geometry
0,E00000095,508,"POLYGON ((549375.280 184970.971, 549375.032 18..."
1,E00000096,322,"POLYGON ((549003.992 184933.133, 549007.974 18..."
2,E00000097,348,"POLYGON ((548895.389 184911.808, 548897.030 18..."
3,E00000098,305,"POLYGON ((548320.415 184671.916, 548317.197 18..."
4,E00000001,251,"POLYGON ((532303.125 181877.594, 532301.345 18..."
...,...,...,...
6971,S01013477,638,"POLYGON ((308686.000 672220.000, 308688.000 67..."
6972,S01013478,809,"POLYGON ((307423.889 672579.756, 307424.594 67..."
6973,S01013479,758,"POLYGON ((308478.000 672440.000, 308485.000 67..."
6974,S01013480,703,"POLYGON ((308734.000 672598.000, 308743.000 67..."


In [15]:
# Load a single tessellation chunk (chunk 0) to prototype the processing on.
chunk = gpd.read_parquet("../../urbangrammar_samba/spatial_signatures/tessellation/tess_0.pq")

In [8]:
# Display the chunk for a quick visual check of its columns.
chunk

Unnamed: 0,hindex,tessellation,buildings
0,c000e109777t0000,"POLYGON ((336287.376 427055.178, 336286.745 42...","POLYGON ((336281.920 427085.170, 336285.250 42..."
1,c000e109777t0001,"POLYGON ((336192.633 427056.666, 336192.607 42...","POLYGON ((336196.990 427062.730, 336209.420 42..."
2,c000e109777t0002,"POLYGON ((336202.056 427028.254, 336199.483 42...","POLYGON ((336198.720 427052.340, 336200.960 42..."
3,c000e109777t0003,"POLYGON ((336194.678 427020.851, 336196.952 42...","POLYGON ((336213.400 427028.670, 336214.800 42..."
4,c000e109777t0004,"POLYGON ((336220.377 427052.195, 336218.829 42...","POLYGON ((336248.750 427066.160, 336246.990 42..."
...,...,...,...
114648,c000e599810t0000,"POLYGON Z ((348790.934 452623.250 0.000, 34878...",
114649,c000e094707t0000,"POLYGON ((352410.100 452567.740, 352408.620 45...",
114650,c000e599602t0000,"POLYGON Z ((352035.703 452585.968 0.000, 35204...",
114651,c000e599795t0000,"POLYGON Z ((348790.934 452623.250 0.000, 34879...",


In [5]:
# Bounding box of the chunk; used to subset the source layer spatially.
xmin, ymin, xmax, ymax = chunk.total_bounds

In [6]:
%%time
# Area-weighted interpolation of `population` (passed as an extensive variable)
# from the estimate polygons selected by the chunk's bounding box onto the
# chunk's building geometries.
ests = tobler.area_weighted.area_interpolate(population_est.cx[xmin:xmax, ymin:ymax], chunk.set_geometry("buildings"), extensive_variables=['population'])

CPU times: user 6.69 s, sys: 18 ms, total: 6.71 s
Wall time: 6.71 s


In [9]:
# Largest interpolated population value among the chunk's target geometries.
ests.population.max()

373.49790543317795

In [10]:
# Interpolate population onto the buildings of every tessellation chunk and
# store one `hindex` -> `population` table per chunk.
#
# The loop uses its own variable names on purpose: rebinding the notebook-level
# `chunk` here would leave it without a `tessellation` column and break the
# night-lights exploration cells further down on a top-to-bottom run.
for chunk_id in range(103):
    s = time()
    chunk_buildings = gpd.read_parquet(
        f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk_id}.pq",
        columns=["hindex", "buildings"],
    ).set_geometry("buildings")
    b_xmin, b_ymin, b_xmax, b_ymax = chunk_buildings.total_bounds
    # Only estimate polygons selected by the chunk's bounding box can contribute.
    chunk_ests = tobler.area_weighted.area_interpolate(
        population_est.cx[b_xmin:b_xmax, b_ymin:b_ymax],
        chunk_buildings,
        extensive_variables=['population'],
    )
    pop = pd.DataFrame({'hindex': chunk_buildings.hindex.values, "population": chunk_ests.population.values})
    pop.to_parquet(f"../../urbangrammar_samba/spatial_signatures/functional/population/pop_{chunk_id}")
    print(f"Chunk {chunk_id} processed sucessfully in {time() - s} seconds.")

Chunk 0 processed sucessfully in 6.84471321105957 seconds.
Chunk 1 processed sucessfully in 8.95691466331482 seconds.
Chunk 2 processed sucessfully in 7.748736381530762 seconds.
Chunk 3 processed sucessfully in 8.444145202636719 seconds.
Chunk 4 processed sucessfully in 12.662679195404053 seconds.
Chunk 5 processed sucessfully in 33.37630248069763 seconds.
Chunk 6 processed sucessfully in 26.04380989074707 seconds.
Chunk 7 processed sucessfully in 8.81340217590332 seconds.
Chunk 8 processed sucessfully in 9.171989679336548 seconds.
Chunk 9 processed sucessfully in 7.381581783294678 seconds.
Chunk 10 processed sucessfully in 8.442428827285767 seconds.
Chunk 11 processed sucessfully in 8.21262001991272 seconds.
Chunk 12 processed sucessfully in 13.600036859512329 seconds.
Chunk 13 processed sucessfully in 9.243984937667847 seconds.
Chunk 14 processed sucessfully in 10.093465805053711 seconds.
Chunk 15 processed sucessfully in 7.550076246261597 seconds.
Chunk 16 processed sucessfully in 7

## Night lights

In [24]:
# Open the night-lights raster with rioxarray (`xarray.open_rasterio` is
# deprecated and no longer available in current xarray releases).
nl = rioxarray.open_rasterio("../../urbangrammar_samba/functional_data/employment/night_lights_osgb.tif")
# Restrict the raster to the bounding box of the exploratory chunk.
nl_clip = nl.rio.clip_box(*chunk.total_bounds)
# Plain array and its affine transform, as expected by rasterstats.
arr = nl_clip.values
affine = nl_clip.rio.transform()

In [25]:
%%time 
# Mean value of the first raster band within every tessellation polygon of the
# exploratory chunk; pixels merely touched by a polygon are counted as well.
stats_nl = rasterstats.zonal_stats(
    chunk.tessellation,
    affine=affine,
    raster=arr[0],
    nodata=np.nan,
    all_touched=True,
    stats=['mean'],
)

CPU times: user 1min 42s, sys: 6.97 s, total: 1min 49s
Wall time: 1min 44s


In [26]:
# Show only the first few results; the full list has one entry per polygon
# passed to `zonal_stats` and would flood the notebook output.
stats_nl[:10]

[{'mean': 5.210000038146973},
 {'mean': 4.674999952316284},
 {'mean': 4.674999952316284},
 {'mean': 4.674999952316284},
 {'mean': 4.674999952316284},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684}

In [20]:
# Number of dask workers; also used below as the number of tasks kept in flight.
workers = 8
# Local cluster with `workers` single-threaded workers.
client = Client(LocalCluster(n_workers=workers, threads_per_worker=1))
# Display the client summary (scheduler address, dashboard link).
client

0,1
Client  Scheduler: tcp://127.0.0.1:38779  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 8  Cores: 8  Memory: 134.91 GB


In [32]:
def _night_lights(chunk_id):
    """Compute the mean night-lights value of every tessellation cell in a chunk.

    Reads the ``hindex`` and ``tessellation`` columns of tessellation chunk
    ``chunk_id``, clips the night-lights raster to the chunk's bounding box,
    takes the mean of the first raster band within each tessellation polygon
    (``all_touched=True``) and writes the ``hindex`` / ``night_lights`` table
    to a parquet file named after the chunk.

    Parameters
    ----------
    chunk_id : int
        Number of the tessellation chunk to process.

    Returns
    -------
    str
        Status message including the elapsed time in seconds.
    """
    # Imported locally so the function is self-contained when executed by a
    # dask worker; importing rioxarray also registers the `.rio` accessor.
    import rioxarray

    s = time()

    chunk = gpd.read_parquet(f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk_id}.pq", columns=["hindex", "tessellation"])
    # `rioxarray.open_rasterio` replaces the deprecated `xarray.open_rasterio`;
    # the context manager closes the raster once the clipped values are in memory.
    with rioxarray.open_rasterio("../../urbangrammar_samba/functional_data/employment/night_lights_osgb.tif") as nl:
        nl_clip = nl.rio.clip_box(*chunk.total_bounds)
        arr = nl_clip.values
        affine = nl_clip.rio.transform()
    stats_nl = rasterstats.zonal_stats(
        chunk.tessellation,
        raster=arr[0],
        affine=affine,
        stats=['mean'],
        all_touched=True,
        nodata=np.nan,
    )
    # `zonal_stats` returns one dict per input polygon, in input order.
    chunk["night_lights"] = [x['mean'] for x in stats_nl]
    chunk[["hindex", "night_lights"]].to_parquet(f"../../urbangrammar_samba/spatial_signatures/functional/night_lights/nl_{chunk_id}")

    return f"Chunk {chunk_id} processed sucessfully in {time() - s} seconds."

In [33]:
# Keep `workers` tasks in flight: seed the cluster with one task per worker,
# then submit the next chunk every time a task finishes.
inputs = iter(range(103))
futures = [client.submit(_night_lights, next(inputs)) for _ in range(workers)]
ac = as_completed(futures)
for finished_future in ac:
    try:
        next_chunk_id = next(inputs)
    except StopIteration:
        # Every chunk has already been submitted; just drain the remaining results.
        pass
    else:
        new_future = client.submit(_night_lights, next_chunk_id)
        ac.add(new_future)
    print(finished_future.result())

Chunk 3 processed sucessfully in 101.62825918197632 seconds.
Chunk 0 processed sucessfully in 108.6822566986084 seconds.
Chunk 2 processed sucessfully in 115.19350171089172 seconds.
Chunk 1 processed sucessfully in 122.15239238739014 seconds.
Chunk 7 processed sucessfully in 135.8249044418335 seconds.
Chunk 4 processed sucessfully in 151.0806565284729 seconds.
Chunk 11 processed sucessfully in 96.01190829277039 seconds.
Chunk 10 processed sucessfully in 106.74090838432312 seconds.
Chunk 5 processed sucessfully in 222.54443430900574 seconds.
Chunk 9 processed sucessfully in 118.09922075271606 seconds.
Chunk 8 processed sucessfully in 138.83542704582214 seconds.
Chunk 6 processed sucessfully in 245.05762553215027 seconds.
Chunk 13 processed sucessfully in 131.59788346290588 seconds.
Chunk 15 processed sucessfully in 100.00797629356384 seconds.
Chunk 12 processed sucessfully in 189.21026062965393 seconds.
Chunk 16 processed sucessfully in 110.26696038246155 seconds.
Chunk 17 processed suc

## Workplace population by industry

In [34]:
# Workplace population by industry; used below as the source layer of the
# area-weighted interpolation.
wpz = gpd.read_parquet('../../urbangrammar_samba/functional_data/employment/workplace/workplace_by_industry_gb.pq')

In [46]:
# Interpolate the workplace population columns onto the buildings of every
# tessellation chunk and store one table per chunk.
for chunk_id in range(103):
    s = time()
    chunk = gpd.read_parquet(
        f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk_id}.pq",
        columns=["hindex", "buildings"],
    ).set_geometry("buildings")
    xmin, ymin, xmax, ymax = chunk.total_bounds
    # All columns of `wpz` except the first and the last are interpolated as
    # extensive variables.
    ests = tobler.area_weighted.area_interpolate(
        wpz.cx[xmin:xmax, ymin:ymax],
        chunk,
        extensive_variables=list(wpz.columns[1:-1]),
    )
    ests = ests.assign(hindex=chunk.hindex.values)
    ests.drop("geometry", axis=1).to_parquet(
        f"../../urbangrammar_samba/spatial_signatures/functional/workplace/pop_{chunk_id}"
    )
    print(f"Chunk {chunk_id} processed sucessfully in {time() - s} seconds.")

Chunk 0 processed sucessfully in 4.339667081832886 seconds.
Chunk 1 processed sucessfully in 4.707739353179932 seconds.
Chunk 2 processed sucessfully in 4.347376346588135 seconds.
Chunk 3 processed sucessfully in 3.9326553344726562 seconds.
Chunk 4 processed sucessfully in 6.005887031555176 seconds.
Chunk 5 processed sucessfully in 10.901556253433228 seconds.
Chunk 6 processed sucessfully in 10.061571836471558 seconds.
Chunk 7 processed sucessfully in 5.373244285583496 seconds.
Chunk 8 processed sucessfully in 5.54592227935791 seconds.
Chunk 9 processed sucessfully in 4.712031602859497 seconds.
Chunk 10 processed sucessfully in 4.334367275238037 seconds.
Chunk 11 processed sucessfully in 4.112551212310791 seconds.
Chunk 12 processed sucessfully in 7.6018900871276855 seconds.
Chunk 13 processed sucessfully in 5.514501094818115 seconds.
Chunk 14 processed sucessfully in 5.70710563659668 seconds.
Chunk 15 processed sucessfully in 5.131793260574341 seconds.
Chunk 16 processed sucessfully i

## CORINE Land cover

In [3]:
# CORINE land cover polygons; `Code_18` is the column interpolated below.
corine = gpd.read_parquet("../../urbangrammar_samba/functional_data/land_use/corine/corine_gb.pq")

In [4]:
# Display the table for a quick visual check of its columns.
corine

Unnamed: 0,Code_18,Remark,Area_Ha,ID,geometry
0,112,,1.893751e+02,EU_514927,"MULTIPOLYGON (((274002.422 39453.334, 273980.8..."
1,112,,2.696926e+01,EU_514928,"MULTIPOLYGON (((271153.999 39775.080, 271144.0..."
2,112,,4.202325e+01,EU_514929,"MULTIPOLYGON (((267987.903 40256.489, 267988.2..."
3,112,,2.576880e+01,EU_514930,"MULTIPOLYGON (((280792.702 43134.915, 280767.8..."
4,112,,2.976197e+01,EU_514931,"MULTIPOLYGON (((278839.382 42981.648, 278811.9..."
...,...,...,...,...,...
71785,231,,3.937547e+01,EU_1596493,"MULTIPOLYGON (((608620.221 323294.882, 608615...."
71786,231,,1.024217e+02,EU_1596519,"MULTIPOLYGON (((608837.110 324476.901, 608680...."
71787,112,,7.649168e+01,EU_1573352,"MULTIPOLYGON (((610635.771 323374.952, 610577...."
71788,512,,2.985387e+01,EU_1625523,"MULTIPOLYGON (((597901.209 325401.890, 597852...."


In [11]:
def _dask_binning(corine, cells, n_chunks=512):
    """Build the table of intersection areas between land cover polygons and cells.

    Candidate pairs are found with the spatial index of ``cells``
    (``intersects`` predicate); the area of each pairwise intersection is then
    computed with dask-geopandas over ``n_chunks`` partitions.

    Parameters
    ----------
    corine : GeoDataFrame
        Source polygons (rows of the resulting table).
    cells : GeoDataFrame
        Target polygons (columns of the resulting table).
    n_chunks : int, default 512
        Number of dask partitions used for the intersection step.

    Returns
    -------
    scipy.sparse.dok_matrix
        ``float32`` matrix of shape ``(len(corine), len(cells))`` holding the
        intersection area of every intersecting source/target pair.
    """
    import dask_geopandas as dgpd
    from scipy.sparse import coo_matrix

    # Positional indices of every (source, target) pair whose geometries intersect.
    src_idx, tgt_idx = cells.sindex.query_bulk(corine.geometry, predicate="intersects")

    # One row per candidate pair, holding both geometries side by side.
    pairs = gpd.GeoDataFrame(
        {
            'clc': corine.geometry.values[src_idx],
            'tess': cells.geometry.values[tgt_idx],
        }
    )
    pairs_dask = dgpd.from_geopandas(pairs, npartitions=n_chunks)
    overlap_area = pairs_dask.clc.intersection(pairs_dask.tess).area.compute()

    n_source, n_target = corine.shape[0], cells.shape[0]
    sparse_areas = coo_matrix(
        (overlap_area, (src_idx, tgt_idx)),
        shape=(n_source, n_target),
        dtype=np.float32,
    )
    return sparse_areas.todok()


def _dask_area_interpolate(corine, cells, n_chunks=512, categorical_variables=None):
    """Interpolate categorical variables from ``corine`` onto ``cells`` by area.

    For every unique value of each categorical variable, the intersection
    areas of the source polygons carrying that value are summed per cell and
    divided by the cell's area, giving one ``"{variable}_{value}"`` column per
    value.

    Parameters
    ----------
    corine : GeoDataFrame
        Source polygons carrying the categorical variables.
    cells : GeoDataFrame
        Target polygons.
    n_chunks : int, default 512
        Number of dask partitions passed on to ``_dask_binning``.
    categorical_variables : list of str, optional
        Columns of ``corine`` to interpolate. When ``None`` or empty, a frame
        with one row per cell and no columns is returned (previously this
        raised ``UnboundLocalError``).

    Returns
    -------
    pandas.DataFrame
        One row per cell, in the order of ``cells``, on a default integer index.
    """
    table = _dask_binning(corine, cells, n_chunks)

    categorical = {}
    for variable in categorical_variables or []:
        for value in corine[variable].unique():
            mask = corine[variable] == value
            # Column-wise sum over the source rows having this value gives the
            # total overlapping area per cell.
            categorical[f"{variable}_{value}"] = np.asarray(
                table[mask].sum(axis=0)
            )[0]

    # An explicit index keeps one row per cell even when no variable was requested.
    categorical = pd.DataFrame(categorical, index=pd.RangeIndex(cells.shape[0]))
    # Divide positionally: the rows follow the order of `cells`, not its index
    # labels, so label alignment would be wrong for a non-default index.
    categorical = categorical.div(np.asarray(cells.area), axis="rows")

    return categorical

In [14]:
# Share of every tessellation cell covered by each CORINE `Code_18` class,
# computed chunk by chunk and stored as one table per chunk.
for chunk_id in range(103):
    s = time()
    chunk = gpd.read_parquet(
        f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk_id}.pq",
        columns=["hindex", "tessellation"],
    )
    xmin, ymin, xmax, ymax = chunk.total_bounds
    ests = _dask_area_interpolate(
        corine.cx[xmin:xmax, ymin:ymax],
        chunk,
        categorical_variables=["Code_18"],
    ).assign(hindex=chunk.hindex.values)
    ests.to_parquet(
        f"../../urbangrammar_samba/spatial_signatures/functional/corine/corine_{chunk_id}.pq"
    )
    print(f"Chunk {chunk_id} processed sucessfully in {time() - s} seconds.")

  a = np.array(


Chunk 0 processed sucessfully in 15.180331945419312 seconds.


  a = np.array(


Chunk 1 processed sucessfully in 30.089888095855713 seconds.


  a = np.array(


Chunk 2 processed sucessfully in 21.882530212402344 seconds.


  a = np.array(


Chunk 3 processed sucessfully in 17.526738166809082 seconds.


  a = np.array(


Chunk 4 processed sucessfully in 106.89093780517578 seconds.


  a = np.array(


Chunk 5 processed sucessfully in 317.46085810661316 seconds.


  a = np.array(


Chunk 6 processed sucessfully in 287.55741930007935 seconds.


  a = np.array(


Chunk 7 processed sucessfully in 52.53155994415283 seconds.


  a = np.array(


Chunk 8 processed sucessfully in 49.779944896698 seconds.


  a = np.array(


Chunk 9 processed sucessfully in 34.78387999534607 seconds.


  a = np.array(


Chunk 10 processed sucessfully in 72.16384053230286 seconds.


  a = np.array(


Chunk 11 processed sucessfully in 52.640920877456665 seconds.


  a = np.array(


Chunk 12 processed sucessfully in 65.59830093383789 seconds.


  a = np.array(


Chunk 13 processed sucessfully in 60.7873010635376 seconds.


  a = np.array(


Chunk 14 processed sucessfully in 76.49349236488342 seconds.


  a = np.array(


Chunk 15 processed sucessfully in 16.41744041442871 seconds.


  a = np.array(


Chunk 16 processed sucessfully in 19.981613636016846 seconds.


  a = np.array(


Chunk 17 processed sucessfully in 14.932554244995117 seconds.


  a = np.array(


Chunk 18 processed sucessfully in 15.702290296554565 seconds.


  a = np.array(


Chunk 19 processed sucessfully in 34.342867374420166 seconds.


  a = np.array(


Chunk 20 processed sucessfully in 39.59873700141907 seconds.


  a = np.array(


Chunk 21 processed sucessfully in 51.94429302215576 seconds.


  a = np.array(


Chunk 22 processed sucessfully in 56.69731068611145 seconds.


  a = np.array(


Chunk 23 processed sucessfully in 82.09859800338745 seconds.


  a = np.array(


Chunk 24 processed sucessfully in 28.112233877182007 seconds.


  a = np.array(


Chunk 25 processed sucessfully in 27.96365523338318 seconds.


  a = np.array(


Chunk 26 processed sucessfully in 21.680318593978882 seconds.


  a = np.array(


Chunk 27 processed sucessfully in 44.50490593910217 seconds.


  a = np.array(


Chunk 28 processed sucessfully in 15.241016864776611 seconds.


  a = np.array(


Chunk 29 processed sucessfully in 19.445778846740723 seconds.


  a = np.array(


Chunk 30 processed sucessfully in 75.84844732284546 seconds.


  a = np.array(


Chunk 31 processed sucessfully in 27.67076849937439 seconds.


  a = np.array(


Chunk 32 processed sucessfully in 130.36047387123108 seconds.


  a = np.array(


Chunk 33 processed sucessfully in 60.720449686050415 seconds.


  a = np.array(


Chunk 34 processed sucessfully in 46.61375951766968 seconds.


  a = np.array(


Chunk 35 processed sucessfully in 27.770570516586304 seconds.


  a = np.array(


Chunk 36 processed sucessfully in 17.652199506759644 seconds.


  a = np.array(


Chunk 37 processed sucessfully in 27.977357387542725 seconds.


  a = np.array(


Chunk 38 processed sucessfully in 224.82706594467163 seconds.


  a = np.array(


Chunk 39 processed sucessfully in 68.05412983894348 seconds.


  a = np.array(


Chunk 40 processed sucessfully in 65.66244673728943 seconds.


  a = np.array(


Chunk 41 processed sucessfully in 22.07956552505493 seconds.


  a = np.array(


Chunk 42 processed sucessfully in 90.9007716178894 seconds.


  a = np.array(


Chunk 43 processed sucessfully in 15.793612957000732 seconds.


  a = np.array(


Chunk 44 processed sucessfully in 13.336268901824951 seconds.


  a = np.array(


Chunk 45 processed sucessfully in 23.21897268295288 seconds.


  a = np.array(


Chunk 46 processed sucessfully in 55.5158896446228 seconds.


  a = np.array(


Chunk 47 processed sucessfully in 37.40347766876221 seconds.


  a = np.array(


Chunk 48 processed sucessfully in 126.36294674873352 seconds.


  a = np.array(


Chunk 49 processed sucessfully in 30.093411445617676 seconds.


  a = np.array(


Chunk 50 processed sucessfully in 33.95536017417908 seconds.


  a = np.array(


Chunk 51 processed sucessfully in 10.096921443939209 seconds.


  a = np.array(


Chunk 52 processed sucessfully in 24.450292825698853 seconds.


  a = np.array(


Chunk 53 processed sucessfully in 78.68081092834473 seconds.


  a = np.array(


Chunk 54 processed sucessfully in 59.39924621582031 seconds.


  a = np.array(


Chunk 55 processed sucessfully in 32.3205623626709 seconds.


  a = np.array(


Chunk 56 processed sucessfully in 22.943347215652466 seconds.


  a = np.array(


Chunk 57 processed sucessfully in 17.458800792694092 seconds.


  a = np.array(


Chunk 58 processed sucessfully in 42.03199028968811 seconds.


  a = np.array(


Chunk 59 processed sucessfully in 42.75125980377197 seconds.


  a = np.array(


Chunk 60 processed sucessfully in 37.35565733909607 seconds.


  a = np.array(


Chunk 61 processed sucessfully in 129.34060668945312 seconds.


  a = np.array(


Chunk 62 processed sucessfully in 28.503572940826416 seconds.


  a = np.array(


Chunk 63 processed sucessfully in 15.832542896270752 seconds.


  a = np.array(


Chunk 64 processed sucessfully in 90.61631035804749 seconds.


  a = np.array(


Chunk 65 processed sucessfully in 45.89105296134949 seconds.


  a = np.array(


Chunk 66 processed sucessfully in 127.9205219745636 seconds.


  a = np.array(


Chunk 67 processed sucessfully in 120.23514771461487 seconds.


  a = np.array(


Chunk 68 processed sucessfully in 37.70616149902344 seconds.


  a = np.array(


Chunk 69 processed sucessfully in 72.71402263641357 seconds.


  a = np.array(


Chunk 70 processed sucessfully in 18.224586248397827 seconds.


  a = np.array(


Chunk 71 processed sucessfully in 189.8400275707245 seconds.


  a = np.array(


Chunk 72 processed sucessfully in 35.05113673210144 seconds.


  a = np.array(


Chunk 73 processed sucessfully in 139.2181613445282 seconds.


  a = np.array(


Chunk 74 processed sucessfully in 50.77099657058716 seconds.


  a = np.array(


Chunk 75 processed sucessfully in 65.76493215560913 seconds.


  a = np.array(


Chunk 76 processed sucessfully in 51.584426164627075 seconds.


  a = np.array(


Chunk 77 processed sucessfully in 53.522892236709595 seconds.


  a = np.array(


Chunk 78 processed sucessfully in 47.52160692214966 seconds.


  a = np.array(


Chunk 79 processed sucessfully in 19.825929164886475 seconds.


  a = np.array(


Chunk 80 processed sucessfully in 30.792400598526 seconds.


  a = np.array(


Chunk 81 processed sucessfully in 19.92421293258667 seconds.


  a = np.array(


Chunk 82 processed sucessfully in 36.21914768218994 seconds.


  a = np.array(


Chunk 83 processed sucessfully in 175.30663919448853 seconds.


  a = np.array(


Chunk 84 processed sucessfully in 64.89760637283325 seconds.


  a = np.array(


Chunk 85 processed sucessfully in 80.92152333259583 seconds.


  a = np.array(


Chunk 86 processed sucessfully in 30.077708959579468 seconds.


  a = np.array(


Chunk 87 processed sucessfully in 267.03305554389954 seconds.


  a = np.array(


Chunk 88 processed sucessfully in 42.7241907119751 seconds.


  a = np.array(


Chunk 89 processed sucessfully in 84.39536690711975 seconds.


  a = np.array(


Chunk 90 processed sucessfully in 31.87593388557434 seconds.


  a = np.array(


Chunk 91 processed sucessfully in 93.81584119796753 seconds.


  a = np.array(


Chunk 92 processed sucessfully in 60.27356839179993 seconds.


  a = np.array(


Chunk 93 processed sucessfully in 34.05999040603638 seconds.


  a = np.array(


Chunk 94 processed sucessfully in 57.33334016799927 seconds.


  a = np.array(


Chunk 95 processed sucessfully in 75.45840859413147 seconds.


  a = np.array(


Chunk 96 processed sucessfully in 25.714704513549805 seconds.


  a = np.array(


Chunk 97 processed sucessfully in 150.45864415168762 seconds.


  a = np.array(


Chunk 98 processed sucessfully in 53.51958727836609 seconds.


  a = np.array(


Chunk 99 processed sucessfully in 44.32690238952637 seconds.


  a = np.array(


Chunk 100 processed sucessfully in 117.08002710342407 seconds.


  a = np.array(


Chunk 101 processed sucessfully in 47.73750948905945 seconds.


  a = np.array(


Chunk 102 processed sucessfully in 58.08854627609253 seconds.


## Retail centres

In [3]:
# Retail centre geometries, read from the GeoPackage inside the zip archive;
# `measure_nearest` below measures distances to them.
retail = gpd.read_file("../../urbangrammar_samba/functional_data/retail_centres/Pre Release.zip!Retail_Centres_UK.gpkg")

  for feature in features_lst:


In [4]:
# Number of dask workers; also used below as the number of tasks kept in flight.
# NOTE(review): an earlier cell of this notebook also binds `client`/`workers`;
# that client is not closed before this one is created.
workers = 16
# Local cluster with `workers` single-threaded workers.
client = Client(LocalCluster(n_workers=workers, threads_per_worker=1))
# Display the client summary (scheduler address, dashboard link).
client

0,1
Client  Scheduler: tcp://127.0.0.1:43993  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 16  Cores: 16  Memory: 134.91 GB


In [5]:
def measure_nearest(chunk):
    """Measure the distance from every tessellation cell to the nearest retail centre.

    For each tessellation polygon of chunk ``chunk``, retail centres are looked
    up through the spatial index of the notebook-level ``retail`` frame within
    a buffer around the cell. The buffer is widened in steps of 500 CRS units
    until at least one candidate is found, and the smallest distance to the
    candidates is kept. The ``hindex`` / ``nearest_retail_centre`` table is
    written to a parquet file named after the chunk.

    Parameters
    ----------
    chunk : int
        Number of the tessellation chunk to process.

    Returns
    -------
    str
        Status message including the elapsed time in seconds.

    Raises
    ------
    ValueError
        If ``retail`` is empty; the widening search would otherwise never end.
    """
    s = time()
    # Only the identifier and the tessellation geometry are used below.
    gdf = gpd.read_parquet(
        f'../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk}.pq',
        columns=['hindex', 'tessellation'],
    )

    if retail.empty:
        raise ValueError("`retail` is empty; cannot measure the distance to the nearest retail centre.")

    initial_buffer = 500
    buffered = gdf.tessellation.buffer(initial_buffer)
    distance = []
    for orig, geom in zip(gdf.tessellation, buffered.geometry):
        query = retail.sindex.query(geom, predicate='intersects')
        # Extra margin added on top of the initial buffer, grown on every retry.
        extra_buffer = initial_buffer
        while query.size == 0:
            query = retail.sindex.query(geom.buffer(extra_buffer), predicate='intersects')
            extra_buffer += initial_buffer

        # Exact distance is only computed against the candidates found above.
        distance.append(retail.iloc[query].distance(orig).min())
    gdf['nearest_retail_centre'] = distance
    gdf[['hindex', 'nearest_retail_centre']].to_parquet(f'../../urbangrammar_samba/spatial_signatures/functional/retail_centre/retail_{chunk}.pq')

    return f"Chunk {chunk} processed sucessfully in {time() - s} seconds."

In [6]:
# Keep `workers` tasks in flight: seed the cluster with one task per worker,
# then submit the next chunk every time a task finishes.
inputs = iter(range(103))
futures = [client.submit(measure_nearest, next(inputs)) for _ in range(workers)]
ac = as_completed(futures)
for finished_future in ac:
    try:
        next_chunk_id = next(inputs)
    except StopIteration:
        # Every chunk has already been submitted; just drain the remaining results.
        pass
    else:
        new_future = client.submit(measure_nearest, next_chunk_id)
        ac.add(new_future)
    print(finished_future.result())

Chunk 3 processed sucessfully in 229.41065430641174 seconds.
Chunk 15 processed sucessfully in 223.6650960445404 seconds.
Chunk 0 processed sucessfully in 244.80605340003967 seconds.
Chunk 2 processed sucessfully in 262.985951423645 seconds.
Chunk 11 processed sucessfully in 267.57428884506226 seconds.
Chunk 1 processed sucessfully in 295.50642371177673 seconds.
Chunk 9 processed sucessfully in 307.793461561203 seconds.
Chunk 10 processed sucessfully in 320.450528383255 seconds.
Chunk 7 processed sucessfully in 332.8020794391632 seconds.
Chunk 8 processed sucessfully in 341.40525555610657 seconds.
Chunk 13 processed sucessfully in 357.23317408561707 seconds.
Chunk 18 processed sucessfully in 197.36008834838867 seconds.
Chunk 16 processed sucessfully in 216.73886609077454 seconds.
Chunk 17 processed sucessfully in 217.3698329925537 seconds.
Chunk 14 processed sucessfully in 452.75913739204407 seconds.
Chunk 4 processed sucessfully in 480.302627325058 seconds.
Chunk 12 processed sucessfu

## Water

In [None]:
# `os` is needed for the environment lookups below and is not imported
# elsewhere in the notebook.
import os

from sqlalchemy import create_engine
from shapely.geometry import box
from shapely.ops import polygonize

# Database credentials are taken from the environment rather than hard-coded.
user = os.environ.get('DB_USER')
pwd = os.environ.get('DB_PWD')
host = os.environ.get('DB_HOST')
port = os.environ.get('DB_PORT')

# "postgresql" is the SQLAlchemy dialect name; the legacy "postgres" alias is
# rejected by SQLAlchemy 1.4 and later.
db_connection_url = f"postgresql+psycopg2://{user}:{pwd}@{host}:{port}/built_env"

In [None]:
def measure_nearest(chunk):
    """Measure the distance from every tessellation cell to the nearest water.

    Water is the union of the surface water polygons intersecting the chunk's
    bounding box and the sea, where the sea is the bounding box of the
    polygonised coastline minus the land polygons. Both are clipped to the
    chunk's bounding box. For each tessellation polygon the target geometries
    are looked up within a buffer that is widened in steps of 500 CRS units
    until at least one candidate is found, and the smallest distance to the
    candidates is kept. The ``hindex`` / ``nearest_water`` table is written to
    a parquet file named after the chunk.

    NOTE(review): this reuses the name ``measure_nearest`` of the retail-centre
    function defined earlier in the notebook, so running this cell replaces
    that definition.
    NOTE(review): because the targets are clipped to the chunk's bounding box,
    water lying just outside the chunk is not considered - confirm this is
    intended.

    Parameters
    ----------
    chunk : int
        Number of the tessellation chunk to process.

    Returns
    -------
    str
        Status message including the elapsed time in seconds.
    """
    s = time()
    gdf = gpd.read_parquet(f'../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk}.pq')
    bounds = gdf.total_bounds

    engine = create_engine(db_connection_url)
    try:
        sql = f'SELECT * FROM openmap_surfacewater_area_200824 WHERE ST_Intersects(geometry, ST_MakeEnvelope({bounds[0]}, {bounds[1]}, {bounds[2]}, {bounds[3]}, 27700))'
        water = gpd.read_postgis(sql, engine, geom_col='geometry')

        # The whole coastline is required to polygonise the land mass; a
        # bounding-box subset of the coastline is therefore not queried.
        sql = 'SELECT * FROM gb_coastline_2016'
        coastline = gpd.read_postgis(sql, engine, geom_col='geometry')
    finally:
        # Release the pooled database connections held by this task.
        engine.dispose()

    polys = polygonize(coastline.geometry)
    land = gpd.GeoSeries(list(polys), crs=27700)
    sea = box(*land.total_bounds).difference(land.geometry.unary_union)

    # Build a new series instead of enlarging `water.geometry` in place, which
    # would mutate `water` and rely on it having a default integer index.
    target = pd.concat(
        [water.geometry, gpd.GeoSeries([sea], crs=water.crs)],
        ignore_index=True,
    )
    target = gpd.clip(target, box(*bounds))

    if target.empty:
        # No water inside the chunk's bounding box: the widening search below
        # would never terminate, so the distance is recorded as missing.
        distance = [np.nan] * len(gdf)
    else:
        initial_buffer = 500
        buffered = gdf.tessellation.buffer(initial_buffer)
        distance = []
        for orig, geom in zip(gdf.tessellation, buffered.geometry):
            query = target.sindex.query(geom, predicate='intersects')
            # Extra margin added on top of the initial buffer, grown on every retry.
            extra_buffer = initial_buffer
            while query.size == 0:
                query = target.sindex.query(geom.buffer(extra_buffer), predicate='intersects')
                extra_buffer += initial_buffer

            # Exact distance is only computed against the candidates found above.
            distance.append(target.iloc[query].distance(orig).min())
    gdf['nearest_water'] = distance
    gdf[['hindex', 'nearest_water']].to_parquet(f'../../urbangrammar_samba/spatial_signatures/functional/water/water_{chunk}.pq')

    return f"Chunk {chunk} processed sucessfully in {time() - s} seconds."